In [1]:
# python
import os, sys, pickle
from itertools import combinations_with_replacement, combinations, product
from collections import OrderedDict

# stats
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.api import OLS
from pandas.api.types import CategoricalDtype

# plot
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

# neural networks
import torch, torch.utils.model_zoo  # required to load nets
from torchvision.models.feature_extraction import (
    get_graph_node_names,
    create_feature_extractor,
)

# analysis code
from lib.transforms import VisualPriorRepresentation
from lib.functions_second_analysis import *
from lib.functions_scripting import *

# Input locations for the images and the rating data (relative to the notebook).
PATH_IMAGES = "../images and ratings/imageversions_256"
PATH_RATINGS = "../images and ratings/ratings"

# !! correlations, invert sign for integration
# NOTE(review): presumably the stored integration values are correlations whose
# sign has to be flipped before they are read as "integration" — confirm where
# (or whether) the loaders apply that inversion.
# Integration data in three variants: plain, maxpool and average.
PATH_INTEGRATION = "../data csv/integration"
PATH_INTEGRATION_MAXPOOL = "../data csv/integration maxpool"
PATH_INTEGRATION_AVERAGE = "../data csv/integration average"

# ib-corr data in the same three variants.
PATH_IBCORR = "../data csv/ibcorr"
PATH_IBCORR_AVERAGE = "../data csv/ibcorr average"
PATH_IBCORR_MAXPOOL = "../data csv/ibcorr maxpool"

# Output locations.
PATH_RESULTS = "../results"
PATH_PLOTS = "../plots"

# NOTE(review): beauty_ratings is not referenced again in the visible part of
# this notebook.
beauty_ratings = load_ratings(PATH_RATINGS)

# load

In [30]:
# selection: every model, study, dataset and scale
_models, _studies, _datasets, _scales = (
    MODEL_NAMES,
    STUDY_NAMES,
    DATASET_NAMES,
    SCALE_NAMES,
)

In [None]:
# selection: every model, but a single study / dataset / scale
_models = MODEL_NAMES
_studies, _datasets, _scales = "short presentation", "places1", "scale4"

In [2]:
# selection: NETS_ALL as the model list, all studies / datasets / scales
_models = NETS_ALL
_studies, _datasets, _scales = STUDY_NAMES, DATASET_NAMES, SCALE_NAMES

In [None]:
# Subselect the models that have significant predictions at scale4 by dropping
# the ones listed in models_to_remove.
# NOTE(review): "keypoints" does not match the "keypoints2d" / "keypoints3d"
# labels shown in the RDM outputs further down — confirm the intended name(s).
models_to_remove = {"edge_occlusion", "inpainting", "keypoints", "normal", "jigsaw"}

# Filter in MODEL_NAMES order instead of going through set.difference():
# iterating a set of strings has no stable order, so the previous expression
# could return the models in a different order on every kernel start.
# dict.fromkeys keeps the de-duplication the set round-trip used to provide.
_models = [
    name for name in dict.fromkeys(MODEL_NAMES) if name not in models_to_remove
]

In [2]:
# no subselections: load the complete integration and ib-corr data
dfi, dfibc = load_integration(PATH_INTEGRATION), load_ibcorr(PATH_IBCORR)

In [4]:
# unblocked data
_blocking, _layers = None, slice(None)

# load, sort the index, then keep only the selected models / datasets (or
# studies) / scales / layers
dfi = (
    load_integration(PATH_INTEGRATION)
    .sort_index()
    .loc[(_models, _datasets, _scales, slice(None), _layers)]
)
dfibc = (
    load_ibcorr(PATH_IBCORR)
    .sort_index()
    .loc[(_models, _studies, _scales, _layers)]
)

In [3]:
# averaged data
_blocking, _layers = "average", slice(None)

# load, sort the index, then keep only the selected models / datasets (or
# studies) / scales / layers
dfi = (
    load_integration(PATH_INTEGRATION_AVERAGE)
    .sort_index()
    .loc[(_models, _datasets, _scales, slice(None), _layers)]
)
dfibc = (
    load_ibcorr(PATH_IBCORR_AVERAGE)
    .sort_index()
    .loc[(_models, _studies, _scales, _layers)]
)

In [68]:
# maxpooled data
_blocking, _layers = "maxpool", slice(None)

# load, sort the index, then keep only the selected models / datasets (or
# studies) / scales / layers
dfi = (
    load_integration(PATH_INTEGRATION_MAXPOOL)
    .sort_index()
    .loc[(_models, _datasets, _scales, slice(None), _layers)]
)
dfibc = (
    load_ibcorr(PATH_IBCORR_MAXPOOL)
    .sort_index()
    .loc[(_models, _studies, _scales, _layers)]
)

# Target 01: differences in ib-corr

Absolute difference in correlation in each layer, summed over layers. <br>
Optional: normalized by 2 (Spearman correlation range) * num_layers

In [3]:
# ABSOLUTE DIFFERENCES
# models become columns, then calculate_rdm is applied per (study, scale) group
_ibcorr_by_model = dfibc.unstack("model")
_rdm_per_group = _ibcorr_by_model.groupby(["study", "scale"]).apply(
    calculate_rdm, ctype="absdiff"
)
# remove the level named None from the index and from the columns
target_rdm_absdiff = _rdm_per_group.droplevel(None).droplevel(None, axis=1)

In [20]:
# target_rdm_absdiff.to_hdf(os.path.join(PATH_RESULTS, "compare predictions", "absoloute differences.h5"), key="target_rdm")
# Reload the stored RDMs; the file name keeps its on-disk spelling.
_absdiff_file = os.path.join(
    PATH_RESULTS, "compare predictions", "absoloute differences.h5"
)
target_rdm_absdiff = pd.read_hdf(_absdiff_file, key="target_rdm")

In [None]:
# Loop-based draft of the absolute-difference target RDM, kept commented out.
# df = dfibc.unstack("layer")
# rdm_target_absdiff = pd.DataFrame(0, index=_models, columns=_models)

# for c1, c2 in combinations(_models, 2):
#     # sum of absolute differences (easier, no correlation of correlated correlation coefficients)
#     rdm_target_absdiff.loc[c1, c2] = (df.loc[c1] - df.loc[c2]).abs().sum()
#     rdm_target_absdiff.loc[c2, c1] = (df.loc[c1] - df.loc[c2]).abs().sum()

# # norming dissimilarity: sum_abs_diff / (num_layers * 2)
# sns.heatmap(
#     rdm_target_absdiff,
#     xticklabels=rdm_target_absdiff.columns,
#     yticklabels=rdm_target_absdiff.index,
# )
# target_rdm = rdm_target_absdiff

In [None]:
# CORRELATIONS
# Draft of a correlation-based target RDM, kept commented out.
# # TODO: add correlate correlations
# # TODO check correct order of models for this
# df = dfibc.unstack("layer")
# rdm_target_corrwise = pd.DataFrame(0, index=_models, columns=_models)

# for c1, c2 in combinations(_models, 2):
#     rdm_target_corrwise.loc[c1, c2] = pearsonr(df.loc[c1], df.loc[c2])[0]
#     rdm_target_corrwise.loc[c2, c1] = pearsonr(df.loc[c1], df.loc[c2])[0]

# # norming dissimilarity: sum_abs_diff / (num_layers * 2)
# # NOTE(review): the line above is copied from the absolute-difference draft and
# # does not describe the pearsonr-based values computed here.
# sns.heatmap(
#     rdm_target_corrwise,
#     xticklabels=rdm_target_corrwise.columns,
#     yticklabels=rdm_target_corrwise.index,
# )
# target_rdm = rdm_target_corrwise

# Predictors-Target analysis
Do this for each study and each scale, to check whether the same factors consistently come out on top.



In [None]:
# Target RDM for one hard-coded (study, scale) combination; the section text
# asks for this analysis to be repeated for every study and scale.
target_rdm = target_rdm_absdiff.loc[("short presentation", "scale4")]

In [149]:
# Target RDM passed through rdm2vec (presumably its vectorised form).
# NOTE(review): later cells call correlate_rdms(t, f2) and rdm2vec(t), i.e. treat
# `t` as a full RDM, while the plotting cells use t.max() / t.size / t.argsort()
# as if it were a vector — confirm which form `t` is meant to hold.
t = rdm2vec(target_rdm)

In [None]:
# variance partitioning
def predictor_r2(predictors: list[pd.DataFrame], target):
    """Return the R² of an OLS fit of ``target`` on the given predictor RDMs.

    Each entry of ``predictors`` is passed through ``rdm2vec`` and the results
    are stacked along axis 1 (one regressor per RDM); ``sm.add_constant`` is
    then applied to the stacked array before fitting.

    Parameters
    ----------
    predictors : list of pd.DataFrame
        Predictor RDMs. Assumes every RDM lists the models in the same order
        as the one ``target`` was built from — TODO confirm at the call site.
    target
        Dependent variable handed to ``sm.OLS`` unchanged (not passed through
        ``rdm2vec`` here).

    Returns
    -------
    float
        ``rsquared`` of the fitted model.

    Notes
    -----
    NOTE(review): the other cells of this notebook call ``predictors_r2``
    (plural); this function is not called in the visible cells.
    """
    columns = []
    for rdm in predictors:
        columns.append(rdm2vec(rdm).transpose())
    design = sm.add_constant(np.stack(columns, axis=1))
    return sm.OLS(target, design).fit().rsquared

## predictor 1 - target

In [143]:
pearsonr(p1, t)

(0.07931940015309769, 0.20861366351184038)

In [150]:
correlate_rdms(predictor_rdm_1.loc[NETS_ALL, NETS_ALL], target_rdm.loc[NETS_ALL, NETS_ALL])

(-0.29001848514913353, 0.002690861862071442)

In [193]:
predictor_rdm_1.shape

(23, 23)

In [201]:
predictor_rdm_1.shape

(23, 23)

In [199]:
target_rdm.shape

(23, 23)

In [203]:
predictor_rdm_1

Unnamed: 0,autoencoding,depth_euclidean,jigsaw,reshading,edge_occlusion,keypoints2d,room_layout,curvature,edge_texture,keypoints3d,...,nonfixated_pose,segment_unsup25d,class_scene,fixated_pose,normal,segment_semantic,denoising,inpainting,point_matching,vanishing_point
autoencoding,1,0,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,1,1,0,0
depth_euclidean,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
jigsaw,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
reshading,0,0,0,1,1,0,0,1,0,1,...,0,1,0,0,1,0,0,0,0,0
edge_occlusion,0,0,0,1,1,0,0,1,0,1,...,0,1,0,0,1,0,0,0,0,0
keypoints2d,1,0,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,1,1,0,0
room_layout,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
curvature,0,0,0,1,1,0,0,1,0,1,...,0,1,0,0,1,0,0,0,0,0
edge_texture,1,0,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,1,1,0,0
keypoints3d,0,0,0,1,1,0,0,1,0,1,...,0,1,0,0,1,0,0,0,0,0


In [202]:
target_rdm

model,autoencoding,class_object,class_scene,curvature,denoising,depth_euclidean,edge_occlusion,edge_texture,egomotion,fixated_pose,...,keypoints3d,nonfixated_pose,normal,point_matching,reshading,room_layout,segment_semantic,segment_unsup25d,segment_unsup2d,vanishing_point
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
autoencoding,0.0,8.720643,10.245462,11.560273,2.300163,12.35446,10.751507,10.301925,7.106467,6.932489,...,12.777257,7.820332,14.337799,5.792216,11.865939,12.802817,11.361108,11.190042,14.14435,14.067232
class_object,8.720643,0.0,4.668494,5.136402,7.711176,5.873315,4.239254,6.2035,3.037687,5.160542,...,6.125608,4.166002,7.775462,4.763991,5.782873,6.468299,5.207982,4.613443,7.488759,7.592471
class_scene,10.245462,4.668494,0.0,2.879982,10.086942,3.535304,3.428632,6.583889,4.260549,4.217045,...,4.197462,3.741955,5.009428,5.378213,3.684291,4.051652,4.766741,3.829726,4.673175,5.842049
curvature,11.560273,5.136402,2.879982,0.0,11.36526,3.044653,2.693327,6.381715,5.28763,5.182675,...,2.431945,4.465542,3.348786,6.387933,3.107249,3.392399,4.252471,3.43957,3.776034,4.535102
denoising,2.300163,7.711176,10.086942,11.36526,0.0,12.275121,10.574539,9.634885,6.617355,6.606753,...,12.598871,7.570361,14.035662,5.296382,11.580376,12.425291,10.93129,11.023551,13.924655,13.87275
depth_euclidean,12.35446,5.873315,3.535304,3.044653,12.275121,0.0,3.421892,6.412434,6.014866,6.771651,...,4.068905,5.460561,4.662241,7.427933,4.654122,4.612069,3.654886,3.587521,4.566941,4.746871
edge_occlusion,10.751507,4.239254,3.428632,2.693327,10.574539,3.421892,0.0,5.702883,4.151657,5.579604,...,3.002468,3.929731,4.594565,5.812893,3.661489,4.355596,4.170364,2.573414,4.385174,4.574199
edge_texture,10.301925,6.2035,6.583889,6.381715,9.634885,6.412434,5.702883,0.0,5.911384,6.865827,...,6.289865,6.274059,6.919198,6.697624,6.404736,6.96243,6.270901,6.233728,6.381317,6.2245
egomotion,7.106467,3.037687,4.260549,5.28763,6.617355,6.014866,4.151657,5.911384,0.0,3.71358,...,6.462083,2.150931,7.955973,2.892818,5.264403,6.518689,5.34073,5.019808,7.432338,7.435854
fixated_pose,6.932489,5.160542,4.217045,5.182675,6.606753,6.771651,5.579604,6.865827,3.71358,0.0,...,6.300387,3.062507,7.428909,2.997319,5.073173,5.999875,7.127146,6.102898,7.358039,8.192836


In [197]:
# NOTE(review): this call raised "ValueError: The indices for endog and exog are
# not aligned" (see the recorded output). The follow-up cell instead passes
# rdm2vec(...) of both RDMs re-indexed with NETS_ALL, which runs.
predictors_r2(predictor_rdm_1, target_rdm)

ValueError: The indices for endog and exog are not aligned

In [152]:
predictors_r2(rdm2vec(predictor_rdm_1.loc[NETS_ALL, NETS_ALL]), rdm2vec(target_rdm.loc[NETS_ALL, NETS_ALL]))

0.08411072172819822

## predictor 2 - target

In [None]:
f2 = factor2_rdm_alllayers.loc[("places1","scale4"),("places1","scale4")]

In [162]:
f2

Unnamed: 0,autoencoding,class_object,class_scene,curvature,denoising,depth_euclidean,edge_occlusion,edge_texture,egomotion,fixated_pose,...,keypoints3d,nonfixated_pose,normal,point_matching,reshading,room_layout,segment_semantic,segment_unsup25d,segment_unsup2d,vanishing_point
autoencoding,,0.120553,0.099613,0.620845,0.440686,0.259606,0.27487,0.164451,0.135081,0.337666,...,0.316053,0.186518,0.294447,0.38298,0.302508,0.22997,0.222017,0.442672,0.181739,0.161203
class_object,0.120553,,0.953411,0.46571,0.21193,0.789665,0.815153,0.611867,0.70212,0.81697,...,0.797041,0.872127,0.765332,0.792897,0.761156,0.865824,0.892269,0.720315,0.890437,0.547735
class_scene,0.099613,0.953411,,0.430506,0.172858,0.751446,0.789595,0.633328,0.712859,0.808762,...,0.758076,0.865919,0.704542,0.768783,0.714887,0.845796,0.828347,0.675602,0.876735,0.558356
curvature,0.620845,0.46571,0.430506,,0.45431,0.705277,0.73341,0.345621,0.544716,0.757458,...,0.713664,0.516564,0.644461,0.566034,0.633795,0.539563,0.578745,0.747232,0.525738,0.347146
denoising,0.440686,0.21193,0.172858,0.45431,,0.292887,0.307077,0.124808,0.355372,0.278492,...,0.342935,0.235345,0.282192,0.32324,0.29791,0.284873,0.357725,0.467063,0.23237,0.348321
depth_euclidean,0.259606,0.789665,0.751446,0.705277,0.292887,,0.873849,0.529081,0.740228,0.868674,...,0.851096,0.767064,0.85227,0.710028,0.800894,0.78492,0.818985,0.783113,0.788529,0.59336
edge_occlusion,0.27487,0.815153,0.789595,0.73341,0.307077,0.873849,,0.495538,0.799273,0.861582,...,0.895906,0.803852,0.869334,0.752845,0.754991,0.724061,0.793679,0.794493,0.84354,0.514947
edge_texture,0.164451,0.611867,0.633328,0.345621,0.124808,0.529081,0.495538,,0.473085,0.669166,...,0.494097,0.605675,0.420897,0.606588,0.51923,0.660563,0.602655,0.493007,0.580108,0.638222
egomotion,0.135081,0.70212,0.712859,0.544716,0.355372,0.740228,0.799273,0.473085,,0.809485,...,0.707248,0.76692,0.632728,0.701396,0.603666,0.692742,0.60793,0.649462,0.760676,0.632246
fixated_pose,0.337666,0.81697,0.808762,0.757458,0.278492,0.868674,0.861582,0.669166,0.809485,,...,0.851196,0.87133,0.74903,0.833729,0.812376,0.870863,0.814144,0.777113,0.86498,0.65259


In [72]:
correlate_rdms(t, f2)

(-0.6677444638211117, 4.977027167376115e-34)

In [214]:
f2  = factor2_rdm_bestlayer.loc[("short presentation","scale4")]

In [228]:
correlate_rdms(t, f2)

(-0.5236855635420519, 3.2151495763391566e-19)

In [None]:
factor2_rdm_bestlayer

In [222]:
target_rdm_absdiff.groupby(["study","scale"])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7c0ccc0d1a60>

In [None]:
# print, pair by pair, whether both frames yield the same (study, scale) key
_f2_groups = factor2_rdm_bestlayer.groupby(["study", "scale"])
_target_groups = target_rdm_absdiff.groupby(["study", "scale"])
for f2_item, target_item in zip(_f2_groups, _target_groups):
    print(f2_item[0] == target_item[0])

In [236]:
rdm2vec(f2), rdm2vec(t);

In [237]:
predictors_r2(rdm2vec(f2), rdm2vec(t))

0.3648911015205908

## predictor 3 - target

In [None]:
f3 = factor3_rdm

In [None]:
correlate_rdms(f3, t)

(-0.37464126075879833, 7.500535101774924e-10)

In [None]:
predictors_r2(rdm2vec(f3), rdm2vec(t))

0.14035607426294194

In [None]:
spearmanr(t, p3)

In [None]:
plt.scatter(t, p3, s=10, color="magenta")

In [None]:
# Joint density of t (x axis) against p3 (y axis) with the raw points overlaid.
sns.kdeplot(pd.DataFrame((t, p3)).T, x=0, y=1, fill=True, levels=100)
plt.scatter(t, p3, s=10, color="magenta")
plt.xlim(0, t.max())
# The y axis shows p3, so its upper limit has to come from p3. It previously
# used p1.max(), which clips or pads the plot whenever p1 and p3 differ in range.
plt.ylim(0, p3.max())

In [None]:
# Rank-vs-rank scatter of t against p3.
# argsort() on its own returns the sorting permutation (the position of the
# smallest, second smallest, ... element), not the rank of each element.
# Applying it twice converts that permutation into 0-based ranks; tied values
# receive distinct, arbitrarily ordered ranks.
t_rank = np.ravel(t).argsort().argsort()
p3_rank = np.ravel(p3).argsort().argsort()
plt.scatter(t_rank, p3_rank, s=10, color="magenta")

In [None]:
# Rank-vs-rank density of t against p3 with the ranked points overlaid.
# argsort() on its own returns the sorting permutation, not per-element ranks;
# applying it twice yields 0-based ranks (tied values receive distinct,
# arbitrarily ordered ranks).
t_rank = np.ravel(t).argsort().argsort()
p3_rank = np.ravel(p3).argsort().argsort()
sns.kdeplot(pd.DataFrame((t_rank, p3_rank)).T, x=0, y=1, fill=True, levels=3)
plt.scatter(t_rank, p3_rank, s=10, color="magenta")
# ranks run from 0 to size - 1 on both axes
plt.xlim(0, t.size)
plt.ylim(0, t.size)

## variance partitioning

In [None]:
# predictors_r2 result for each predictor on its own (p1, p2, p3), same target t
predictors_r2(p1, t), predictors_r2(p2, t), predictors_r2(p3, t)

In [None]:
# Contribution of p1 beyond p3: result for the joint (p1, p3) predictors minus
# the result for p3 alone.
predictors_r2(np.stack((p1, p3), axis=1), t) - predictors_r2(p3, t)

In [None]:
# plot full venn diagram (with 2 or 3 predictors)