# Graph Comparison Script
This script aims to compare the exploration graphs with the path graphs.

Requirements: 
   * Exploration 
       * Centrality_df
       * RichClubDoc
   * Pathfinding (Single, SingleC, Dyadic)
       * Centrality_df of all paths and combined. 
       
   

## Creating the required environment (skip if already done)

Running the following cell creates a file `graphs.yml` that can be used to set up a conda environment containing the required packages. If you have already downloaded the file from my GitHub, skip the next cell and create the environment directly from it.

In [None]:
%%writefile graphs.yml
# Conda environment for the graph comparison notebook.
name: graphs
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.6
  - jupyter
  - imageio
  - imageio-ffmpeg
  - matplotlib
  - scikit-image
  - opencv
  - networkx
  - pandas
  - statsmodels
  # imported directly by the notebook (scipy.cluster, scipy.sparse,
  # scipy.special and sklearn.preprocessing), so listed explicitly
  - scipy
  - scikit-learn

### Environment Creation
To create the environment, open the terminal, go to the directory where you stored the graphs.yml file (the directory of the notebook) and type
conda env create -f graphs.yml
After running this command you have to activate the environment (Linux/MacOS: conda activate graphs, Windows: activate graphs) and then reopen the notebook in that environment.

In [1]:
import os
import cv2
import json
import numpy as np
import re
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import glob
import scipy.cluster.vq as clusters
import scipy.sparse as sparse
import warnings
import random_graph
warnings.simplefilter(action='ignore', category=FutureWarning)

from scipy.special import binom as nchoosek
from sklearn.preprocessing import normalize
from pandas.plotting import autocorrelation_plot as AC_plot 
from statsmodels.graphics import tsaplots
from statsmodels.tsa.stattools import acf
from skimage.filters import gaussian
from mpl_toolkits.mplot3d import Axes3D 
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1 import make_axes_locatable, axes_size
from matplotlib import gridspec


# Folder layout: everything is addressed relative to the notebook directory.
OG_DATA_PATH  = './'
GIT_DATA_PATH = './Results/'
GIT_GRAPH_PATH   = './Results/Graphs/'
GIT_SINGLE_PATH  = './Results/Single/'
GIT_SINGLEC_PATH = './Results/SingleC/'
GIT_DYADIC_PATH  = './Results/Dyadic/'
RESSOURCES_PATH  = './Ressources/'
GIT_PROCESSED_DATA_PATH = './Results/Comparison/'


def _visible_entries(folder):
    """Return the entries of *folder* without hidden files, in ascending
    case-insensitive order."""
    return sorted(
        (entry for entry in os.listdir(folder) if not entry.startswith('.')),
        key=str.lower,
    )


# Getting the Folder without hidden files in ascending order
GIT_DATA_FOLDER = _visible_entries(GIT_DATA_PATH)
GIT_PROCESSED_DATA_FOLDER = _visible_entries(GIT_PROCESSED_DATA_PATH)
GIT_GRAPH_FOLDER = _visible_entries(GIT_GRAPH_PATH)
GIT_SINGLE_FOLDER = _visible_entries(GIT_SINGLE_PATH)
GIT_SINGLEC_FOLDER = _visible_entries(GIT_SINGLEC_PATH)
GIT_DYADIC_FOLDER = _visible_entries(GIT_DYADIC_PATH)

# houselist
house_file = RESSOURCES_PATH + 'building_collider_list.csv'
try:
    houselist = pd.read_csv(house_file)
except Exception:
    # Loading stays best-effort, but the name is always bound so that later
    # code can test for a missing list instead of hitting a NameError.
    # `except Exception` (not a bare except) lets Ctrl-C through.
    houselist = None
    print('HouseList could not be loaded!')

# global variables
path_num = 9                                  # paths are numbered 1..path_num
conditions = ['Single', 'SingleC', 'Dyadic']  # pathfinding conditions
directions = ['A', 'B']

# plotting defaults
fontsize = 20

# presumably RGB triples in the 0..1 range used for plotting -- confirm
green = [0.40,0.80,0.42]
blue = [0.27,0.38,0.99]
grey = [0.75,0.75,0.75]

# loading function for csv files
def load_csv(path, filename):
    """Read the CSV file ``path + filename`` into a pandas DataFrame.

    Parameters
    ----------
    path : str
        Directory prefix. It is concatenated with ``filename`` as-is, so it
        has to end with a path separator.
    filename : str
        Name of the CSV file inside ``path``.

    Returns
    -------
    pandas.DataFrame
        The parsed file.

    Raises
    ------
    Exception
        Whatever ``open`` or ``pandas.read_csv`` raised (for example
        ``FileNotFoundError``). The notice below is printed first.
    """
    try:
        with open(path + filename) as f:
            return pd.read_csv(f)
    except Exception:
        print("\tCould not load file - " + filename + " - !")
        # Re-raise the real cause. Falling through to a `return` of a name
        # that was never bound would hide it behind an UnboundLocalError.
        raise

In [12]:
# Every entry of the results folder whose name starts with a digit contributes
# its first four characters as an integer subject ID; np.unique removes
# duplicates and sorts the IDs.
subIDs = np.unique(
    [int(entry[:4]) for entry in GIT_DATA_FOLDER if entry[0].isdigit()]
)
print(subIDs)

[1005 1008 1010 1011 1013 1017 1021 1023]


In [2]:
# custom subID
# Replaces any previously built subject list, so the comparison loop over
# `subIDs` only runs for the subject(s) listed here.
subIDs = [1008]

In [3]:
# data dictionary for all the path data
#   Single / SingleC: data_dict[condition][direction][path]
#   Dyadic:           data_dict['Dyadic'][direction][role][path]
# where `path` is the path number as a string and every leaf is the centrality
# table loaded for that path.

data_dict = {'Single': {'A': {}, 'B': {}},
             'SingleC':{'A': {}, 'B': {}},
             'Dyadic': {'A': {'Leader': {}, 'Follower': {}}, 'B': {'Leader': {}, 'Follower': {}}}
            }

# role name -> tag used in the dyadic file names
dyadic_roles = (('Leader', 'L'), ('Follower', 'F'))

for condition in conditions:
    graph_dir = GIT_DATA_PATH+condition+'/Graphs/'
    for direction in directions:
        for path in range(1,path_num+1):
            if condition != 'Dyadic':
                data_dict[condition][direction][str(path)] = \
                    load_csv(graph_dir, direction+'_'+str(path)+'_centrality_df.csv')
                continue

            # Store every role under its own path key; assigning straight to
            # ['Leader'] / ['Follower'] would overwrite the previous path.
            # Each role gets its own try so that a missing file for one role
            # does not skip the other.
            for role, tag in dyadic_roles:
                try:
                    data_dict[condition][direction][role][str(path)] = \
                        load_csv(graph_dir, direction+'_'+str(path)+'_'+tag+'_centrality_df.csv')
                except Exception:
                    # load_csv has already printed which file could not be
                    # loaded; the path is simply left out for this role.
                    continue
        
        

	Could not load file - A_1_F_centrality_df.csv - !
	Could not load file - A_2_F_centrality_df.csv - !
	Could not load file - A_3_F_centrality_df.csv - !
	Could not load file - A_4_F_centrality_df.csv - !
	Could not load file - A_5_F_centrality_df.csv - !
	Could not load file - A_6_F_centrality_df.csv - !
	Could not load file - A_7_F_centrality_df.csv - !
	Could not load file - A_8_F_centrality_df.csv - !
	Could not load file - A_9_F_centrality_df.csv - !
	Could not load file - B_3_F_centrality_df.csv - !
	Could not load file - B_4_F_centrality_df.csv - !
	Could not load file - B_5_F_centrality_df.csv - !
	Could not load file - B_6_F_centrality_df.csv - !
	Could not load file - B_7_F_centrality_df.csv - !
	Could not load file - B_8_F_centrality_df.csv - !
	Could not load file - B_9_F_centrality_df.csv - !


In [144]:
# Display the nested dictionary of loaded path tables for inspection.
data_dict

{'Single': {'A': {'1':    Subject  Building_100  Building_101  Building_102  Building_103  \
   0     1005           NaN           NaN           NaN           1.0   
   1     1013           4.0      4.000000           NaN           NaN   
   2     1019           NaN      2.000000           NaN           2.0   
   3     1021           NaN           NaN           NaN           NaN   
   4     1022           2.0      3.000000      1.000000           3.0   
   5     1023           6.0      2.000000           NaN           NaN   
   6     1055           2.0      6.000000      2.000000           4.0   
   7     1056           3.0      2.000000      3.000000           1.0   
   8     1069           3.0           NaN      2.000000           2.0   
   9     1074           4.0      3.000000           NaN           2.0   
   10    1075           1.0      2.000000           NaN           1.0   
   11    1079           2.0      2.000000      2.000000           2.0   
   12    Mean           3.0    

In [30]:
# ------ PREPARATION ------

# Result container: one (initially empty) list per path, nested under
# condition and direction and, for 'Dyadic' only, additionally under the role.
correlations_all = {
    'Single': {'A': {}, 'B': {}},
    'SingleC': {'A': {}, 'B': {}},
    'Dyadic': {
        'A': {'Leader': {}, 'Follower': {}},
        'B': {'Leader': {}, 'Follower': {}},
    },
}

for condition in conditions:
    for direction in directions:
        branch = correlations_all[condition][direction]
        # Dyadic keeps separate per-path lists for each role.
        if condition == 'Dyadic':
            leaves = (branch['Leader'], branch['Follower'])
        else:
            leaves = (branch,)
        for leaf in leaves:
            for path in range(1, path_num+1):
                leaf[str(path)] = []
            
                


# ------ LOADING ------

# exploration
# centrality table of the exploration graphs
centrality_df = load_csv(GIT_GRAPH_PATH, 'centrality_df.csv')

# pathfinding
# one performance table per pathfinding condition
single_info, singleC_info, dyadic_info = [
    load_csv(folder, filename)
    for folder, filename in (
        (GIT_SINGLE_PATH, 'Single_Performance_Analysis.csv'),
        (GIT_SINGLEC_PATH, 'SingleC_Performance_Analysis.csv'),
        (GIT_DYADIC_PATH, 'Dyadic_Performance_Analysis.csv'),
    )
]

#all_paths_A_centrality_df = load_csv(GIT_GRAPH_PATH, 'A_all_centrality_df.csv')
#all_paths_B_centrality_df = load_csv(GIT_GRAPH_PATH, 'B_all_centrality_df.csv')
          
# Load later in the loop

            
# For every subject, correlate the subject's exploration centrality values
# with the subject's values on every path, restricted to the houses that have
# a value on that path.
#   1. loop over the subjects
#   2. look up the subject's condition, direction and (Dyadic only) role
#   3. loop over the paths


def _subject_assignment(sub):
    """Return ``(condition, direction, role)`` for *sub*, or ``None``.

    The performance tables are searched in the order of ``conditions``
    (Single, SingleC, Dyadic) and the first table that lists the subject
    wins. ``role`` is 'Leader' or 'Follower' for the Dyadic condition and
    ``None`` for the other two.
    """
    info_tables = zip(conditions, (single_info, singleC_info, dyadic_info))
    for condition, info in info_tables:
        rows = info[info.SubjectID == sub]
        if rows.empty:
            continue
        direction = rows.Condition.values[0]
        role = None
        if condition == conditions[2]:
            # Compare the scalar of the first matching row: a whole Series
            # has no single truth value and raises inside an `if`.
            # The explicit `== True` is kept so non-boolean cell contents
            # are not coerced by plain truthiness.
            is_leader = bool(rows['Leader?'].values[0] == True)
            role = 'Leader' if is_leader else 'Follower'
        return condition, direction, role
    return None


for sub in subIDs:

    # the exploration centrality row of the subject; compared as strings so
    # the match works whether the Subject column was read as text or as numbers
    centrality_data = centrality_df[centrality_df.Subject.astype(str) == str(sub)].copy()
    if centrality_data.empty:
        print('Subject ' + str(sub) + ' has no exploration centrality data!')
        continue

    # find out in which condition the subject was (the same for every path,
    # so it is looked up once per subject)
    assignment = _subject_assignment(sub)
    if assignment is None:
        print('Subject ' + str(sub) + ' is not in any condition!')
        continue
    condition, direction, role = assignment

    # Dyadic data and results carry an additional role level
    path_tables = data_dict[condition][direction]
    results = correlations_all[condition][direction]
    if role is not None:
        path_tables = path_tables[role]
        results = results[role]

    for path in range(1, path_num+1):
        path_key = str(path)

        if path_key not in path_tables:
            print('Subject ' + str(sub) + ' has no data for path ' + path_key + '!')
            continue

        # filter out the subject row
        path_data = path_tables[path_key]
        path_data = path_data[path_data.Subject.astype(str) == str(sub)]
        if path_data.empty:
            print('Subject ' + str(sub) + ' has no data for path ' + path_key + '!')
            continue

        # drop nan values as well as Subject Mean and STD column
        path_data = path_data.dropna(axis=1)
        # errors='ignore': Mean/STD may already be gone if they were NaN
        path_data = path_data.drop(['Subject', 'Mean', 'STD'], axis=1, errors='ignore')
        path_data = path_data.reset_index(drop=True)

        # the same houses, taken from the exploration centrality row
        centrality_pathhouses = centrality_data[path_data.columns].copy()
        centrality_pathhouses = centrality_pathhouses.reset_index(drop=True)

        # both frames hold a single row with index 0, so the row-wise
        # correlation has exactly one entry
        results[path_key].append(
            centrality_pathhouses.corrwith(path_data, axis=1).values[0])


In [49]:
# calculate the mean path correlation of degree distributions
# Result: a one-row DataFrame (index 0) with one column per
# condition/direction/path, plus the role for the Dyadic condition.


def _mean_or_nan(values):
    """Return the mean of *values*, or NaN for an empty list.

    Calling np.mean on an empty list also yields NaN, but emits a
    RuntimeWarning for every empty path.
    """
    return np.mean(values) if values else np.nan


column_labels = []
column_means = []

for condition in conditions:
    for direction in directions:
        for path in range(1, path_num+1):
            label = condition + '-' + direction + '-' + str(path)
            per_path = correlations_all[condition][direction]
            if condition != 'Dyadic':
                column_labels.append(label)
                column_means.append(_mean_or_nan(per_path[str(path)]))
            else:
                for role in ('Leader', 'Follower'):
                    column_labels.append(label + ' - ' + role)
                    column_means.append(_mean_or_nan(per_path[role][str(path)]))

# build the frame in one go instead of growing it cell by cell
mean_corr_df = pd.DataFrame([column_means], columns=column_labels)
                
                

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [57]:

# Transposed view of the first 18 columns, i.e. the Single condition
# (2 directions x 9 paths), shown as one labelled series.
mean_corr_df.T[:18][0]



Single-A-1    0.164887
Single-A-2    0.218027
Single-A-3   -0.263911
Single-A-4    0.189395
Single-A-5    0.080511
Single-A-6    0.172065
Single-A-7    0.301262
Single-A-8    0.083213
Single-A-9    0.301315
Single-B-1    0.289332
Single-B-2    0.244079
Single-B-3   -0.108293
Single-B-4    0.209946
Single-B-5    0.264010
Single-B-6    0.123056
Single-B-7    0.168901
Single-B-8    0.233497
Single-B-9    0.229077
Name: 0, dtype: float64

In [79]:
# Individual correlation values collected for the Single condition,
# direction A, path 3 (one entry per processed subject).
correlations_all['Single']['A']['3']

[-0.8521330020990453,
 0.19342457300641908,
 -0.7117790729636564,
 0.3148429325552099]