# Import libraries

In [1]:
# General
import pandas as pd
import numpy as np
import re
import csv
import itertools
import copy

# import seaborn as sns
# from scipy import stats

# import pickle as pickle
# from pylab import *
# import matplotlib.pyplot as plt
# from matplotlib.lines import Line2D
# from IPython.display import Image
# from mpl_toolkits.mplot3d import Axes3D 

# for KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances, euclidean_distances
from sklearn.metrics import pairwise_distances

# for KeplerMapper
import kmapper as km
from kmapper.plotlyviz import *

from sklearn.preprocessing import *
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from sklearn import manifold
# KernelDensity is exported by the public `sklearn.neighbors` package; the
# private `sklearn.neighbors.kde` module path was removed in scikit-learn 0.24.
from sklearn.neighbors import KernelDensity
# `scipy.ndimage.filters` is a deprecated namespace; import from `scipy.ndimage`.
from scipy.ndimage import gaussian_filter1d
from sklearn.decomposition import TruncatedSVD


import warnings
warnings.filterwarnings("ignore")

import plotly.graph_objs as go
# from ipywidgets import (HBox, VBox)

# our functions
from functions import *

# import dionysus as d

# get_ipython().run_line_magic('matplotlib', 'inline')

print("libraries imported ...")


libraries imported ...
libraries imported ...


# Data Import and Preprocessing


In [2]:
# import data
# Read the co-infection measurements (path is relative to the notebook folder).
# The original chained assignment also created a stray `pd.data` attribute on
# the pandas module; only the notebook variable `data` is bound here.
data = pd.read_csv('../data/coinfections.csv')
data

Unnamed: 0,infection scenario,time point [hours],viral load lung [NP copies/100µg RNA],bacerial burden lung [CFU/mL],IFN-γ [pg/mL],TNF-α []pg/mL],MCP-1 [pg/mL],IL-6 [pg/mL],IFN-β [pg/mL],IL-22 [pg/mL],GM-CSF [pg/mL]
0,IAV,1.5,137060.100000,0,26094.528,99.456,427.392,2732.400,2464.848000,19.824000,251.088
1,IAV,1.5,73532.840000,0,20254.848,173.568,297.264,2874.336,5791.536000,25.344000,329.376
2,IAV,1.5,366501.800000,0,5433.400,190.400,165.750,5416.000,0.000000,373.450000,0.000
3,IAV,1.5,39985.850000,0,12923.100,163.840,408.900,3287.990,1794.920000,23.500000,0.000
4,IAV,1.5,181103.200000,0,4403.020,22.720,118.240,517.260,0.000000,0.000000,0.000
5,IAV,1.5,123001.300000,0,138.870,106.410,34.880,53.550,1227.570000,0.000000,50.730
6,IAV,1.5,180120.500000,0,122.540,103.630,28.320,24.960,1186.470000,0.000000,66.040
7,IAV,6.0,213540.000000,0,10348.200,49.810,191.200,3622.990,1329.920000,11.130000,86.170
8,IAV,6.0,189586.300000,0,7953.560,153.670,297.280,2213.060,1353.250000,10.780000,123.170
9,IAV,6.0,100488.200000,0,15170.020,142.070,221.890,2816.030,872.280000,9.210000,0.000


In [3]:
# delete the units from the columns names
# Only the text before the first newline of each header is kept.
cols = data.columns
columns_names = [re.split('\n', header)[0] for header in cols]
data.columns = columns_names

# rename columns that have greek letters (plus a few other header clean-ups)
# NOTE(review): 'TFN-alpha' (sic) is the spelling the feature lists in this
# notebook use, so it is kept unchanged here.
ascii_headers = {
    'IFN-γ': 'IFN-gamma',
    'TNF-α': 'TFN-alpha',
    'IFN-β': 'IFN-beta',
    'infection scenario': 'infection group',
    'GM-CSF ': 'GM-CSF',
    'bacerial burden lung': 'bacterial burden lung',
}
data.rename(columns=ascii_headers, inplace=True)


In [4]:
# define the features variable for future use
# Every column name except the first one, as a plain Python list.
features = data.columns[1:].tolist()
features

['time point',
 'viral load lung',
 'bacterial burden lung',
 'IFN-gamma',
 'TFN-alpha',
 'MCP-1',
 'IL-6',
 'IFN-beta',
 'IL-22',
 'GM-CSF']

In [5]:
# Feature lists for the single-pathogen groups: `features_no_v` leaves out
# 'viral load lung', `features_no_b` leaves out 'bacterial burden lung'.
_shared_tail = ['IFN-gamma', 'TFN-alpha', 'MCP-1', 'IL-6', 'IFN-beta', 'IL-22', 'GM-CSF']

features_no_v = ['time point', 'bacterial burden lung'] + _shared_tail
features_no_b = ['time point', 'viral load lung'] + _shared_tail

In [6]:
# separate infection groups into different data frames
def _select_group(group):
    """Return the rows of `data` for one infection group.

    The 'infection group' column is removed and the index restarts at 0.
    Non-mutating calls are used so that no filtered slice is modified in
    place (which raises pandas' SettingWithCopyWarning).
    """
    subset = data[data['infection group'] == group]
    return subset.drop(['infection group'], axis=1).reset_index(drop=True)


IAV = _select_group('IAV')
T4 = _select_group('T4')
IAVT4 = _select_group('IAV + T4')


In [7]:
# labels as integers
# Infection group -> 1 (IAV), 2 (T4), 3 (anything else).
# Time point      -> 1 (1.5), 2 (6.0), 3 (18.0), 4 (26.0), 5 (anything else).
group_codes = {'IAV': 1, 'T4': 2}
time_point_codes = {1.5: 1, 6.0: 2, 18.0: 3, 26.0: 4}


def _time_point_labels(frame):
    """Return the integer time-point code of every row of `frame` as a NumPy array."""
    return frame['time point'].apply(lambda t: time_point_codes.get(t, 5)).values


# `.values` is used instead of `Series.as_matrix()`, which was removed in
# pandas 1.0; both give the underlying NumPy array.
labels_data = data['infection group'].apply(lambda g: group_codes.get(g, 3)).values

labels_IAV = _time_point_labels(IAV)

labels_T4 = _time_point_labels(T4)

labels_IAVT4 = _time_point_labels(IAVT4)


In [8]:
print("data imported and processed ...")

data imported and processed ...


# Define lenses
## Euclidean metric
### All infection groups

In [9]:
# Lens families for all infection groups, Euclidean metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'euclidean'

# The builders run in the same order as before: features, dimensionality
# reduction, neighbours, geometry.
lenses_features = def_lenses_features(data, features[1:])
lenses_dimred = def_lenses_dimred(
    data, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(data, features[1:], labels_data, metric=metric)
lenses_geometry = def_lenses_geometry(
    data, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [10]:
# Merge the four lens families (all groups, Euclidean) into one mapping; the
# deep copy keeps `lenses_features` itself unchanged.
lenses_data_euclidean = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_data_euclidean.update(lens_family)

### IAV

In [11]:
# Lens families for the IAV group (bacterial burden excluded), Euclidean metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'euclidean'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAV, features_no_b[1:])
lenses_dimred = def_lenses_dimred(
    IAV, features_no_b[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAV, features_no_b[1:], labels_IAV, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAV, features_no_b[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [12]:
# Merge the four lens families (IAV, Euclidean) into one mapping; the deep
# copy keeps `lenses_features` itself unchanged.
lenses_IAV_euclidean = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAV_euclidean.update(lens_family)

### T4

In [13]:
# Lens families for the T4 group (viral load excluded), Euclidean metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'euclidean'

# Builders run in the same order as before.
lenses_features = def_lenses_features(T4, features_no_v[1:])
lenses_dimred = def_lenses_dimred(
    T4, features_no_v[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(T4, features_no_v[1:], labels_T4, metric=metric)
lenses_geometry = def_lenses_geometry(
    T4, features_no_v[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [14]:
# Merge the four lens families (T4, Euclidean) into one mapping; the deep copy
# keeps `lenses_features` itself unchanged.
lenses_T4_euclidean = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_T4_euclidean.update(lens_family)

### IAV + T4

In [15]:
# Lens families for the IAV + T4 group (all features), Euclidean metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'euclidean'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAVT4, features[1:])
lenses_dimred = def_lenses_dimred(
    IAVT4, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAVT4, features[1:], labels_IAVT4, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAVT4, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [16]:
# Merge the four lens families (IAV + T4, Euclidean) into one mapping; the
# deep copy keeps `lenses_features` itself unchanged.
lenses_IAVT4_euclidean = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAVT4_euclidean.update(lens_family)

## Cosine
### All infection groups

In [17]:
# Lens families for all infection groups, cosine metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'cosine'

# Builders run in the same order as before.
lenses_features = def_lenses_features(data, features[1:])
lenses_dimred = def_lenses_dimred(
    data, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(data, features[1:], labels_data, metric=metric)
lenses_geometry = def_lenses_geometry(
    data, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [18]:
# Merge the four lens families (all groups, cosine) into one mapping; the deep
# copy keeps `lenses_features` itself unchanged.
lenses_data_cosine = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_data_cosine.update(lens_family)

### IAV

In [19]:
# Lens families for the IAV group (bacterial burden excluded), cosine metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'cosine'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAV, features_no_b[1:])
lenses_dimred = def_lenses_dimred(
    IAV, features_no_b[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAV, features_no_b[1:], labels_IAV, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAV, features_no_b[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [20]:
# Merge the four lens families (IAV, cosine) into one mapping; the deep copy
# keeps `lenses_features` itself unchanged.
lenses_IAV_cosine = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAV_cosine.update(lens_family)

### T4

In [21]:
# Lens families for the T4 group (viral load excluded), cosine metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'cosine'

# Builders run in the same order as before.
lenses_features = def_lenses_features(T4, features_no_v[1:])
lenses_dimred = def_lenses_dimred(
    T4, features_no_v[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(T4, features_no_v[1:], labels_T4, metric=metric)
lenses_geometry = def_lenses_geometry(
    T4, features_no_v[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [22]:
# Merge the four lens families (T4, cosine) into one mapping; the deep copy
# keeps `lenses_features` itself unchanged.
lenses_T4_cosine = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_T4_cosine.update(lens_family)

### IAVT4

In [23]:
# Lens families for the IAV + T4 group (all features), cosine metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'cosine'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAVT4, features[1:])
lenses_dimred = def_lenses_dimred(
    IAVT4, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAVT4, features[1:], labels_IAVT4, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAVT4, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [24]:
# Merge the four lens families (IAV + T4, cosine) into one mapping; the deep
# copy keeps `lenses_features` itself unchanged.
lenses_IAVT4_cosine = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAVT4_cosine.update(lens_family)

## Correlation
### All infection groups

In [25]:
# Lens families for all infection groups, correlation metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'correlation'

# Builders run in the same order as before.
lenses_features = def_lenses_features(data, features[1:])
lenses_dimred = def_lenses_dimred(
    data, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(data, features[1:], labels_data, metric=metric)
lenses_geometry = def_lenses_geometry(
    data, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)


In [26]:
# Merge the four lens families (all groups, correlation) into one mapping; the
# deep copy keeps `lenses_features` itself unchanged.
lenses_data_correlation = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_data_correlation.update(lens_family)

### IAV

In [27]:
# Lens families for the IAV group (bacterial burden excluded), correlation metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'correlation'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAV, features_no_b[1:])
lenses_dimred = def_lenses_dimred(
    IAV, features_no_b[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAV, features_no_b[1:], labels_IAV, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAV, features_no_b[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [28]:
# Merge the four lens families (IAV, correlation) into one mapping; the deep
# copy keeps `lenses_features` itself unchanged.
lenses_IAV_correlation = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAV_correlation.update(lens_family)

### T4

In [29]:
# Lens families for the T4 group (viral load excluded), correlation metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'correlation'

# Builders run in the same order as before.
lenses_features = def_lenses_features(T4, features_no_v[1:])
lenses_dimred = def_lenses_dimred(
    T4, features_no_v[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(T4, features_no_v[1:], labels_T4, metric=metric)
lenses_geometry = def_lenses_geometry(
    T4, features_no_v[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [30]:
# Merge the four lens families (T4, correlation) into one mapping; the deep
# copy keeps `lenses_features` itself unchanged.
lenses_T4_correlation = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_T4_correlation.update(lens_family)

### IAVT4

In [31]:
# Lens families for the IAV + T4 group (all features), correlation metric.
# Every switch is on except `others`.
get_PCA = get_isomap = get_LLE = get_MDS = get_spectral_embedding = get_SVD = True
get_density = get_eccentricity = get_inf_centrality = True
others = False
eccentricity_exponent = 0.3
metric = 'correlation'

# Builders run in the same order as before.
lenses_features = def_lenses_features(IAVT4, features[1:])
lenses_dimred = def_lenses_dimred(
    IAVT4, features[1:],
    get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding, get_SVD,
)
lenses_nbrs = def_lenses_neighbours(IAVT4, features[1:], labels_IAVT4, metric=metric)
lenses_geometry = def_lenses_geometry(
    IAVT4, features[1:],
    get_density, get_eccentricity, eccentricity_exponent, get_inf_centrality,
    others, metric,
)

In [32]:
# Merge the four lens families (IAV + T4, correlation) into one mapping; the
# deep copy keeps `lenses_features` itself unchanged.
lenses_IAVT4_correlation = copy.deepcopy(lenses_features)
for lens_family in (lenses_dimred, lenses_nbrs, lenses_geometry):
    lenses_IAVT4_correlation.update(lens_family)

In [33]:
print("lenses defined ...\n")

lenses defined ...



# Define simulations
Here we define the simulations according to the pairs of lenses we want to test. We test the following pairs:
#### Simulation 1a. 
Lens1 = distance to first neighbour

Lens2 = distance to the second neighbour

#### Simulation 1b.
Lens1 = distance to first neighbour

Lens2 = feature

#### Simulation 1c.
Lens1 = sum of the distances to the first two neighbours

Lens2 = feature

#### Simulation 2a.
Lens1 = first dimension of a dimensionality reduction algorithm

Lens2 = second dimension of a dimensionality reduction algorithm

#### Simulation 2b.
Lens1 = first dimension of a dimensionality reduction algorithm

Lens2 = feature

#### Simulation 3a.
Lens1 = geometric/statistical projection

Lens2 = feature

Each simulation is defined for each of the three metrics we wanted to test: Euclidean, cosine, correlation.

The dimensionality reduction algorithms are the following: PCA, Isometric Mapping, Locally Linear Embedding, Multidimensional Scaling, Spectral Embedding, and Singular Value Decomposition. Please see the functions.py file for further details on the computational implementation and the supplementary material script for a description of each method.

The geometric/statistical projections are: density, eccentricity, eccentricity exponent, infinite centrality, sum, mean, median, max, min, std. Please see the functions.py file for further details on the computational implementation.

In [41]:
# Dimensionality-reduction lens names: keys at even positions are the
# "principal" lenses, keys at odd positions the "secondary" ones.
dimred_keys = list(lenses_dimred.keys())
dimred_principal = dimred_keys[0::2]
dimred_secondary = dimred_keys[1::2]

# Parameter grids shared by all simulations.
interval = np.arange(1, 20, 2)
percentage_overlap = np.arange(0.2, 0.9, 0.1)

# eps grids, one per metric.
# NOTE(review): the Euclidean upper bound 520002 is a hard-coded constant,
# presumably tied to the scale of the data - confirm.
eps_cosine = np.arange(0.1, 1, 0.2)
eps_euclidean = np.linspace(1, 520002, 5)
eps_correlation = np.arange(0.1, 2, 0.2)


## All infection groups and IAV + T4
For these two data sets we do not need to exclude any feature (see below, for IAV we exclude bacteria and for T4 we exclude virus).

In [42]:

# euclidean
# Grid shared by every simulation in this cell: two `interval` axes, two
# `percentage_overlap` axes and the Euclidean eps values.
eps = eps_euclidean
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_euclidean = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_euclidean = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_euclidean = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid; every
# entry is flattened into a single list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_euclidean = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_euclidean = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_euclidean = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

In [43]:
# cosine
# Grid shared by every simulation in this cell: two `interval` axes, two
# `percentage_overlap` axes and the cosine eps values.
eps = eps_cosine
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_cosine = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_cosine = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_cosine = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid; every
# entry is flattened into a single list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_cosine = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_cosine = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_cosine = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))



In [44]:
# correlation
# Grid shared by every simulation in this cell: two `interval` axes, two
# `percentage_overlap` axes and the correlation eps values.
eps = eps_correlation
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_correlation = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_correlation = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_correlation = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid; every
# entry is flattened into a single list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_correlation = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_correlation = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_correlation = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

### IAV
For IAV we need to exclude bacteria

In [None]:
# Rebind `lenses_features` to the IAV lenses built from `features_no_b[1:]`, so that the
# `lenses_features.keys()` calls in the following simulation cells enumerate the IAV
# feature lenses (presumably one lens per listed column - see functions.py).
lenses_features = def_lenses_features(IAV, features_no_b[1:])

In [None]:

# euclidean
# Simulation definitions for IAV (bacterial burden excluded), Euclidean eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_euclidean
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_euclidean_no_b = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_euclidean_no_b = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_euclidean_no_b = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_b` list itself; the
# previous loop appended them to `sim_2a_euclidean`, which left this list
# empty and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_euclidean_no_b = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_euclidean_no_b = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_euclidean_no_b = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

In [None]:
# cosine
# Simulation definitions for IAV (bacterial burden excluded), cosine eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_cosine
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_cosine_no_b = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_cosine_no_b = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_cosine_no_b = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_b` list itself; the
# previous loop appended them to `sim_2a_cosine`, which left this list empty
# and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_cosine_no_b = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_cosine_no_b = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_cosine_no_b = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))





In [None]:
# correlation
# Simulation definitions for IAV (bacterial burden excluded), correlation eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_correlation
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_correlation_no_b = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_correlation_no_b = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_correlation_no_b = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_b` list itself; the
# previous loop appended them to `sim_2a_correlation`, which left this list
# empty and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_correlation_no_b = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_correlation_no_b = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_correlation_no_b = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

### T4
For T4 we need to exclude virus

In [None]:
# Rebind `lenses_features` to the T4 lenses built from `features_no_v[1:]`, so that the
# `lenses_features.keys()` calls in the following simulation cells enumerate the T4
# feature lenses (presumably one lens per listed column - see functions.py).
lenses_features = def_lenses_features(T4, features_no_v[1:])

In [None]:

# euclidean
# Simulation definitions for T4 (viral load excluded), Euclidean eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_euclidean
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_euclidean_no_v = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_euclidean_no_v = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_euclidean_no_v = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_v` list itself; the
# previous loop appended them to `sim_2a_euclidean`, which left this list
# empty and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_euclidean_no_v = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_euclidean_no_v = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_euclidean_no_v = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

In [None]:
# cosine
# Simulation definitions for T4 (viral load excluded), cosine eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_cosine
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_cosine_no_v = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_cosine_no_v = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_cosine_no_v = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_v` list itself; the
# previous loop appended them to `sim_2a_cosine`, which left this list empty
# and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_cosine_no_v = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_cosine_no_v = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_cosine_no_v = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))





In [None]:
# correlation
# Simulation definitions for T4 (viral load excluded), correlation eps.
# Grid shared by every simulation: two `interval` axes, two
# `percentage_overlap` axes and the eps values.
eps = eps_correlation
grid_axes = (interval, interval, percentage_overlap, percentage_overlap, eps)
temp = list(itertools.product(*grid_axes))

# 1a-1c: neighbour-based first lens.
sim_1a_correlation_no_v = list(itertools.product(['Neighbour_1'], ['Neighbour_2'], *grid_axes))
sim_1b_correlation_no_v = list(itertools.product(['Neighbour_1'], lenses_features.keys(), *grid_axes))
sim_1c_correlation_no_v = list(itertools.product(['Sum'], lenses_features.keys(), *grid_axes))

# 2a: each (principal, secondary) lens pair crossed with the grid, flattened
# to one list per run. The rows are stored in the `_no_v` list itself; the
# previous loop appended them to `sim_2a_correlation`, which left this list
# empty and added duplicate rows to the all-groups list.
sim_2 = list(itertools.product(list(zip(dimred_principal, dimred_secondary)), temp))
sim_2a_correlation_no_v = [list(itertools.chain.from_iterable(entry)) for entry in sim_2]

# 2b / 3a: dimensionality-reduction or geometry lens paired with a feature lens.
sim_2b_correlation_no_v = list(itertools.product(dimred_principal, lenses_features.keys(), *grid_axes))
sim_3a_correlation_no_v = list(itertools.product(lenses_geometry.keys(), lenses_features.keys(), *grid_axes))

In [45]:
print("simulations defined ...\n")

simulations defined ...



# Run simulations

As a demonstration, here we run only simulation 1a. Any of the other simulations can also be run by changing the appropriate input parameter of the function 'mapper_parameter_gridsearch' below.

### All infection groups

In [None]:
# Grid search over the simulation-1a parameter list for all infection groups, Euclidean metric.
# NOTE(review): the positional `6` and the CSV name are interpreted by
# mapper_parameter_gridsearch in functions.py (not visible here); the CSV is presumably
# the results file - confirm.
data_euclidean_sc_df = mapper_parameter_gridsearch(data, features[1:], labels_data, 'euclidean', lenses_data_euclidean, 
                                                   sim_1a_euclidean, 6, 'data_c_euclidean_sc_df_1a.csv')

In [None]:
# Progress message for the all-groups Euclidean grid search cell above.
print("data_euclidean_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on all infection groups, cosine metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
data_cosine_sc_df = mapper_parameter_gridsearch(
    data,
    features[1:],
    labels_data,
    'cosine',
    lenses_data_cosine,
    sim_1a_cosine,
    6,
    'data_c_cosine_sc_df_1a.csv',
)

In [None]:
# Progress message for the all-groups cosine grid search cell above.
print("data_cosine_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on all infection groups, correlation metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
data_correlation_sc_df = mapper_parameter_gridsearch(
    data,
    features[1:],
    labels_data,
    'correlation',
    lenses_data_correlation,
    sim_1a_correlation,
    6,
    'data_c_correlation_sc_df_1a.csv',
)

In [None]:
# Progress message for the all-groups correlation grid search cell above.
print("data_correlation_sc_df done ... \n")

### IAV

In [None]:
# Simulation 1a grid search on the IAV group (`features_no_b` columns), Euclidean metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAV_euclidean_sc_df = mapper_parameter_gridsearch(
    IAV,
    features_no_b[1:],
    labels_IAV,
    'euclidean',
    lenses_IAV_euclidean,
    sim_1a_euclidean_no_b,
    1,
    'IAV_euclidean_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAV Euclidean grid search cell above.
print("IAV_euclidean_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the IAV group (`features_no_b` columns), cosine metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAV_cosine_sc_df = mapper_parameter_gridsearch(
    IAV,
    features_no_b[1:],
    labels_IAV,
    'cosine',
    lenses_IAV_cosine,
    sim_1a_cosine_no_b,
    1,
    'IAV_cosine_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAV cosine grid search cell above.
print("IAV_cosine_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the IAV group (`features_no_b` columns), correlation metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAV_correlation_sc_df = mapper_parameter_gridsearch(
    IAV,
    features_no_b[1:],
    labels_IAV,
    'correlation',
    lenses_IAV_correlation,
    sim_1a_correlation_no_b,
    1,
    'IAV_correlation_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAV correlation grid search cell above.
print("IAV_correlation_sc_df done ... \n")

### T4

In [None]:
# Simulation 1a grid search on the T4 group (`features_no_v` columns), Euclidean metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
T4_euclidean_sc_df = mapper_parameter_gridsearch(
    T4,
    features_no_v[1:],
    labels_T4,
    'euclidean',
    lenses_T4_euclidean,
    sim_1a_euclidean_no_v,
    1,
    'T4_euclidean_sc_df_1a.csv',
)

In [None]:
# Progress message for the T4 Euclidean grid search cell above.
print("T4_euclidean_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the T4 group (`features_no_v` columns), cosine metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
T4_cosine_sc_df = mapper_parameter_gridsearch(
    T4,
    features_no_v[1:],
    labels_T4,
    'cosine',
    lenses_T4_cosine,
    sim_1a_cosine_no_v,
    1,
    'T4_cosine_sc_df_1a.csv',
)

In [None]:
# Progress message for the T4 cosine grid search cell above.
print("T4_cosine_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the T4 group (`features_no_v` columns), correlation metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
T4_correlation_sc_df = mapper_parameter_gridsearch(
    T4,
    features_no_v[1:],
    labels_T4,
    'correlation',
    lenses_T4_correlation,
    sim_1a_correlation_no_v,
    1,
    'T4_correlation_sc_df_1a.csv',
)

In [None]:
# Progress message for the T4 correlation grid search cell above.
print("T4_correlation_sc_df done ... \n")

### IAVT4

In [None]:
# Simulation 1a grid search on the IAVT4 group (full feature list), Euclidean metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAVT4_euclidean_sc_df = mapper_parameter_gridsearch(
    IAVT4,
    features[1:],
    labels_IAVT4,
    'euclidean',
    lenses_IAVT4_euclidean,
    sim_1a_euclidean,
    1,
    'IAVT4_euclidean_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAVT4 Euclidean grid search cell above.
print("IAVT4_euclidean_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the IAVT4 group (full feature list), cosine metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAVT4_cosine_sc_df = mapper_parameter_gridsearch(
    IAVT4,
    features[1:],
    labels_IAVT4,
    'cosine',
    lenses_IAVT4_cosine,
    sim_1a_cosine,
    1,
    'IAVT4_cosine_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAVT4 cosine grid search cell above.
print("IAVT4_cosine_sc_df done ... \n")

In [None]:
# Simulation 1a grid search on the IAVT4 group (full feature list), correlation metric.
# NOTE(review): the integer argument is presumably a limit on connected
# components and the last argument the output CSV name — confirm in functions.py.
IAVT4_correlation_sc_df = mapper_parameter_gridsearch(
    IAVT4,
    features[1:],
    labels_IAVT4,
    'correlation',
    lenses_IAVT4_correlation,
    sim_1a_correlation,
    1,
    'IAVT4_correlation_sc_df_1a.csv',
)

In [None]:
# Progress message for the IAVT4 correlation grid search cell above.
print("IAVT4_correlation_sc_df done ... \n")

# Visualise mapper output

Here we choose the simplicial complexes from simulation 1a that we want to visualise. See the supplementary material for a full description.

## All infection groups

### Euclidean

In [34]:
# Load the saved simulation-1a results (all groups, Euclidean), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
data_euclidean_sc_df_1a = (
    pd.read_csv('../dataframes/data_c_euclidean_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
data_euclidean_sc_df_1a.describe()

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,0,0,0,0,0,0,0,0,0
unique,0,0,0,0,0,0,0,0,0


In [35]:
# to_plot = data_euclidean_sc_df_1a[data_euclidean_sc_df_1a['n_connected_components']==1].sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
# to_plot = to_plot[to_plot['n_vertices']>=15]
# to_plot.reset_index(drop=True, inplace=True)
# to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components


In [None]:
# # make data into np array
# X = data[features[1:]].as_matrix()

# # init mapper
# mapper = km.KeplerMapper() 

# for i in np.arange(0,10,1):
    
#     lens1 = lenses_data_euclidean[to_plot.loc[i,'lens1']]
#     lens2 = lenses_data_euclidean[to_plot.loc[i,'lens2']]
#     lens = np.c_[lens1, lens2]
#     c1 = to_plot.loc[i,'lens1_n_cubes']
#     c2 = to_plot.loc[i,'lens2_n_cubes']
#     p1 = to_plot.loc[i,'lens1_perc_overlap']
#     p2 = to_plot.loc[i,'lens2_perc_overlap']
#     eps = to_plot.loc[i,'eps']

#     # create complex
#     scomplex = mapper.map(lens, 
#                           X, 
#                           cover=km.cover.Cover(n_cubes=[c1,c2],perc_overlap=[p1,p2]),
#                           clusterer=DBSCAN(metric='euclidean', eps=eps, min_samples=1),
#                                               precomputed=False)

#     meta_dict = {"PROJECTIONS":"[{},{}]".format( to_plot.loc[i,'lens1'] , to_plot.loc[i,'lens2']),
#                      "n_cubes":"[{},{}]".format(c1,c2),
#                      "PERCENTAGE OVERLAP":"[{},{}]".format(p1,p2),
#                      "CLUSTERER":DBSCAN(metric='cosine', eps=eps, min_samples=1),
#                      "SCALER":"MinMaxScaler(reature_range=(0,1))"}

#     title = "data_euclidean_sc_df_1a_{}.html".format(i)
#     path = "../mapper_output/data/data_euclidean_sim_1a/" + title
#     _ = mapper.visualize(graph=scomplex, 
#                      path_html=path, 
#                      custom_meta=meta_dict,                    
#                      custom_tooltips=labels_data,
#                      color_function=labels_data,
#                      nbins=2, 
#                      title=title)

# # for col in features[1:]:
# #     title = "T4_{}_coloring.html".format(col)
# #     path = "mapper_output/" + title
# #     mapperT4.visualize(graph=scomplex, 
# #                      path_html=path, 
# #                      custom_meta=meta_dict,                    
# #                      custom_tooltips=T4[col].as_matrix(),
# #                      color_function=T4[col].as_matrix(),
# #                      nbins=5, 
# #                      title=title)

### Cosine

In [35]:
# Load the saved simulation-1a results (all groups, cosine), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
data_cosine_sc_df_1a = (
    pd.read_csv('../dataframes/data_c_cosine_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
data_cosine_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,17913.0,17913.0,17913.0,17913.0,17913.0,17913.0,17913.0
mean,9.442919,9.047898,0.522045,0.531513,0.592067,25.317423,3.933456
std,5.689967,5.580919,0.195982,0.195626,0.253498,20.526178,1.209301
min,1.0,1.0,0.2,0.2,0.1,1.0,1.0
25%,5.0,5.0,0.4,0.4,0.5,12.0,3.0
50%,9.0,9.0,0.5,0.5,0.7,20.0,4.0
75%,15.0,13.0,0.7,0.7,0.9,32.0,5.0
max,19.0,19.0,0.8,0.8,0.9,221.0,6.0


In [36]:
# Candidates to visualise: single-component complexes with at least 15 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    data_cosine_sc_df_1a
    .loc[data_cosine_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] >= 15]
    .reset_index(drop=True)
)
to_plot


Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.5,20,1
1,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.5,18,1
2,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.5,21,1
3,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.5,21,1
4,Neighbour_1,Neighbour_2,5,5,0.8,0.8,0.5,32,1
5,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.7,18,1
6,Neighbour_1,Neighbour_2,3,7,0.7,0.8,0.7,23,1
7,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.7,17,1
8,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.7,18,1
9,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.7,21,1


In [37]:
# Rebuild the first (up to) 10 selected complexes for all infection groups with
# the cosine metric and write one HTML visualisation per complex.

# make data into np array (`.values` replaces the deprecated `.as_matrix()`)
X = data[features[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_data_cosine[to_plot.loc[i, 'lens1']]
    lens2 = lenses_data_cosine[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # one clusterer object feeds both the map and the metadata, so the
    # reported settings are always the ones actually used
    clusterer = DBSCAN(metric='cosine', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "data_cosine_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/data/data_cosine_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_data,
                         color_function=labels_data,
                         nbins=3,
                         title=title)



### Correlation

In [38]:
# Load the saved simulation-1a results (all groups, correlation), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
data_correlation_sc_df_1a = (
    pd.read_csv('../dataframes/data_c_correlation_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
data_correlation_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,41539.0,41539.0,41539.0,41539.0,41539.0,41539.0,41539.0
mean,9.732228,9.527938,0.509134,0.515987,1.12898,20.413226,3.500084
std,5.716901,5.674213,0.199227,0.19846,0.521503,16.304444,1.09394
min,1.0,1.0,0.2,0.2,0.1,1.0,1.0
25%,5.0,5.0,0.3,0.3,0.7,10.0,3.0
50%,9.0,9.0,0.5,0.5,1.1,16.0,3.0
75%,15.0,15.0,0.7,0.7,1.5,26.0,4.0
max,19.0,19.0,0.8,0.8,1.9,211.0,6.0


In [39]:
# Candidates to visualise: single-component complexes with at least 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    data_correlation_sc_df_1a
    .loc[data_correlation_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] >= 10]
    .reset_index(drop=True)
)
to_plot



Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,3,0.7,0.7,0.5,12,1
1,Neighbour_1,Neighbour_2,3,3,0.7,0.8,0.5,12,1
2,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.5,22,1
3,Neighbour_1,Neighbour_2,3,3,0.8,0.7,0.5,12,1
4,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.5,19,1
5,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.5,21,1
6,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.5,24,1
7,Neighbour_1,Neighbour_2,5,5,0.8,0.8,0.5,34,1
8,Neighbour_1,Neighbour_2,3,3,0.7,0.7,0.7,12,1
9,Neighbour_1,Neighbour_2,3,3,0.7,0.8,0.7,12,1


In [40]:
# Rebuild the first (up to) 10 selected complexes for all infection groups with
# the correlation metric and write one HTML visualisation per complex.

# make data into np array (`.values` replaces the deprecated `.as_matrix()`)
X = data[features[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_data_correlation[to_plot.loc[i, 'lens1']]
    lens2 = lenses_data_correlation[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Fix: this cell clustered with metric='cosine' although the rows in
    # `to_plot` come from the correlation grid search; use the same metric.
    # The same object is reported in the metadata so the two cannot disagree.
    clusterer = DBSCAN(metric='correlation', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "data_correlation_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/data/data_correlation_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_data,
                         color_function=labels_data,
                         nbins=3,
                         title=title)



## IAV
### Euclidean

In [41]:
# Load the saved simulation-1a results (IAV, Euclidean), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
IAV_euclidean_sc_df_1a = (
    pd.read_csv('../dataframes/IAV_euclidean_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
IAV_euclidean_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,1129.0,1129.0,1129.0,1129.0,1129.0,1129.0,1129.0
mean,4.153233,3.0,0.605492,0.614438,376990.210363,10.76705,1.0
std,3.583662,2.43497,0.187665,0.189939,133507.769916,12.267618,0.0
min,1.0,1.0,0.2,0.2,130001.25,1.0,1.0
25%,1.0,1.0,0.5,0.5,260001.5,3.0,1.0
50%,3.0,3.0,0.7,0.7,390001.75,7.0,1.0
75%,5.0,5.0,0.8,0.8,520002.0,14.0,1.0
max,15.0,9.0,0.8,0.8,520002.0,77.0,1.0


In [42]:
# len(IAV)

In [43]:
# Candidates to visualise: single-component complexes with more than 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    IAV_euclidean_sc_df_1a
    .loc[IAV_euclidean_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] > 10]
    .reset_index(drop=True)
)
to_plot


Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,1,9,0.2,0.8,130001.25,11,1
1,Neighbour_1,Neighbour_2,1,9,0.3,0.8,130001.25,11,1
2,Neighbour_1,Neighbour_2,1,9,0.4,0.8,130001.25,11,1
3,Neighbour_1,Neighbour_2,1,9,0.5,0.8,130001.25,11,1
4,Neighbour_1,Neighbour_2,3,3,0.6,0.7,130001.25,12,1
5,Neighbour_1,Neighbour_2,3,5,0.6,0.7,130001.25,17,1
6,Neighbour_1,Neighbour_2,1,9,0.6,0.8,130001.25,11,1
7,Neighbour_1,Neighbour_2,3,3,0.6,0.8,130001.25,12,1
8,Neighbour_1,Neighbour_2,3,5,0.6,0.8,130001.25,18,1
9,Neighbour_1,Neighbour_2,3,7,0.6,0.8,130001.25,22,1


In [44]:
# Rebuild the first (up to) 10 selected IAV complexes with the Euclidean metric
# and write one HTML visualisation per complex.

# make IAV into np array; use the same columns as the IAV grid search
# (`features_no_b[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot`. `.values` replaces the deprecated `.as_matrix()`.
X = IAV[features_no_b[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_IAV_euclidean[to_plot.loc[i, 'lens1']]
    lens2 = lenses_IAV_euclidean[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Fix: the metadata used to report a cosine DBSCAN while the map used the
    # Euclidean one; share a single object so both agree.
    clusterer = DBSCAN(metric='euclidean', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "IAV_euclidean_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/IAV/IAV_euclidean_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_IAV,
                         color_function=labels_IAV,
                         nbins=len(np.unique(labels_IAV)),
                         title=title)



### Cosine

In [45]:
# Load the saved simulation-1a results (IAV, cosine), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
IAV_cosine_sc_df_1a = (
    pd.read_csv('../dataframes/IAV_cosine_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
IAV_cosine_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,728.0,728.0,728.0,728.0,728.0,728.0,728.0
mean,3.0,2.538462,0.621429,0.607692,0.6,7.546703,1.0
std,2.508856,1.947355,0.193518,0.194126,0.223761,9.148659,0.0
min,1.0,1.0,0.2,0.2,0.3,1.0,1.0
25%,1.0,1.0,0.5,0.5,0.45,1.0,1.0
50%,2.0,1.0,0.7,0.7,0.6,4.0,1.0
75%,5.0,3.0,0.8,0.8,0.75,9.0,1.0
max,9.0,7.0,0.8,0.8,0.9,53.0,1.0


In [46]:
# Candidates to visualise: single-component complexes with more than 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    IAV_cosine_sc_df_1a
    .loc[IAV_cosine_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] > 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,5,0.6,0.7,0.3,19,1
1,Neighbour_1,Neighbour_2,3,5,0.6,0.8,0.3,18,1
2,Neighbour_1,Neighbour_2,3,7,0.6,0.8,0.3,22,1
3,Neighbour_1,Neighbour_2,5,3,0.7,0.6,0.3,14,1
4,Neighbour_1,Neighbour_2,5,3,0.7,0.7,0.3,16,1
5,Neighbour_1,Neighbour_2,3,5,0.7,0.7,0.3,20,1
6,Neighbour_1,Neighbour_2,5,5,0.7,0.7,0.3,27,1
7,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.3,18,1
8,Neighbour_1,Neighbour_2,5,3,0.7,0.8,0.3,18,1
9,Neighbour_1,Neighbour_2,3,7,0.7,0.8,0.3,23,1


In [47]:
# Rebuild the first (up to) 10 selected IAV complexes with the cosine metric
# and write one HTML visualisation per complex.

# make IAV into np array; use the same columns as the IAV grid search
# (`features_no_b[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot`. `.values` replaces the deprecated `.as_matrix()`.
X = IAV[features_no_b[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_IAV_cosine[to_plot.loc[i, 'lens1']]
    lens2 = lenses_IAV_cosine[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # one clusterer object feeds both the map and the metadata, so the
    # reported settings are always the ones actually used
    clusterer = DBSCAN(metric='cosine', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "IAV_cosine_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/IAV/IAV_cosine_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_IAV,
                         color_function=labels_IAV,
                         nbins=len(np.unique(labels_IAV)),
                         title=title)



### Correlation

In [48]:
# Load the saved simulation-1a results (IAV, correlation), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
IAV_correlation_sc_df_1a = (
    pd.read_csv('../dataframes/IAV_correlation_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
IAV_correlation_sc_df_1a.describe()


Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,1937.0,1937.0,1937.0,1937.0,1937.0,1937.0,1937.0
mean,2.755292,3.628807,0.614146,0.616366,1.210067,9.41714,1.0
std,2.239762,2.960319,0.193879,0.186656,0.456798,10.974166,0.0
min,1.0,1.0,0.2,0.2,0.5,1.0,1.0
25%,1.0,1.0,0.5,0.5,0.9,3.0,1.0
50%,1.0,3.0,0.7,0.7,1.3,5.0,1.0
75%,5.0,5.0,0.8,0.8,1.7,11.0,1.0
max,9.0,11.0,0.8,0.8,1.9,69.0,1.0


In [49]:
# Candidates to visualise: single-component complexes with more than 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    IAV_correlation_sc_df_1a
    .loc[IAV_correlation_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] > 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,1,9,0.2,0.8,0.5,12,1
1,Neighbour_1,Neighbour_2,1,11,0.2,0.8,0.5,14,1
2,Neighbour_1,Neighbour_2,1,9,0.3,0.8,0.5,12,1
3,Neighbour_1,Neighbour_2,1,11,0.3,0.8,0.5,14,1
4,Neighbour_1,Neighbour_2,1,9,0.4,0.8,0.5,12,1
5,Neighbour_1,Neighbour_2,1,11,0.4,0.8,0.5,14,1
6,Neighbour_1,Neighbour_2,1,9,0.5,0.8,0.5,12,1
7,Neighbour_1,Neighbour_2,1,11,0.5,0.8,0.5,14,1
8,Neighbour_1,Neighbour_2,3,3,0.6,0.4,0.5,12,1
9,Neighbour_1,Neighbour_2,3,3,0.6,0.5,0.5,12,1


In [50]:
# Rebuild the first (up to) 10 selected IAV complexes with the correlation
# metric and write one HTML visualisation per complex.

# make IAV into np array; use the same columns as the IAV grid search
# (`features_no_b[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot` (an extra column would change the correlation distances).
# `.values` replaces the deprecated `.as_matrix()`.
X = IAV[features_no_b[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_IAV_correlation[to_plot.loc[i, 'lens1']]
    lens2 = lenses_IAV_correlation[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Fix: the metadata used to report a cosine DBSCAN while the map used the
    # correlation one; share a single object so both agree.
    clusterer = DBSCAN(metric='correlation', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "IAV_correlation_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/IAV/IAV_correlation_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_IAV,
                         color_function=labels_IAV,
                         nbins=len(np.unique(labels_IAV)),
                         title=title)



## T4
### Euclidean

In [51]:
# Load the saved simulation-1a results (T4, Euclidean), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
T4_euclidean_sc_df_1a = (
    pd.read_csv('../dataframes/T4_euclidean_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
T4_euclidean_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,1.8,1.8,0.58,0.58,520002.0,3.06,1.0
std,1.333333,1.333333,0.209858,0.209858,0.0,3.158506,0.0
min,1.0,1.0,0.2,0.2,520002.0,1.0,1.0
25%,1.0,1.0,0.4,0.4,520002.0,1.0,1.0
50%,1.0,1.0,0.65,0.65,520002.0,3.0,1.0
75%,3.0,3.0,0.8,0.8,520002.0,3.0,1.0
max,5.0,5.0,0.8,0.8,520002.0,17.0,1.0


In [52]:
# Candidates to visualise: single-component complexes with at least 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    T4_euclidean_sc_df_1a
    .loc[T4_euclidean_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] >= 10]
    .reset_index(drop=True)
)
to_plot


Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,5,0.7,0.8,520002.0,11,1
1,Neighbour_1,Neighbour_2,5,3,0.8,0.7,520002.0,11,1
2,Neighbour_1,Neighbour_2,3,5,0.8,0.8,520002.0,15,1
3,Neighbour_1,Neighbour_2,5,3,0.8,0.8,520002.0,15,1
4,Neighbour_1,Neighbour_2,5,5,0.8,0.8,520002.0,17,1


In [53]:
# Rebuild every selected T4 complex with the Euclidean metric and write one
# HTML visualisation per complex.

# make T4 into np array; use the same columns as the T4 grid search
# (`features_no_v[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot`. `.values` replaces the deprecated `.as_matrix()`.
X = T4[features_no_v[1:]].values

# init mapper
mapper = km.KeplerMapper()

for i in range(len(to_plot)):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_T4_euclidean[to_plot.loc[i, 'lens1']]
    lens2 = lenses_T4_euclidean[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Fix: the metadata used to report a cosine DBSCAN while the map used the
    # Euclidean one; share a single object so both agree.
    clusterer = DBSCAN(metric='euclidean', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "T4_euclidean_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/T4/T4_euclidean_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_T4,
                         color_function=labels_T4,
                         nbins=len(np.unique(labels_T4)),
                         title=title)



### Cosine

In [54]:
# Load the saved simulation-1a results (T4, cosine), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
T4_cosine_sc_df_1a = (
    pd.read_csv('../dataframes/T4_cosine_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
T4_cosine_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,1.8,1.8,0.58,0.58,0.9,3.7,1.0
std,1.333333,1.333333,0.209858,0.209858,6.694897e-16,4.321663,0.0
min,1.0,1.0,0.2,0.2,0.9,1.0,1.0
25%,1.0,1.0,0.4,0.4,0.9,1.0,1.0
50%,1.0,1.0,0.65,0.65,0.9,3.0,1.0
75%,3.0,3.0,0.8,0.8,0.9,4.0,1.0
max,5.0,5.0,0.8,0.8,0.9,24.0,1.0


In [55]:
# Candidates to visualise: single-component complexes with more than 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    T4_cosine_sc_df_1a
    .loc[T4_cosine_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] > 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,3,0.7,0.7,0.9,12,1
1,Neighbour_1,Neighbour_2,3,3,0.7,0.8,0.9,12,1
2,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.9,17,1
3,Neighbour_1,Neighbour_2,3,3,0.8,0.7,0.9,12,1
4,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.9,17,1
5,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.9,18,1
6,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.9,18,1
7,Neighbour_1,Neighbour_2,5,5,0.8,0.8,0.9,24,1


In [56]:
# Rebuild every selected T4 complex with the cosine metric and write one HTML
# visualisation per complex.

# make T4 into np array; use the same columns as the T4 grid search
# (`features_no_v[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot`. `.values` replaces the deprecated `.as_matrix()`.
X = T4[features_no_v[1:]].values

# init mapper
mapper = km.KeplerMapper()

for i in range(len(to_plot)):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_T4_cosine[to_plot.loc[i, 'lens1']]
    lens2 = lenses_T4_cosine[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # one clusterer object feeds both the map and the metadata, so the
    # reported settings are always the ones actually used
    clusterer = DBSCAN(metric='cosine', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "T4_cosine_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/T4/T4_cosine_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_T4,
                         color_function=labels_T4,
                         nbins=len(np.unique(labels_T4)),
                         title=title)



### Correlation

In [57]:
# Load the saved simulation-1a results (T4, correlation), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
T4_correlation_sc_df_1a = (
    pd.read_csv('../dataframes/T4_correlation_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
T4_correlation_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,676.0,676.0,676.0,676.0,676.0,676.0,676.0
mean,2.39645,2.39645,0.604142,0.604142,1.526036,5.399408,1.0
std,1.951164,1.951164,0.203337,0.203337,0.272442,6.242288,0.0
min,1.0,1.0,0.2,0.2,1.1,1.0,1.0
25%,1.0,1.0,0.4,0.4,1.3,1.0,1.0
50%,1.0,1.0,0.7,0.7,1.5,3.0,1.0
75%,3.0,3.0,0.8,0.8,1.7,6.0,1.0
max,7.0,7.0,0.8,0.8,1.9,33.0,1.0


In [58]:
# Candidates to visualise: single-component complexes with at least 10 vertices,
# ordered by eps, the two overlaps and vertex count, re-indexed from 0.
to_plot = (
    T4_correlation_sc_df_1a
    .loc[T4_correlation_sc_df_1a['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda ranked: ranked['n_vertices'] >= 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,3,0.7,0.7,1.1,12,1
1,Neighbour_1,Neighbour_2,3,3,0.7,0.8,1.1,12,1
2,Neighbour_1,Neighbour_2,3,5,0.7,0.8,1.1,17,1
3,Neighbour_1,Neighbour_2,3,3,0.8,0.7,1.1,12,1
4,Neighbour_1,Neighbour_2,5,3,0.8,0.7,1.1,17,1
5,Neighbour_1,Neighbour_2,3,5,0.8,0.8,1.1,18,1
6,Neighbour_1,Neighbour_2,5,3,0.8,0.8,1.1,18,1
7,Neighbour_1,Neighbour_2,5,5,0.8,0.8,1.1,24,1
8,Neighbour_1,Neighbour_2,3,5,0.7,0.7,1.3,14,1
9,Neighbour_1,Neighbour_2,5,3,0.7,0.7,1.3,14,1


In [59]:
# Rebuild the first (up to) 10 selected T4 complexes with the correlation
# metric and write one HTML visualisation per complex.

# make T4 into np array; use the same columns as the T4 grid search
# (`features_no_v[1:]`) so the rebuilt complexes correspond to the rows in
# `to_plot` (an extra column would change the correlation distances).
# `.values` replaces the deprecated `.as_matrix()`.
X = T4[features_no_v[1:]].values

# init mapper
mapper = km.KeplerMapper()

# never index past the end of `to_plot` when fewer than 10 rows pass the filter
for i in range(min(10, len(to_plot))):

    # two-column lens built from the two projections named in this row
    lens1 = lenses_T4_correlation[to_plot.loc[i, 'lens1']]
    lens2 = lenses_T4_correlation[to_plot.loc[i, 'lens2']]
    lens = np.c_[lens1, lens2]
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Fix: this cell clustered with metric='cosine' although the rows in
    # `to_plot` come from the correlation grid search; use the same metric.
    # The same object is reported in the metadata so the two cannot disagree.
    clusterer = DBSCAN(metric='correlation', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # NOTE(review): "reature_range" looks like a typo for "feature_range";
    # kept verbatim because it is emitted into the HTML metadata.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(to_plot.loc[i, 'lens1'], to_plot.loc[i, 'lens2']),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(reature_range=(0,1))"}

    title = "T4_correlation_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/T4/T4_correlation_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_T4,
                         color_function=labels_T4,
                         nbins=len(np.unique(labels_T4)),
                         title=title)



## IAVT4
### Euclidean


In [60]:
# Load the saved simulation-1a results (IAVT4, Euclidean), discard the
# 'Unnamed: 0' column (presumably the index written with the CSV) and summarise.
IAVT4_euclidean_sc_df_1a = (
    pd.read_csv('../dataframes/IAVT4_euclidean_sc_df_1a.csv')
    .drop(columns=['Unnamed: 0'])
)
IAVT4_euclidean_sc_df_1a.describe()

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,0,0,0,0,0,0,0,0,0
unique,0,0,0,0,0,0,0,0,0


In [63]:
# Disabled cell: selection of euclidean parameter sets to plot.
# NOTE(review): presumably switched off because the euclidean dataframe is
# empty (its describe() output reports a count of 0) -- confirm before
# re-enabling.
# to_plot = IAVT4_euclidean_sc_df_1a[IAVT4_euclidean_sc_df_1a['n_connected_components']==1].sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
# to_plot = to_plot[to_plot['n_vertices']>=10]
# to_plot.reset_index(drop=True, inplace=True)
# to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components


In [1]:
# Disabled cell: Mapper graphs for IAVT4 with the euclidean lenses.
# NOTE(review), if this is ever re-enabled:
#   - meta_dict's "CLUSTERER" entry says metric='cosine' while the map call
#     uses metric='euclidean';
#   - DataFrame.as_matrix() was removed in pandas 1.0 (use .values);
#   - the loop assumes `to_plot` has at least 10 rows.
# # make IAVT4 into np array
# X = IAVT4[features[1:]].as_matrix()

# # init mapper
# mapper = km.KeplerMapper() 

# for i in np.arange(0,10,1):
    
#     lens1 = lenses_IAVT4_euclidean[to_plot.loc[i,'lens1']]
#     lens2 = lenses_IAVT4_euclidean[to_plot.loc[i,'lens2']]
#     lens = np.c_[lens1, lens2]
#     c1 = to_plot.loc[i,'lens1_n_cubes']
#     c2 = to_plot.loc[i,'lens2_n_cubes']
#     p1 = to_plot.loc[i,'lens1_perc_overlap']
#     p2 = to_plot.loc[i,'lens2_perc_overlap']
#     eps = to_plot.loc[i,'eps']

#     # create complex
#     scomplex = mapper.map(lens, 
#                           X, 
#                           cover=km.cover.Cover(n_cubes=[c1,c2],perc_overlap=[p1,p2]),
#                           clusterer=DBSCAN(metric='euclidean', eps=eps, min_samples=1),
#                                               precomputed=False)

#     meta_dict = {"PROJECTIONS":"[{},{}]".format( to_plot.loc[i,'lens1'] , to_plot.loc[i,'lens2']),
#                      "n_cubes":"[{},{}]".format(c1,c2),
#                      "PERCENTAGE OVERLAP":"[{},{}]".format(p1,p2),
#                      "CLUSTERER":DBSCAN(metric='cosine', eps=eps, min_samples=1),
#                      "SCALER":"MinMaxScaler(reature_range=(0,1))"}

#     title = "IAVT4_euclidean_sc_df_1a_{}.html".format(i)
#     path = "../mapper_output/IAVT4/IAVT4_euclidean_sim_1a/" + title
#     _ = mapper.visualize(graph=scomplex, 
#                      path_html=path, 
#                      custom_meta=meta_dict,                    
#                      custom_tooltips=labels_IAVT4,
#                      color_function=labels_IAVT4,
#                      nbins=len(np.unique(labels_IAVT4)), 
#                      title=title)

# Older per-feature colouring variant (also disabled); it refers to
# `mapperT4` and `T4`, not the IAVT4 objects used above.
# # for col in features[1:]:
# #     title = "T4_{}_coloring.html".format(col)
# #     path = "mapper_output/" + title
# #     mapperT4.visualize(graph=scomplex, 
# #                      path_html=path, 
# #                      custom_meta=meta_dict,                    
# #                      custom_tooltips=T4[col].as_matrix(),
# #                      color_function=T4[col].as_matrix(),
# #                      nbins=5, 
# #                      title=title)

### Cosine

In [61]:
# Read the saved IAVT4 cosine dataframe, discard its 'Unnamed: 0' column
# (presumably the index written out with the CSV) and summarise the rest.
IAVT4_cosine_sc_df_1a = (
    pd.read_csv('../dataframes/IAVT4_cosine_sc_df_1a.csv')
      .drop(columns=['Unnamed: 0'])
)
IAVT4_cosine_sc_df_1a.describe()

Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,550.0,550.0,550.0,550.0,550.0,550.0,550.0
mean,1.8,2.781818,0.58,0.607273,0.627273,4.767273,1.0
std,1.327858,2.311694,0.208996,0.193571,0.213782,5.435197,0.0
min,1.0,1.0,0.2,0.2,0.3,1.0,1.0
25%,1.0,1.0,0.4,0.5,0.5,1.0,1.0
50%,1.0,1.0,0.65,0.7,0.7,3.0,1.0
75%,3.0,3.0,0.8,0.8,0.9,5.0,1.0
max,5.0,9.0,0.8,0.8,0.9,33.0,1.0


In [62]:
# Candidate parameter sets for plotting: rows with exactly one connected
# component, ordered by eps, both overlap settings and vertex count, then
# restricted to graphs with at least 10 vertices and re-indexed from 0.
to_plot = (
    IAVT4_cosine_sc_df_1a
    .loc[lambda df: df['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda df: df['n_vertices'] >= 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.3,15,1
1,Neighbour_1,Neighbour_2,3,3,0.8,0.7,0.3,12,1
2,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.3,15,1
3,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.3,15,1
4,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.3,21,1
5,Neighbour_1,Neighbour_2,5,5,0.8,0.8,0.3,25,1
6,Neighbour_1,Neighbour_2,3,5,0.7,0.7,0.5,11,1
7,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.5,13,1
8,Neighbour_1,Neighbour_2,3,7,0.7,0.8,0.5,16,1
9,Neighbour_1,Neighbour_2,3,9,0.7,0.8,0.5,19,1


In [63]:
# Build one Mapper graph per row of `to_plot` (at most the first 10 rows) for
# IAVT4 with the cosine lenses, and write each graph to an HTML file.

# Feature matrix for IAVT4: every column listed in `features` except the first.
# `.values` is used because `DataFrame.as_matrix()` was removed in pandas 1.0.
X = IAVT4[features[1:]].values

# init mapper
mapper = km.KeplerMapper()

# Guard against `to_plot` holding fewer than 10 rows (would raise KeyError).
n_plots = min(10, len(to_plot))

# The number of colour bins does not change between iterations.
n_label_bins = len(np.unique(labels_IAVT4))

for i in range(n_plots):

    # Lens names and cover/clustering parameters stored in row i.
    lens1_name = to_plot.loc[i, 'lens1']
    lens2_name = to_plot.loc[i, 'lens2']
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Two-column lens built from the two named lenses.
    lens = np.c_[lenses_IAVT4_cosine[lens1_name],
                 lenses_IAVT4_cosine[lens2_name]]

    # One clusterer instance feeds both the mapping and the metadata, so the
    # exported HTML always reports the settings that were actually run.
    clusterer = DBSCAN(metric='cosine', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # Parameters recorded in the exported HTML.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(lens1_name, lens2_name),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(feature_range=(0,1))"}

    title = "IAVT4_cosine_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/IAVT4/IAVT4_cosine_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_IAVT4,
                         color_function=labels_IAVT4,
                         nbins=n_label_bins,
                         title=title)



### Correlation

In [64]:
# Read the saved IAVT4 correlation dataframe, discard its 'Unnamed: 0' column
# (presumably the index written out with the CSV) and summarise the rest.
IAVT4_correlation_sc_df_1a = (
    pd.read_csv('../dataframes/IAVT4_correlation_sc_df_1a.csv')
      .drop(columns=['Unnamed: 0'])
)
IAVT4_correlation_sc_df_1a.describe()


Unnamed: 0,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
count,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0
mean,1.8,2.895652,0.58,0.610435,1.230435,4.918261,1.0
std,1.327227,2.372507,0.208897,0.19136,0.443927,5.539503,0.0
min,1.0,1.0,0.2,0.2,0.5,1.0,1.0
25%,1.0,1.0,0.4,0.5,0.9,1.0,1.0
50%,1.0,3.0,0.65,0.7,1.3,3.0,1.0
75%,3.0,5.0,0.8,0.8,1.7,6.0,1.0
max,5.0,9.0,0.8,0.8,1.9,33.0,1.0


In [65]:
# Candidate parameter sets for plotting: rows with exactly one connected
# component, ordered by eps, both overlap settings and vertex count, then
# restricted to graphs with at least 10 vertices and re-indexed from 0.
to_plot = (
    IAVT4_correlation_sc_df_1a
    .loc[lambda df: df['n_connected_components'] == 1]
    .sort_values(by=['eps', 'lens1_perc_overlap', 'lens2_perc_overlap', 'n_vertices'])
    .loc[lambda df: df['n_vertices'] >= 10]
    .reset_index(drop=True)
)
to_plot

Unnamed: 0,lens1,lens2,lens1_n_cubes,lens2_n_cubes,lens1_perc_overlap,lens2_perc_overlap,eps,n_vertices,n_connected_components
0,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.5,15,1
1,Neighbour_1,Neighbour_2,3,3,0.8,0.7,0.5,12,1
2,Neighbour_1,Neighbour_2,5,3,0.8,0.7,0.5,16,1
3,Neighbour_1,Neighbour_2,5,3,0.8,0.8,0.5,18,1
4,Neighbour_1,Neighbour_2,3,5,0.8,0.8,0.5,21,1
5,Neighbour_1,Neighbour_2,5,5,0.8,0.8,0.5,26,1
6,Neighbour_1,Neighbour_2,3,5,0.7,0.7,0.7,11,1
7,Neighbour_1,Neighbour_2,3,5,0.7,0.8,0.7,13,1
8,Neighbour_1,Neighbour_2,3,7,0.7,0.8,0.7,16,1
9,Neighbour_1,Neighbour_2,3,9,0.7,0.8,0.7,19,1


In [66]:
# Build one Mapper graph per row of `to_plot` (at most the first 10 rows) for
# IAVT4 with the correlation lenses, and write each graph to an HTML file.

# Feature matrix for IAVT4: every column listed in `features` except the first.
# `.values` is used because `DataFrame.as_matrix()` was removed in pandas 1.0.
X = IAVT4[features[1:]].values

# init mapper
mapper = km.KeplerMapper()

# Guard against `to_plot` holding fewer than 10 rows (would raise KeyError).
n_plots = min(10, len(to_plot))

# The number of colour bins does not change between iterations.
n_label_bins = len(np.unique(labels_IAVT4))

for i in range(n_plots):

    # Lens names and cover/clustering parameters stored in row i.
    lens1_name = to_plot.loc[i, 'lens1']
    lens2_name = to_plot.loc[i, 'lens2']
    c1 = to_plot.loc[i, 'lens1_n_cubes']
    c2 = to_plot.loc[i, 'lens2_n_cubes']
    p1 = to_plot.loc[i, 'lens1_perc_overlap']
    p2 = to_plot.loc[i, 'lens2_perc_overlap']
    eps = to_plot.loc[i, 'eps']

    # Two-column lens built from the two named lenses.
    lens = np.c_[lenses_IAVT4_correlation[lens1_name],
                 lenses_IAVT4_correlation[lens2_name]]

    # One clusterer instance feeds both the mapping and the metadata. The
    # original metadata recorded metric='cosine' although the clustering was
    # run with metric='correlation'.
    clusterer = DBSCAN(metric='correlation', eps=eps, min_samples=1)

    # create complex
    scomplex = mapper.map(lens,
                          X,
                          cover=km.cover.Cover(n_cubes=[c1, c2], perc_overlap=[p1, p2]),
                          clusterer=clusterer,
                          precomputed=False)

    # Parameters recorded in the exported HTML.
    meta_dict = {"PROJECTIONS": "[{},{}]".format(lens1_name, lens2_name),
                 "n_cubes": "[{},{}]".format(c1, c2),
                 "PERCENTAGE OVERLAP": "[{},{}]".format(p1, p2),
                 "CLUSTERER": clusterer,
                 "SCALER": "MinMaxScaler(feature_range=(0,1))"}

    title = "IAVT4_correlation_sc_df_1a_{}.html".format(i)
    path = "../mapper_output/IAVT4/IAVT4_correlation_sim_1a/" + title
    _ = mapper.visualize(graph=scomplex,
                         path_html=path,
                         custom_meta=meta_dict,
                         custom_tooltips=labels_IAVT4,
                         color_function=labels_IAVT4,
                         nbins=n_label_bins,
                         title=title)

