# Spurious Correlations of Highly Dimensional Big Data

This Notebook aims at showing how PCA and random projection can solve the problem of spurious correlations in Big Data.

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.utils import shuffle

## DataFrame Creation

In [2]:
# User defined parameters

# Number of rows for df1
x = 5000

# Number of columns for df1
y = 200000

# Number of rows for df2
z = 500

In [3]:
# Creating a dataframe with x number of rows and y number of columns
df = pd.DataFrame(np.random.random_sample((x,y)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,299990,299991,299992,299993,299994,299995,299996,299997,299998,299999
0,0.417640,0.644939,0.221708,0.904634,0.792646,0.189413,0.293599,0.012901,0.214108,0.411493,...,0.374352,0.900594,0.592744,0.912854,0.420652,0.434338,0.181139,0.746609,0.723072,0.351574
1,0.829965,0.704829,0.282245,0.727878,0.039001,0.606415,0.660823,0.396016,0.263427,0.409099,...,0.756934,0.263043,0.119627,0.376246,0.325063,0.447539,0.853277,0.732058,0.347604,0.237725
2,0.234594,0.596710,0.775527,0.445312,0.123260,0.490638,0.244372,0.543193,0.396774,0.507737,...,0.681941,0.510339,0.256345,0.682994,0.788208,0.778415,0.244206,0.419600,0.955432,0.438029
3,0.775581,0.944236,0.128028,0.536473,0.083218,0.859482,0.623345,0.976117,0.699179,0.497733,...,0.481136,0.671557,0.043669,0.295185,0.753800,0.902967,0.812640,0.444282,0.328801,0.412990
4,0.020846,0.384158,0.040876,0.337100,0.381273,0.624468,0.650453,0.351976,0.723423,0.177547,...,0.271951,0.572935,0.613071,0.064150,0.478174,0.083643,0.389626,0.956407,0.380790,0.228078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,0.910005,0.428719,0.756066,0.639703,0.613390,0.615139,0.318589,0.602295,0.169620,0.359488,...,0.881424,0.157725,0.134195,0.566417,0.814375,0.810154,0.254472,0.471635,0.829704,0.123226
2996,0.411078,0.716931,0.952651,0.885535,0.766238,0.135935,0.207512,0.973253,0.499106,0.852944,...,0.589713,0.119805,0.864233,0.934275,0.731760,0.871394,0.182226,0.876936,0.316849,0.552902
2997,0.819037,0.915367,0.254262,0.528698,0.521671,0.365217,0.890019,0.467687,0.320662,0.492563,...,0.974158,0.381885,0.187870,0.842050,0.344880,0.703126,0.333918,0.672703,0.817015,0.917165
2998,0.220018,0.126678,0.943651,0.154535,0.692384,0.691928,0.052690,0.450650,0.367738,0.819778,...,0.544320,0.475010,0.740158,0.908851,0.450364,0.694077,0.940656,0.619837,0.882289,0.870456


In order to assess the correlations of the different parameters, correlations between the column with index 0 and the 999 other first columns is assessed.

In [4]:
# Assigning X to all columns except 0
X_df = df.drop(columns=0)
X_df.head()

# Assigning Y to column 0
Y_df = df[0]
print(Y_df)

# The following line makes Y become a list
Y_df = np.array(Y_df).reshape(-1)
print(X_df.shape,Y_df.shape)

list_titles = X_df.columns
list_corr_df1 = []
for i in list_titles[0:1000]:
    print(i)
    list_corr_df1.append(abs(np.corrcoef(Y_df, X_df[i])[0][1]))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(np.corrcoef(Y_df, X_df[i])[0][1]))

0       0.417640
1       0.829965
2       0.234594
3       0.775581
4       0.020846
          ...   
2995    0.910005
2996    0.411078
2997    0.819037
2998    0.220018
2999    0.984108
Name: 0, Length: 3000, dtype: float64
(3000, 299999) (3000,)
1
Correlation matrix for column 0 and and column1: 0.0021281126582517636
2
Correlation matrix for column 0 and and column2: -0.0020445593781643107
3
Correlation matrix for column 0 and and column3: 0.015525983430299081
4
Correlation matrix for column 0 and and column4: 0.010114891866500381
5
Correlation matrix for column 0 and and column5: 0.014005624238090009
6
Correlation matrix for column 0 and and column6: -0.005428571417101189
7
Correlation matrix for column 0 and and column7: 0.012505878915322061
8
Correlation matrix for column 0 and and column8: -0.006165044225402236
9
Correlation matrix for column 0 and and column9: 0.02383890234561144
10
Correlation matrix for column 0 and and column10: -0.011289332476964873
11
Correlation matrix for

Correlation matrix for column 0 and and column127: 0.010836868100534998
128
Correlation matrix for column 0 and and column128: -0.004459700302276692
129
Correlation matrix for column 0 and and column129: 0.006208960414927951
130
Correlation matrix for column 0 and and column130: -0.0014674643135961227
131
Correlation matrix for column 0 and and column131: -0.013024879888698647
132
Correlation matrix for column 0 and and column132: 0.010224901627150996
133
Correlation matrix for column 0 and and column133: -0.006618258913543559
134
Correlation matrix for column 0 and and column134: -0.03956800154890536
135
Correlation matrix for column 0 and and column135: -0.022656028759329504
136
Correlation matrix for column 0 and and column136: 0.025115772559520553
137
Correlation matrix for column 0 and and column137: 0.009871874369736132
138
Correlation matrix for column 0 and and column138: -0.00941121292455676
139
Correlation matrix for column 0 and and column139: 0.012193095309952164
140
Correl

Correlation matrix for column 0 and and column316: -0.006591491931512428
317
Correlation matrix for column 0 and and column317: -0.005487384890648972
318
Correlation matrix for column 0 and and column318: -0.026005677168599455
319
Correlation matrix for column 0 and and column319: 0.02748399693006331
320
Correlation matrix for column 0 and and column320: -0.025683142076380852
321
Correlation matrix for column 0 and and column321: 0.033976750903592784
322
Correlation matrix for column 0 and and column322: 0.008509026732950068
323
Correlation matrix for column 0 and and column323: -0.024134168836349962
324
Correlation matrix for column 0 and and column324: 0.010242280536409638
325
Correlation matrix for column 0 and and column325: 0.02546924972795821
326
Correlation matrix for column 0 and and column326: -0.009570075111192329
327
Correlation matrix for column 0 and and column327: 0.009833482239874595
328
Correlation matrix for column 0 and and column328: 0.0097681027143917
329
Correlatio

542
Correlation matrix for column 0 and and column542: 0.02026882435145881
543
Correlation matrix for column 0 and and column543: 0.03806461162946715
544
Correlation matrix for column 0 and and column544: 0.02322744067020096
545
Correlation matrix for column 0 and and column545: -0.001775239642075443
546
Correlation matrix for column 0 and and column546: 0.03591153432858109
547
Correlation matrix for column 0 and and column547: 0.01760475633942527
548
Correlation matrix for column 0 and and column548: 0.02067730219969171
549
Correlation matrix for column 0 and and column549: -0.0055627283612068095
550
Correlation matrix for column 0 and and column550: 0.007181170860273548
551
Correlation matrix for column 0 and and column551: -0.010121939296264312
552
Correlation matrix for column 0 and and column552: 0.017378823922916193
553
Correlation matrix for column 0 and and column553: 0.010711915132212764
554
Correlation matrix for column 0 and and column554: 0.013043484765553328
555
Correlatio

Correlation matrix for column 0 and and column754: 0.0012432683259950088
755
Correlation matrix for column 0 and and column755: 0.0001594048096143479
756
Correlation matrix for column 0 and and column756: 0.00014738757623138999
757
Correlation matrix for column 0 and and column757: 0.03859031400615399
758
Correlation matrix for column 0 and and column758: -0.03342779329561143
759
Correlation matrix for column 0 and and column759: -0.0006183642240023205
760
Correlation matrix for column 0 and and column760: -0.008149985746872694
761
Correlation matrix for column 0 and and column761: 0.013908923033066383
762
Correlation matrix for column 0 and and column762: -0.022863467845352904
763
Correlation matrix for column 0 and and column763: 0.0031321538027618426
764
Correlation matrix for column 0 and and column764: -0.02103608772928147
765
Correlation matrix for column 0 and and column765: -0.03653219020459895
766
Correlation matrix for column 0 and and column766: 0.020431361882525267
767
Corr

Correlation matrix for column 0 and and column974: -0.01605824998085753
975
Correlation matrix for column 0 and and column975: -0.02693115626089898
976
Correlation matrix for column 0 and and column976: -0.049417775165869524
977
Correlation matrix for column 0 and and column977: -0.033493929806482754
978
Correlation matrix for column 0 and and column978: -0.0157969188487043
979
Correlation matrix for column 0 and and column979: 0.013259404081306693
980
Correlation matrix for column 0 and and column980: -0.0019353136363196835
981
Correlation matrix for column 0 and and column981: -0.02269401885536881
982
Correlation matrix for column 0 and and column982: -0.01930706544089197
983
Correlation matrix for column 0 and and column983: -0.013313366110317887
984
Correlation matrix for column 0 and and column984: 0.041306636838300735
985
Correlation matrix for column 0 and and column985: 0.0309376832548463
986
Correlation matrix for column 0 and and column986: -0.027234401788766762
987
Correlati

To see if indeed the correlations between the different parameters increases with the data size, the original dataframe is compared to a sub-set dataframe which only takes the first z rows of the original dataframe. If the correlations in the original dataframe are higher than in the smaller dataframe this would prove that the bigger the data size the more frequent the number of spurious correlations.

In [5]:
# Creating smaller dataframe taking z number of rows from original dataframe
df2 = df.iloc[:z]
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,299990,299991,299992,299993,299994,299995,299996,299997,299998,299999
0,0.417640,0.644939,0.221708,0.904634,0.792646,0.189413,0.293599,0.012901,0.214108,0.411493,...,0.374352,0.900594,0.592744,0.912854,0.420652,0.434338,0.181139,0.746609,0.723072,0.351574
1,0.829965,0.704829,0.282245,0.727878,0.039001,0.606415,0.660823,0.396016,0.263427,0.409099,...,0.756934,0.263043,0.119627,0.376246,0.325063,0.447539,0.853277,0.732058,0.347604,0.237725
2,0.234594,0.596710,0.775527,0.445312,0.123260,0.490638,0.244372,0.543193,0.396774,0.507737,...,0.681941,0.510339,0.256345,0.682994,0.788208,0.778415,0.244206,0.419600,0.955432,0.438029
3,0.775581,0.944236,0.128028,0.536473,0.083218,0.859482,0.623345,0.976117,0.699179,0.497733,...,0.481136,0.671557,0.043669,0.295185,0.753800,0.902967,0.812640,0.444282,0.328801,0.412990
4,0.020846,0.384158,0.040876,0.337100,0.381273,0.624468,0.650453,0.351976,0.723423,0.177547,...,0.271951,0.572935,0.613071,0.064150,0.478174,0.083643,0.389626,0.956407,0.380790,0.228078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,0.209098,0.510440,0.044747,0.119252,0.268939,0.983160,0.712235,0.814527,0.247655,0.845343,...,0.215110,0.459119,0.533713,0.756126,0.532143,0.862801,0.671165,0.696934,0.798059,0.343863
296,0.672308,0.245945,0.927050,0.996259,0.406444,0.273994,0.424101,0.479692,0.124631,0.562066,...,0.286774,0.486221,0.085778,0.635783,0.693732,0.139261,0.876728,0.183098,0.374461,0.243721
297,0.689670,0.204871,0.805912,0.967352,0.740845,0.710703,0.368256,0.819092,0.283456,0.134293,...,0.167063,0.897405,0.576779,0.720546,0.200122,0.985502,0.666988,0.740938,0.894809,0.523664
298,0.181265,0.362803,0.094114,0.679094,0.870990,0.705246,0.185530,0.745824,0.624403,0.337354,...,0.449627,0.195911,0.244982,0.416930,0.594151,0.560413,0.404322,0.459514,0.628016,0.713623


In [6]:
# Assigning X to all columns except 0
X_df2 = df2.drop(columns=0)
X_df2.head()

# Assigning Y to column 0
Y_df2 = df2[0]
print(Y_df2)

# The following line makes Y become a list
Y_df2 = np.array(Y_df2).reshape(-1)
print(X_df2.shape,Y_df2.shape)

list_titles = X_df2.columns

list_corr_df2 = []
for i in list_titles[0:1000]:
    print(i)
    list_corr_df2.append(abs(np.corrcoef(Y_df2, X_df2[i])[0][1]))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(np.corrcoef(Y_df2, X_df2[i])[0][1]))

0      0.417640
1      0.829965
2      0.234594
3      0.775581
4      0.020846
         ...   
295    0.209098
296    0.672308
297    0.689670
298    0.181265
299    0.517405
Name: 0, Length: 300, dtype: float64
(300, 299999) (300,)
1
Correlation matrix for column 0 and and column1: -0.021612253443875667
2
Correlation matrix for column 0 and and column2: 0.03913794625222781
3
Correlation matrix for column 0 and and column3: 0.07982616895875433
4
Correlation matrix for column 0 and and column4: 0.00945320298783765
5
Correlation matrix for column 0 and and column5: 0.064839103656806
6
Correlation matrix for column 0 and and column6: -0.048764365855227226
7
Correlation matrix for column 0 and and column7: 0.07734077115357678
8
Correlation matrix for column 0 and and column8: -0.001708198309472604
9
Correlation matrix for column 0 and and column9: 0.009221816976348617
10
Correlation matrix for column 0 and and column10: 0.04162781555373784
11
Correlation matrix for column 0 and and column

Correlation matrix for column 0 and and column183: -0.028342241367913402
184
Correlation matrix for column 0 and and column184: -0.015009761733333592
185
Correlation matrix for column 0 and and column185: -0.0712413518528538
186
Correlation matrix for column 0 and and column186: -0.057300031096565195
187
Correlation matrix for column 0 and and column187: -0.03948158314680934
188
Correlation matrix for column 0 and and column188: 0.05632723462053229
189
Correlation matrix for column 0 and and column189: -0.025168098146780185
190
Correlation matrix for column 0 and and column190: -0.00463173231176717
191
Correlation matrix for column 0 and and column191: -0.04886561423304587
192
Correlation matrix for column 0 and and column192: 0.010646658609378057
193
Correlation matrix for column 0 and and column193: 0.06726905716057854
194
Correlation matrix for column 0 and and column194: -0.02110288721063795
195
Correlation matrix for column 0 and and column195: 0.06232281380240827
196
Correlation 

Correlation matrix for column 0 and and column469: -0.05621530442719902
470
Correlation matrix for column 0 and and column470: 0.02547158022119871
471
Correlation matrix for column 0 and and column471: 0.0009566666984285385
472
Correlation matrix for column 0 and and column472: -0.004208161758503333
473
Correlation matrix for column 0 and and column473: 0.059328750462920764
474
Correlation matrix for column 0 and and column474: 0.0045997343181592015
475
Correlation matrix for column 0 and and column475: -0.01427942081674191
476
Correlation matrix for column 0 and and column476: 0.05039108288660192
477
Correlation matrix for column 0 and and column477: -0.016952412623091162
478
Correlation matrix for column 0 and and column478: 0.05823102603002012
479
Correlation matrix for column 0 and and column479: 0.1584235452561276
480
Correlation matrix for column 0 and and column480: 0.11066065973969606
481
Correlation matrix for column 0 and and column481: -0.007352079567257404
482
Correlation m

681
Correlation matrix for column 0 and and column681: 0.06553690715685642
682
Correlation matrix for column 0 and and column682: -0.0022148249926205003
683
Correlation matrix for column 0 and and column683: 0.011067769590198591
684
Correlation matrix for column 0 and and column684: -0.016281504446622325
685
Correlation matrix for column 0 and and column685: 0.0021814215188547936
686
Correlation matrix for column 0 and and column686: 0.08634887181355483
687
Correlation matrix for column 0 and and column687: -0.03143761928963136
688
Correlation matrix for column 0 and and column688: 0.032419150450766494
689
Correlation matrix for column 0 and and column689: -0.07920745121903268
690
Correlation matrix for column 0 and and column690: -0.08684185348800355
691
Correlation matrix for column 0 and and column691: 0.05061531896244811
692
Correlation matrix for column 0 and and column692: 0.020025709619648162
693
Correlation matrix for column 0 and and column693: -0.05922745108638768
694
Correla

Correlation matrix for column 0 and and column935: -0.011566118468828488
936
Correlation matrix for column 0 and and column936: -0.013211556755495053
937
Correlation matrix for column 0 and and column937: 0.09926467742443541
938
Correlation matrix for column 0 and and column938: -0.018914279960096547
939
Correlation matrix for column 0 and and column939: 0.038622668080941946
940
Correlation matrix for column 0 and and column940: 0.010944198651094365
941
Correlation matrix for column 0 and and column941: -0.04850982741678376
942
Correlation matrix for column 0 and and column942: 0.051263352484408865
943
Correlation matrix for column 0 and and column943: 0.023113584220150623
944
Correlation matrix for column 0 and and column944: -0.10670884660333324
945
Correlation matrix for column 0 and and column945: -0.05053208664861124
946
Correlation matrix for column 0 and and column946: -0.08809583555101422
947
Correlation matrix for column 0 and and column947: -0.08365107342005554
948
Correlatio

We now compare which of the correlation lists has the highest numbers of every parameter

In [7]:
a = 0
b = 0
for i in range(0,1000):
    if abs(list_corr_df1[i]) > abs(list_corr_df2[i]):
        a+=1
    elif abs(list_corr_df1[i]) < abs(list_corr_df2[i]):
        b+=1
    else:
        print()
        
print(a)
print(b)

175
825


In [8]:
a = 0
b = 0
for i in range(0,len(list_corr_df1)):
    if abs(list_corr_df1[i]) > 0.01:
        a+=1
    else:
        pass
        
for i in range(0,len(list_corr_df2)):
    if abs(list_corr_df2[i]) > 0.01:
        b+=1
    else:
        pass

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df2:",(b/len(list_corr_df2)*100),"%")

Percentage of correlations in df1: 59.4 %
Percentage of correlations in df2: 86.2 %


Clearly it is seen that the lower the number of observations, the higher the frequency of spurious correlations. However the number of spurious correlations present in the bigger DataFrame is still very significant.

To counter spurious correlations, random projection can be used.

## Random Projection eps = 0.1

In [9]:
# Pass df1 in the random projection to create a new reduced DataFrame
transformer = random_projection.GaussianRandomProjection(eps = 0.1)
df_new = pd.DataFrame(transformer.fit_transform(df))
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,6852,6853,6854,6855,6856,6857,6858,6859,6860,6861
0,1.707261,4.102608,4.223011,-5.998484,2.206299,3.547938,4.306893,3.944708,1.712250,2.650398,...,-2.280992,-0.771373,-1.759600,1.800296,-0.327829,-6.003393,-9.279215,-2.670972,-8.399094,-0.684578
1,2.988094,3.808211,4.465014,-2.182051,-0.069154,-1.323365,4.641590,6.414711,3.579814,-1.126663,...,-1.820994,0.518847,-3.836670,0.142118,-5.150236,-5.266342,-7.972169,-0.770843,-2.283928,-3.839149
2,5.213717,1.124537,2.346610,-7.956089,0.070361,-1.641761,1.320331,8.134109,-2.211134,0.572791,...,-2.303163,-1.475211,0.366988,-2.044988,-2.245292,-4.188299,-7.045560,-1.305531,-2.772024,1.126368
3,6.446041,4.608101,4.321187,1.414273,1.434350,1.344971,4.520745,5.451909,1.391492,1.875584,...,0.490739,1.684678,-2.409532,0.934401,0.559362,-10.043876,-7.766306,-2.932847,-4.106536,-0.043903
4,2.449159,1.400599,3.099165,-5.695587,0.578874,1.340735,-0.725826,2.050343,2.431286,0.478856,...,-4.526199,-1.013694,-0.960381,-0.909694,2.242588,-5.711396,-10.115547,-0.549087,-6.477726,0.729283
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,4.283993,3.049698,3.249505,-1.050564,2.824446,2.287962,2.787690,3.657387,-0.488799,-1.884894,...,-1.166011,1.953954,-1.798940,0.428565,-2.206732,-9.107455,-6.644900,1.731911,-4.560308,-4.385990
2996,5.971685,-0.835952,3.195576,-1.337031,5.238520,-4.000967,-0.174802,9.695986,-0.709488,-0.852483,...,-0.744880,-3.151087,-4.008243,-0.526925,-3.358984,-9.767510,-12.074677,0.311917,-5.546515,-2.311208
2997,2.527645,0.469588,6.212457,-0.220341,2.389397,-1.884688,1.834597,6.758467,-2.357756,0.237159,...,-2.552114,-2.733815,-1.407707,1.265158,-2.962225,-9.046993,-5.452469,-1.555243,-3.938973,-4.458762
2998,3.587347,2.530834,3.957988,-1.966966,-1.699458,3.878411,1.806295,5.799878,-0.355005,1.458191,...,-0.762584,4.303084,-2.290763,-1.066719,-2.381775,-8.859431,-8.027471,-1.375169,-1.681389,-1.311747


In [10]:
# Assigning X to all columns except 0
X_df_new = df_new.drop(columns=0)

# Assigning Y to column 0
Y_df_new = df_new[0]
print(Y_df_new)

# The following line makes Y become a list
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

list_corr_df_new = []
for i in list_titles[0:len(list_titles)]:
    print(i)
    list_corr_df_new.append(abs(np.corrcoef(Y_df_new, X_df_new[i])[0][1]))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(np.corrcoef(Y_df_new, X_df_new[i])[0][1]))

0       1.707261
1       2.988094
2       5.213717
3       6.446041
4       2.449159
          ...   
2995    4.283993
2996    5.971685
2997    2.527645
2998    3.587347
2999    3.613265
Name: 0, Length: 3000, dtype: float64
(3000, 6861) (3000,)
1
Correlation matrix for column 0 and and column1: -0.01840719163849457
2
Correlation matrix for column 0 and and column2: 0.00873156430823405
3
Correlation matrix for column 0 and and column3: -0.0004791344814739626
4
Correlation matrix for column 0 and and column4: 0.009631528004331527
5
Correlation matrix for column 0 and and column5: 0.004575374486148968
6
Correlation matrix for column 0 and and column6: 0.020475138500224487
7
Correlation matrix for column 0 and and column7: 0.01089142325281875
8
Correlation matrix for column 0 and and column8: 0.01018181895617888
9
Correlation matrix for column 0 and and column9: 0.004674294537365567
10
Correlation matrix for column 0 and and column10: -0.03622035601398536
11
Correlation matrix for column 

Correlation matrix for column 0 and and column188: -0.02762096032971518
189
Correlation matrix for column 0 and and column189: 0.037887842162641086
190
Correlation matrix for column 0 and and column190: 0.029792312044762483
191
Correlation matrix for column 0 and and column191: 0.00010715091165977457
192
Correlation matrix for column 0 and and column192: 0.01412527892896303
193
Correlation matrix for column 0 and and column193: -0.020769152208774715
194
Correlation matrix for column 0 and and column194: 0.0024680651233539882
195
Correlation matrix for column 0 and and column195: 0.000540384798082443
196
Correlation matrix for column 0 and and column196: 0.012789746671925119
197
Correlation matrix for column 0 and and column197: -0.025474623137264115
198
Correlation matrix for column 0 and and column198: 0.00999608867183794
199
Correlation matrix for column 0 and and column199: 0.012617734171001493
200
Correlation matrix for column 0 and and column200: -0.03279622387192046
201
Correlati

Correlation matrix for column 0 and and column408: -0.0252464735777073
409
Correlation matrix for column 0 and and column409: -0.017018603922238938
410
Correlation matrix for column 0 and and column410: -0.015223343789649284
411
Correlation matrix for column 0 and and column411: -0.0055232087488072314
412
Correlation matrix for column 0 and and column412: -0.0067344639336630804
413
Correlation matrix for column 0 and and column413: 0.004949628880770382
414
Correlation matrix for column 0 and and column414: 0.037059520264047456
415
Correlation matrix for column 0 and and column415: 0.0032616154406090296
416
Correlation matrix for column 0 and and column416: -0.005768602398549938
417
Correlation matrix for column 0 and and column417: -0.008826577049164959
418
Correlation matrix for column 0 and and column418: 0.0035764960927183996
419
Correlation matrix for column 0 and and column419: -0.018198736732500623
420
Correlation matrix for column 0 and and column420: -0.006882273553358053
421
C

Correlation matrix for column 0 and and column565: -0.007142044520729355
566
Correlation matrix for column 0 and and column566: 0.0036095181794317715
567
Correlation matrix for column 0 and and column567: 0.01391522900366612
568
Correlation matrix for column 0 and and column568: -0.008024703950408494
569
Correlation matrix for column 0 and and column569: -0.004081992260809668
570
Correlation matrix for column 0 and and column570: 0.008959846471039746
571
Correlation matrix for column 0 and and column571: 0.0025682373949159304
572
Correlation matrix for column 0 and and column572: -0.005373577716822079
573
Correlation matrix for column 0 and and column573: -0.017475252177617136
574
Correlation matrix for column 0 and and column574: 0.021203328353329654
575
Correlation matrix for column 0 and and column575: 0.03844042275955814
576
Correlation matrix for column 0 and and column576: -0.013176900152293489
577
Correlation matrix for column 0 and and column577: 0.02513818841597288
578
Correla

Correlation matrix for column 0 and and column763: -0.007151625649101312
764
Correlation matrix for column 0 and and column764: -0.002637020239719763
765
Correlation matrix for column 0 and and column765: -0.005146122644170681
766
Correlation matrix for column 0 and and column766: 0.0058906672327227365
767
Correlation matrix for column 0 and and column767: -0.013227208961147352
768
Correlation matrix for column 0 and and column768: 0.0039507175849546996
769
Correlation matrix for column 0 and and column769: 0.0008815753276872155
770
Correlation matrix for column 0 and and column770: 0.017852903255601196
771
Correlation matrix for column 0 and and column771: 0.0057877384450164965
772
Correlation matrix for column 0 and and column772: -0.005695589049164178
773
Correlation matrix for column 0 and and column773: -0.002318534375636771
774
Correlation matrix for column 0 and and column774: -0.005040353841986572
775
Correlation matrix for column 0 and and column775: 0.014136320720138035
776
C

Correlation matrix for column 0 and and column984: -0.022872495255546008
985
Correlation matrix for column 0 and and column985: -0.0024372660692910265
986
Correlation matrix for column 0 and and column986: -0.0011336862654475032
987
Correlation matrix for column 0 and and column987: 0.0062559920090923486
988
Correlation matrix for column 0 and and column988: -0.001655281116871232
989
Correlation matrix for column 0 and and column989: -0.0007194639318944924
990
Correlation matrix for column 0 and and column990: 0.022372486261240682
991
Correlation matrix for column 0 and and column991: -0.00616544060873204
992
Correlation matrix for column 0 and and column992: -0.009596541151833069
993
Correlation matrix for column 0 and and column993: -0.0008285599135310234
994
Correlation matrix for column 0 and and column994: -0.022358183118013967
995
Correlation matrix for column 0 and and column995: -0.010778020409724543
996
Correlation matrix for column 0 and and column996: -0.0024832524143728516


Correlation matrix for column 0 and and column1138: -0.0016261615680995952
1139
Correlation matrix for column 0 and and column1139: 0.021365742879528096
1140
Correlation matrix for column 0 and and column1140: 0.019821168563056872
1141
Correlation matrix for column 0 and and column1141: -0.0007880873734340433
1142
Correlation matrix for column 0 and and column1142: 0.028498681937822835
1143
Correlation matrix for column 0 and and column1143: 0.015348170122681276
1144
Correlation matrix for column 0 and and column1144: 0.005670950305491071
1145
Correlation matrix for column 0 and and column1145: -0.01988200588491704
1146
Correlation matrix for column 0 and and column1146: 0.021903752389806245
1147
Correlation matrix for column 0 and and column1147: 0.02319491409251494
1148
Correlation matrix for column 0 and and column1148: -0.014653404405457985
1149
Correlation matrix for column 0 and and column1149: -0.019354314664118754
1150
Correlation matrix for column 0 and and column1150: 0.01970

Correlation matrix for column 0 and and column1411: -0.048863293285227524
1412
Correlation matrix for column 0 and and column1412: -0.011427750846806996
1413
Correlation matrix for column 0 and and column1413: 0.0014434113348550488
1414
Correlation matrix for column 0 and and column1414: -0.027621940592083394
1415
Correlation matrix for column 0 and and column1415: -0.026740192507623613
1416
Correlation matrix for column 0 and and column1416: 0.0055323875616169
1417
Correlation matrix for column 0 and and column1417: -0.0138519322918155
1418
Correlation matrix for column 0 and and column1418: 0.021258933748478512
1419
Correlation matrix for column 0 and and column1419: -0.03463747546572198
1420
Correlation matrix for column 0 and and column1420: -0.012597147535201764
1421
Correlation matrix for column 0 and and column1421: -0.020420196254564538
1422
Correlation matrix for column 0 and and column1422: -0.004369364050533155
1423
Correlation matrix for column 0 and and column1423: -0.0210

1667
Correlation matrix for column 0 and and column1667: -0.006231244020515674
1668
Correlation matrix for column 0 and and column1668: 0.01097432630102427
1669
Correlation matrix for column 0 and and column1669: 0.001988757578460739
1670
Correlation matrix for column 0 and and column1670: -0.0238924756080874
1671
Correlation matrix for column 0 and and column1671: 0.01831089605624154
1672
Correlation matrix for column 0 and and column1672: -0.03137125394516524
1673
Correlation matrix for column 0 and and column1673: 0.011212713334811688
1674
Correlation matrix for column 0 and and column1674: -0.0037236264527030537
1675
Correlation matrix for column 0 and and column1675: 0.012171969391304637
1676
Correlation matrix for column 0 and and column1676: -0.014729335042440334
1677
Correlation matrix for column 0 and and column1677: -0.008622559837973224
1678
Correlation matrix for column 0 and and column1678: -0.017884798343971803
1679
Correlation matrix for column 0 and and column1679: 0.01

Correlation matrix for column 0 and and column1906: 0.010279837001319903
1907
Correlation matrix for column 0 and and column1907: -0.016517288417429964
1908
Correlation matrix for column 0 and and column1908: 0.023155645413950956
1909
Correlation matrix for column 0 and and column1909: -0.021633227876623015
1910
Correlation matrix for column 0 and and column1910: 0.007839665944247794
1911
Correlation matrix for column 0 and and column1911: 0.005200867336969715
1912
Correlation matrix for column 0 and and column1912: -0.0010022866583745949
1913
Correlation matrix for column 0 and and column1913: -0.01721963939024225
1914
Correlation matrix for column 0 and and column1914: -0.0037343901474186235
1915
Correlation matrix for column 0 and and column1915: 0.0036589154470556325
1916
Correlation matrix for column 0 and and column1916: -0.035026610772427906
1917
Correlation matrix for column 0 and and column1917: 0.01674276148800231
1918
Correlation matrix for column 0 and and column1918: -0.03

Correlation matrix for column 0 and and column2139: -0.0015078759097484214
2140
Correlation matrix for column 0 and and column2140: 0.022763709999183666
2141
Correlation matrix for column 0 and and column2141: 0.021751718337194213
2142
Correlation matrix for column 0 and and column2142: -0.0036446898301124068
2143
Correlation matrix for column 0 and and column2143: 0.02094371429317255
2144
Correlation matrix for column 0 and and column2144: -0.03238455595363568
2145
Correlation matrix for column 0 and and column2145: 0.0009257493329372588
2146
Correlation matrix for column 0 and and column2146: 0.004039844872293752
2147
Correlation matrix for column 0 and and column2147: 0.007011317432723535
2148
Correlation matrix for column 0 and and column2148: -0.018024834482194
2149
Correlation matrix for column 0 and and column2149: -0.012948109096722078
2150
Correlation matrix for column 0 and and column2150: -0.014875947780876024
2151
Correlation matrix for column 0 and and column2151: -0.01229

Correlation matrix for column 0 and and column2363: 0.0035176926199553546
2364
Correlation matrix for column 0 and and column2364: 0.05069269626049513
2365
Correlation matrix for column 0 and and column2365: -0.0057073817969463285
2366
Correlation matrix for column 0 and and column2366: -0.017197633620866425
2367
Correlation matrix for column 0 and and column2367: 0.0073132440664622
2368
Correlation matrix for column 0 and and column2368: -0.008961298533880398
2369
Correlation matrix for column 0 and and column2369: -0.012572797531184177
2370
Correlation matrix for column 0 and and column2370: -0.01807856544300822
2371
Correlation matrix for column 0 and and column2371: 0.00043230987305131253
2372
Correlation matrix for column 0 and and column2372: 0.019730745693291474
2373
Correlation matrix for column 0 and and column2373: 0.012108582878079683
2374
Correlation matrix for column 0 and and column2374: -0.009942988229573865
2375
Correlation matrix for column 0 and and column2375: 0.0111

Correlation matrix for column 0 and and column2586: 0.012669608571708854
2587
Correlation matrix for column 0 and and column2587: -0.007373878673891587
2588
Correlation matrix for column 0 and and column2588: 0.001343740854873644
2589
Correlation matrix for column 0 and and column2589: -0.011782204640146116
2590
Correlation matrix for column 0 and and column2590: 0.017691803724552407
2591
Correlation matrix for column 0 and and column2591: -0.0005837715920533217
2592
Correlation matrix for column 0 and and column2592: -0.019861500067685865
2593
Correlation matrix for column 0 and and column2593: 0.012249092618453245
2594
Correlation matrix for column 0 and and column2594: -0.023093958940539366
2595
Correlation matrix for column 0 and and column2595: -0.006026889134501021
2596
Correlation matrix for column 0 and and column2596: -0.005877755519798926
2597
Correlation matrix for column 0 and and column2597: 0.024157425716513498
2598
Correlation matrix for column 0 and and column2598: 0.00

Correlation matrix for column 0 and and column2811: 0.0036130531124463805
2812
Correlation matrix for column 0 and and column2812: -0.003592921945272031
2813
Correlation matrix for column 0 and and column2813: 0.003979161500159212
2814
Correlation matrix for column 0 and and column2814: -0.016494224235839832
2815
Correlation matrix for column 0 and and column2815: 0.014507270473395513
2816
Correlation matrix for column 0 and and column2816: -0.004716625780265104
2817
Correlation matrix for column 0 and and column2817: -0.023646315094297633
2818
Correlation matrix for column 0 and and column2818: 0.0029825801167627252
2819
Correlation matrix for column 0 and and column2819: 0.017964453166210204
2820
Correlation matrix for column 0 and and column2820: -0.025387418204756138
2821
Correlation matrix for column 0 and and column2821: -0.04137206286997059
2822
Correlation matrix for column 0 and and column2822: -0.005458150541844989
2823
Correlation matrix for column 0 and and column2823: -0.0

Correlation matrix for column 0 and and column3027: 0.014890398877955395
3028
Correlation matrix for column 0 and and column3028: 0.011825662289257074
3029
Correlation matrix for column 0 and and column3029: -0.03359337586762486
3030
Correlation matrix for column 0 and and column3030: 0.00391396016647749
3031
Correlation matrix for column 0 and and column3031: 0.029815202503766656
3032
Correlation matrix for column 0 and and column3032: -0.012177868798704167
3033
Correlation matrix for column 0 and and column3033: -0.0295576149031665
3034
Correlation matrix for column 0 and and column3034: -0.01116273582205479
3035
Correlation matrix for column 0 and and column3035: 0.01529408874598357
3036
Correlation matrix for column 0 and and column3036: -0.02519729129615026
3037
Correlation matrix for column 0 and and column3037: 0.009949578891739492
3038
Correlation matrix for column 0 and and column3038: -0.01931966346481365
3039
Correlation matrix for column 0 and and column3039: 0.046031958140

Correlation matrix for column 0 and and column3239: -0.022971430492910998
3240
Correlation matrix for column 0 and and column3240: -0.024701045674190115
3241
Correlation matrix for column 0 and and column3241: -0.049295292860005564
3242
Correlation matrix for column 0 and and column3242: 0.027956848117280535
3243
Correlation matrix for column 0 and and column3243: -0.0035982661975231724
3244
Correlation matrix for column 0 and and column3244: -0.005535864623632971
3245
Correlation matrix for column 0 and and column3245: 0.03598618180690464
3246
Correlation matrix for column 0 and and column3246: 0.016871832506975003
3247
Correlation matrix for column 0 and and column3247: 0.0059945500296168085
3248
Correlation matrix for column 0 and and column3248: 0.006853303779612508
3249
Correlation matrix for column 0 and and column3249: -0.0029795338568905337
3250
Correlation matrix for column 0 and and column3250: -0.012299035684189169
3251
Correlation matrix for column 0 and and column3251: 0.0

Correlation matrix for column 0 and and column3459: 0.017208224510842935
3460
Correlation matrix for column 0 and and column3460: -0.0020427333976521665
3461
Correlation matrix for column 0 and and column3461: -0.016081636987128025
3462
Correlation matrix for column 0 and and column3462: -0.028989502807601583
3463
Correlation matrix for column 0 and and column3463: 0.0036995101982770988
3464
Correlation matrix for column 0 and and column3464: -0.002053254886047618
3465
Correlation matrix for column 0 and and column3465: -0.021366291527600362
3466
Correlation matrix for column 0 and and column3466: -0.02118657996095115
3467
Correlation matrix for column 0 and and column3467: 0.010959134734543796
3468
Correlation matrix for column 0 and and column3468: 0.008296050051374656
3469
Correlation matrix for column 0 and and column3469: -0.008804776505804766
3470
Correlation matrix for column 0 and and column3470: -0.01968218650793005
3471
Correlation matrix for column 0 and and column3471: 0.00

Correlation matrix for column 0 and and column3680: 0.0059892839302628335
3681
Correlation matrix for column 0 and and column3681: -0.0028032710961637466
3682
Correlation matrix for column 0 and and column3682: -0.0004713572864896249
3683
Correlation matrix for column 0 and and column3683: -0.01717532642177094
3684
Correlation matrix for column 0 and and column3684: -0.009945469382605206
3685
Correlation matrix for column 0 and and column3685: 0.001157267210016975
3686
Correlation matrix for column 0 and and column3686: -0.024127136867758685
3687
Correlation matrix for column 0 and and column3687: -0.007278441688479486
3688
Correlation matrix for column 0 and and column3688: -0.06616500225289808
3689
Correlation matrix for column 0 and and column3689: 0.031939658784941455
3690
Correlation matrix for column 0 and and column3690: 0.026967598811594943
3691
Correlation matrix for column 0 and and column3691: 0.009467678214454708
3692
Correlation matrix for column 0 and and column3692: -0.0

Correlation matrix for column 0 and and column3900: -0.005996486726399899
3901
Correlation matrix for column 0 and and column3901: -0.0010001158811526174
3902
Correlation matrix for column 0 and and column3902: -0.008852631621590262
3903
Correlation matrix for column 0 and and column3903: -0.005586304644199744
3904
Correlation matrix for column 0 and and column3904: 0.02171602977869149
3905
Correlation matrix for column 0 and and column3905: -0.001352755342219519
3906
Correlation matrix for column 0 and and column3906: -0.05126093653651872
3907
Correlation matrix for column 0 and and column3907: 0.001077814529984755
3908
Correlation matrix for column 0 and and column3908: -0.012057612146025572
3909
Correlation matrix for column 0 and and column3909: 0.011748428275836432
3910
Correlation matrix for column 0 and and column3910: 0.0015829658073794183
3911
Correlation matrix for column 0 and and column3911: 0.014645225901621272
3912
Correlation matrix for column 0 and and column3912: 0.006

Correlation matrix for column 0 and and column4127: -0.013132566658463112
4128
Correlation matrix for column 0 and and column4128: 0.006109304016393542
4129
Correlation matrix for column 0 and and column4129: -0.01825576009341035
4130
Correlation matrix for column 0 and and column4130: 0.017007407601048533
4131
Correlation matrix for column 0 and and column4131: -0.0055617135304173605
4132
Correlation matrix for column 0 and and column4132: -0.013080475666660428
4133
Correlation matrix for column 0 and and column4133: 0.011908923404267548
4134
Correlation matrix for column 0 and and column4134: -0.003764442416822518
4135
Correlation matrix for column 0 and and column4135: -0.02105869774652873
4136
Correlation matrix for column 0 and and column4136: -0.01895576568373714
4137
Correlation matrix for column 0 and and column4137: 0.005992450317380221
4138
Correlation matrix for column 0 and and column4138: -0.004256035858778496
4139
Correlation matrix for column 0 and and column4139: -0.001

Correlation matrix for column 0 and and column4334: 0.009631754118794083
4335
Correlation matrix for column 0 and and column4335: -0.01603658612463695
4336
Correlation matrix for column 0 and and column4336: -0.02990845265162708
4337
Correlation matrix for column 0 and and column4337: -0.012921903263965976
4338
Correlation matrix for column 0 and and column4338: -0.04892709622024228
4339
Correlation matrix for column 0 and and column4339: 0.013746768872687557
4340
Correlation matrix for column 0 and and column4340: 0.04472473075861766
4341
Correlation matrix for column 0 and and column4341: 0.010077705597810049
4342
Correlation matrix for column 0 and and column4342: 0.007806643712969278
4343
Correlation matrix for column 0 and and column4343: 0.03424839860711084
4344
Correlation matrix for column 0 and and column4344: 0.004287696800098133
4345
Correlation matrix for column 0 and and column4345: 0.03149498247312403
4346
Correlation matrix for column 0 and and column4346: 0.004202039795

Correlation matrix for column 0 and and column4540: 0.009573213319126936
4541
Correlation matrix for column 0 and and column4541: -0.01934385016423825
4542
Correlation matrix for column 0 and and column4542: -0.0003068444871431364
4543
Correlation matrix for column 0 and and column4543: -0.01939919916646824
4544
Correlation matrix for column 0 and and column4544: 0.015122291857469683
4545
Correlation matrix for column 0 and and column4545: 0.004554345760211088
4546
Correlation matrix for column 0 and and column4546: -0.012648935642271595
4547
Correlation matrix for column 0 and and column4547: -0.01691491761317032
4548
Correlation matrix for column 0 and and column4548: 0.0035320125300922997
4549
Correlation matrix for column 0 and and column4549: -0.023288055031750323
4550
Correlation matrix for column 0 and and column4550: -0.03134781807953255
4551
Correlation matrix for column 0 and and column4551: 0.011341937863505382
4552
Correlation matrix for column 0 and and column4552: 0.03471

Correlation matrix for column 0 and and column4744: -0.0046263565118935725
4745
Correlation matrix for column 0 and and column4745: 0.014280758660198109
4746
Correlation matrix for column 0 and and column4746: -0.004207459516610732
4747
Correlation matrix for column 0 and and column4747: 0.027838471044873043
4748
Correlation matrix for column 0 and and column4748: 0.009719790139436156
4749
Correlation matrix for column 0 and and column4749: -0.0028567437087657767
4750
Correlation matrix for column 0 and and column4750: -0.01507326520489685
4751
Correlation matrix for column 0 and and column4751: -0.020295156775439593
4752
Correlation matrix for column 0 and and column4752: -0.012092699151164637
4753
Correlation matrix for column 0 and and column4753: 0.018606235504085005
4754
Correlation matrix for column 0 and and column4754: 0.011455739170914529
4755
Correlation matrix for column 0 and and column4755: 0.018210500067199483
4756
Correlation matrix for column 0 and and column4756: -0.01

Correlation matrix for column 0 and and column4957: -0.01692178444768655
4958
Correlation matrix for column 0 and and column4958: -0.017327235523959185
4959
Correlation matrix for column 0 and and column4959: 0.010301319931704331
4960
Correlation matrix for column 0 and and column4960: 0.003638369585444255
4961
Correlation matrix for column 0 and and column4961: -0.004881781594400252
4962
Correlation matrix for column 0 and and column4962: 0.051662158542137196
4963
Correlation matrix for column 0 and and column4963: -0.010105425136909157
4964
Correlation matrix for column 0 and and column4964: -0.011651609210318645
4965
Correlation matrix for column 0 and and column4965: -0.019512968776711468
4966
Correlation matrix for column 0 and and column4966: 0.006386719695393348
4967
Correlation matrix for column 0 and and column4967: -0.01866394660813575
4968
Correlation matrix for column 0 and and column4968: 0.006633664420966414
4969
Correlation matrix for column 0 and and column4969: -0.0159

Correlation matrix for column 0 and and column5152: -0.011617782874370786
5153
Correlation matrix for column 0 and and column5153: 0.01680960529413941
5154
Correlation matrix for column 0 and and column5154: 0.007315811125998541
5155
Correlation matrix for column 0 and and column5155: -0.01981466168934719
5156
Correlation matrix for column 0 and and column5156: -0.0403154271162453
5157
Correlation matrix for column 0 and and column5157: 0.011524199265271264
5158
Correlation matrix for column 0 and and column5158: -2.7058927741559073e-05
5159
Correlation matrix for column 0 and and column5159: 0.01256013739813732
5160
Correlation matrix for column 0 and and column5160: -0.03618743553686998
5161
Correlation matrix for column 0 and and column5161: -0.002072295673610144
5162
Correlation matrix for column 0 and and column5162: 0.008475734188717218
5163
Correlation matrix for column 0 and and column5163: -0.011204665862111595
5164
Correlation matrix for column 0 and and column5164: -0.010915

Correlation matrix for column 0 and and column5363: -0.009600030331231038
5364
Correlation matrix for column 0 and and column5364: 0.02085146759704045
5365
Correlation matrix for column 0 and and column5365: -0.0026175525915233446
5366
Correlation matrix for column 0 and and column5366: -0.023994424258571494
5367
Correlation matrix for column 0 and and column5367: 0.03199011020661376
5368
Correlation matrix for column 0 and and column5368: -0.004167562260789857
5369
Correlation matrix for column 0 and and column5369: -0.01248742867618957
5370
Correlation matrix for column 0 and and column5370: -0.012615249707108995
5371
Correlation matrix for column 0 and and column5371: -0.0010980943615803939
5372
Correlation matrix for column 0 and and column5372: -0.03424368704065203
5373
Correlation matrix for column 0 and and column5373: -0.0017848550384079404
5374
Correlation matrix for column 0 and and column5374: 0.014624885577362138
5375
Correlation matrix for column 0 and and column5375: -0.0

5573
Correlation matrix for column 0 and and column5573: 0.009858360851439586
5574
Correlation matrix for column 0 and and column5574: 0.01810291475614147
5575
Correlation matrix for column 0 and and column5575: -0.0171470559266793
5576
Correlation matrix for column 0 and and column5576: 0.0010623511878202116
5577
Correlation matrix for column 0 and and column5577: -0.0028548486915197496
5578
Correlation matrix for column 0 and and column5578: 0.0022527927586242606
5579
Correlation matrix for column 0 and and column5579: -0.0037144746863758175
5580
Correlation matrix for column 0 and and column5580: -0.03333051191336564
5581
Correlation matrix for column 0 and and column5581: -0.02450259036586505
5582
Correlation matrix for column 0 and and column5582: 0.006070760170461211
5583
Correlation matrix for column 0 and and column5583: -0.003699317414209932
5584
Correlation matrix for column 0 and and column5584: -0.002572090810387615
5585
Correlation matrix for column 0 and and column5585: 0

Correlation matrix for column 0 and and column5780: -0.011940466195940231
5781
Correlation matrix for column 0 and and column5781: 0.02019502554327025
5782
Correlation matrix for column 0 and and column5782: -0.005719675398677922
5783
Correlation matrix for column 0 and and column5783: -0.022639050330492686
5784
Correlation matrix for column 0 and and column5784: 0.012687082558469676
5785
Correlation matrix for column 0 and and column5785: -0.004625636467375396
5786
Correlation matrix for column 0 and and column5786: 0.008192539403890838
5787
Correlation matrix for column 0 and and column5787: 0.022877431682509903
5788
Correlation matrix for column 0 and and column5788: 0.003660014564384435
5789
Correlation matrix for column 0 and and column5789: 0.009708345257186248
5790
Correlation matrix for column 0 and and column5790: -0.005560293995695366
5791
Correlation matrix for column 0 and and column5791: 0.004713112623977795
5792
Correlation matrix for column 0 and and column5792: -0.00404

Correlation matrix for column 0 and and column5967: -0.012172312765130772
5968
Correlation matrix for column 0 and and column5968: -0.0101237207311021
5969
Correlation matrix for column 0 and and column5969: 0.02200205271924951
5970
Correlation matrix for column 0 and and column5970: -0.0020212209453467425
5971
Correlation matrix for column 0 and and column5971: 0.011985425610328788
5972
Correlation matrix for column 0 and and column5972: -0.022680874804736947
5973
Correlation matrix for column 0 and and column5973: -0.060552281595023016
5974
Correlation matrix for column 0 and and column5974: -0.0007802005034754963
5975
Correlation matrix for column 0 and and column5975: -0.019603345499519118
5976
Correlation matrix for column 0 and and column5976: 0.005318767521243783
5977
Correlation matrix for column 0 and and column5977: -0.025575141055709266
5978
Correlation matrix for column 0 and and column5978: 0.004639588746037472
5979
Correlation matrix for column 0 and and column5979: -0.02

Correlation matrix for column 0 and and column6170: 0.023011297373736937
6171
Correlation matrix for column 0 and and column6171: -0.02205341835484456
6172
Correlation matrix for column 0 and and column6172: 0.005782378907937352
6173
Correlation matrix for column 0 and and column6173: 0.02105489000013474
6174
Correlation matrix for column 0 and and column6174: 0.0024952092600373957
6175
Correlation matrix for column 0 and and column6175: 0.013096159427705782
6176
Correlation matrix for column 0 and and column6176: 0.008681489737732185
6177
Correlation matrix for column 0 and and column6177: -0.009005952331163603
6178
Correlation matrix for column 0 and and column6178: 0.004043879345317742
6179
Correlation matrix for column 0 and and column6179: -0.025375394222687964
6180
Correlation matrix for column 0 and and column6180: 0.0012021762382094122
6181
Correlation matrix for column 0 and and column6181: -0.032515461034041536
6182
Correlation matrix for column 0 and and column6182: -0.01964

Correlation matrix for column 0 and and column6381: -0.006278303480111952
6382
Correlation matrix for column 0 and and column6382: -0.00239620896095965
6383
Correlation matrix for column 0 and and column6383: 0.016325559937865483
6384
Correlation matrix for column 0 and and column6384: -0.012241548367940029
6385
Correlation matrix for column 0 and and column6385: 0.026364273550145385
6386
Correlation matrix for column 0 and and column6386: -0.020229629851667003
6387
Correlation matrix for column 0 and and column6387: 0.01926477466854275
6388
Correlation matrix for column 0 and and column6388: 0.022808890171877563
6389
Correlation matrix for column 0 and and column6389: -0.022870991132964237
6390
Correlation matrix for column 0 and and column6390: 0.023776814256579
6391
Correlation matrix for column 0 and and column6391: 0.008223905927407871
6392
Correlation matrix for column 0 and and column6392: -0.007902038805812063
6393
Correlation matrix for column 0 and and column6393: 0.005748938

Correlation matrix for column 0 and and column6589: -0.028259640859785724
6590
Correlation matrix for column 0 and and column6590: 0.029001215780350472
6591
Correlation matrix for column 0 and and column6591: 0.012158871878009552
6592
Correlation matrix for column 0 and and column6592: 0.012347831907598692
6593
Correlation matrix for column 0 and and column6593: -0.005641140195017014
6594
Correlation matrix for column 0 and and column6594: 0.022508247748684745
6595
Correlation matrix for column 0 and and column6595: -0.01772770766564567
6596
Correlation matrix for column 0 and and column6596: -0.012595446593322563
6597
Correlation matrix for column 0 and and column6597: -0.050615182161257745
6598
Correlation matrix for column 0 and and column6598: 0.014955090598924272
6599
Correlation matrix for column 0 and and column6599: 0.025068813879935266
6600
Correlation matrix for column 0 and and column6600: -0.024877985411756287
6601
Correlation matrix for column 0 and and column6601: -0.0144

Correlation matrix for column 0 and and column6783: 0.016591727866835194
6784
Correlation matrix for column 0 and and column6784: 0.008673278630675773
6785
Correlation matrix for column 0 and and column6785: -0.016084457555259692
6786
Correlation matrix for column 0 and and column6786: -0.005278462747842087
6787
Correlation matrix for column 0 and and column6787: 0.005515037322150512
6788
Correlation matrix for column 0 and and column6788: 0.005016837742156736
6789
Correlation matrix for column 0 and and column6789: -0.024242184998222335
6790
Correlation matrix for column 0 and and column6790: 0.009143451607395428
6791
Correlation matrix for column 0 and and column6791: -0.013755988457618176
6792
Correlation matrix for column 0 and and column6792: -0.0025099724230065517
6793
Correlation matrix for column 0 and and column6793: -0.02281799341029098
6794
Correlation matrix for column 0 and and column6794: -0.01310780025078003
6795
Correlation matrix for column 0 and and column6795: 0.0362

In [11]:
a = 0
b = 0
for i in range(0,len(list_corr_df1)):
    if abs(list_corr_df1[i]) > 0.01:
        a+=1
    else:
        pass
        
for i in range(0,len(list_corr_df_new)):
    if abs(list_corr_df_new[i]) > 0.01:
        b+=1
    else:
        pass

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df_new:",(b/len(list_corr_df_new)*100),"%")

Percentage of correlations in df1: 59.4 %
Percentage of correlations in df_new: 57.90701063984842 %


## Random Projection eps = 0.5

In [12]:
# Pass df1 in the random projection to create a new reduced DataFrame
transformer = random_projection.GaussianRandomProjection(eps = 0.5)
df_new = pd.DataFrame(transformer.fit_transform(df))
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
0,9.051175,2.734645,6.134172,-9.695326,23.409298,6.064939,-17.543907,12.740925,-1.880832,-0.190651,...,-8.865914,-1.962840,-2.411005,-8.921102,15.714008,-2.093235,-13.496478,7.262677,-2.951847,2.327596
1,14.796975,5.099036,-5.473505,-22.107477,17.870463,8.264337,-8.628424,-2.894208,-5.085841,5.569249,...,-6.472948,11.817654,-9.534527,-18.334622,14.837680,10.011671,-2.855225,4.921330,5.249986,6.849455
2,22.330177,6.678199,10.042658,-12.072406,18.630407,25.706249,-1.000871,-2.046941,-0.485979,-2.685071,...,-1.537920,12.076317,9.950044,-12.574096,17.454165,2.746120,2.762049,11.249349,-7.456565,-6.326917
3,30.095218,2.541950,-0.543899,-14.564748,28.812978,3.582643,-8.050442,10.375794,9.105687,-12.204542,...,-4.694587,12.322169,10.336186,-10.436976,13.008643,4.110180,-24.804751,7.296541,6.301042,-9.289291
4,18.285329,-4.264674,3.605150,-8.675794,19.058367,11.777146,1.766017,-2.507607,12.518601,8.794436,...,-8.222054,-1.136553,-19.292105,0.331918,16.928187,5.105199,-14.481138,2.766910,-3.638927,-6.687716
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,37.282151,12.561973,-5.080593,-19.780263,31.040314,-4.900184,-6.097092,-15.672205,7.716207,3.757650,...,-15.852539,10.871527,-0.904798,-23.506229,17.077058,5.312495,0.051885,7.717797,-4.404610,8.281566
2996,16.771059,11.094214,2.274702,-7.451244,19.282350,22.071352,-4.595942,12.352312,14.196392,-3.048889,...,-2.369487,9.910552,0.553535,-1.714025,-2.265168,1.718533,-4.227057,6.290829,-0.128887,9.361113
2997,25.327407,6.774956,7.464888,-2.628772,-2.811165,17.598037,-18.228522,-6.974284,12.502186,-4.709164,...,-13.247753,10.516341,-2.449457,8.715992,19.843068,0.035042,-1.784472,11.840252,-13.603332,7.427767
2998,16.222858,-5.813628,4.382686,-15.556740,6.932617,12.376083,-7.581255,-10.088096,13.631143,-0.149013,...,-1.443008,-1.369358,5.044531,-9.277657,22.845594,1.183516,-13.376203,8.177053,-0.724078,8.231222


In [13]:
# Assigning X to all columns except 0
X_df_new = df_new.drop(columns=0)

# Assigning Y to column 0
Y_df_new = df_new[0]
print(Y_df_new)

# The following line makes Y become a list
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

list_corr_df_new = []
for i in list_titles[0:1000]:
    print(i)
    list_corr_df_new.append(abs(np.corrcoef(Y_df_new, X_df_new[i])[0][1]))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(np.corrcoef(Y_df_new, X_df_new[i])[0][1]))

0        9.051175
1       14.796975
2       22.330177
3       30.095218
4       18.285329
          ...    
2995    37.282151
2996    16.771059
2997    25.327407
2998    16.222858
2999    21.433421
Name: 0, Length: 3000, dtype: float64
(3000, 383) (3000,)
1
Correlation matrix for column 0 and and column1: -0.021726100447125515
2
Correlation matrix for column 0 and and column2: -0.002076543135809714
3
Correlation matrix for column 0 and and column3: 0.013590807145479849
4
Correlation matrix for column 0 and and column4: 0.006390496708554146
5
Correlation matrix for column 0 and and column5: -0.018449853262436623
6
Correlation matrix for column 0 and and column6: 0.0325305567238853
7
Correlation matrix for column 0 and and column7: 0.010286894346530372
8
Correlation matrix for column 0 and and column8: -0.005248428250060606
9
Correlation matrix for column 0 and and column9: -0.00437689015301717
10
Correlation matrix for column 0 and and column10: -0.01304760161007457
11
Correlation matri

205
Correlation matrix for column 0 and and column205: 0.013184771995468251
206
Correlation matrix for column 0 and and column206: 0.01613488228168997
207
Correlation matrix for column 0 and and column207: 0.0029065494037930186
208
Correlation matrix for column 0 and and column208: 0.01611501024477034
209
Correlation matrix for column 0 and and column209: 0.004310549695605649
210
Correlation matrix for column 0 and and column210: 0.03689815958557052
211
Correlation matrix for column 0 and and column211: 0.017140494789572556
212
Correlation matrix for column 0 and and column212: 0.01045506822312899
213
Correlation matrix for column 0 and and column213: 0.008523975724056549
214
Correlation matrix for column 0 and and column214: 0.018124158198830707
215
Correlation matrix for column 0 and and column215: -0.007114864782542324
216
Correlation matrix for column 0 and and column216: 0.012522058568514027
217
Correlation matrix for column 0 and and column217: 0.015375759614584915
218
Correlatio

In [14]:
a = 0
b = 0
for i in range(0,len(list_corr_df1)):
    if abs(list_corr_df1[i]) > 0.01:
        a+=1
    else:
        pass
        
for i in range(0,len(list_corr_df_new)):
    if abs(list_corr_df_new[i]) > 0.01:
        b+=1
    else:
        pass

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df_new:",(b/len(list_corr_df_new)*100),"%")

Percentage of correlations in df1: 59.4 %
Percentage of correlations in df_new: 59.268929503916446 %
