# Spurious Correlations in High-Dimensional Big Data

This notebook aims to show how PCA and random projection can address the problem of spurious correlations in big data.

In [30]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.utils import shuffle

## DataFrame Creation

In [31]:
# Tunable sizes for the synthetic data used throughout the notebook.

x = 10_000    # rows in df1

y = 100_000   # columns in df1

z = 1_000     # rows in df2

In [36]:
# Build an (x rows, y columns) DataFrame of independent uniform samples in [0, 1).
# A seeded generator is used so that "Restart Kernel -> Run All" reproduces the
# same data, and therefore the same correlations, on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.random((x, y)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99990,99991,99992,99993,99994,99995,99996,99997,99998,99999
0,872073747,903989380,399023275,322626451,903868902,422889755,897827922,805199782,229175345,511026146,...,133433026,825454701,766832076,10835451,206857799,931519124,445493385,13627321,377941084,817978136
1,251996626,669423058,491030335,854142032,533412632,626001454,75446068,592738491,887562369,714723181,...,313840734,324561207,408764051,863633024,314889605,43419079,419684045,477406756,372310181,778881042
2,800147014,270278478,983887160,610627664,829790184,928872588,489648784,827890245,735445785,465357517,...,608689400,471126319,676389541,976022363,864628482,900497500,233353796,703336368,306857246,156934970
3,369040633,924040758,366930105,877373262,218460705,846898430,400966572,800624566,794508885,801324080,...,549279389,321644961,732831867,633996286,275403484,751542273,599847548,772548265,392615921,228802836
4,921658086,345862113,778982241,822135816,560833148,581721136,50110312,814896665,444392277,937644294,...,430882688,555197511,324569834,310602591,743285371,439774371,201151497,781762801,439004191,139856025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,989516791,588800456,813003869,582251474,316819382,1515147,395106853,813570993,838907090,819573901,...,833805075,257319916,174779103,627769131,12046735,235990125,413159033,656847651,968694827,786967537
9996,27634483,493612308,876587248,731600037,625731333,527629344,100842143,504143201,204138615,885529197,...,453942118,886309836,799357706,531693919,292068168,630295599,928169481,485302982,82198583,723431879
9997,750006193,671354864,517559252,208936126,911260943,156393178,372300507,272729473,511446522,456947097,...,3170074,6735126,750184716,735732214,765588918,809738903,26584129,301932965,280348149,83960879
9998,192802361,50790408,852532781,588893256,915822135,512278889,297760715,357072003,51384561,46057913,...,29491149,659582459,220212090,553389435,883978012,568571034,517268231,506756297,733520196,90662951


To assess the correlations between the different parameters, the correlation between the column with index 0 and each of the next 1,000 columns (columns 1 to 1000) is computed.

In [37]:
# Predictors: every column except column 0
X_df = df.drop(columns=0)

# Target: column 0
Y_df = df[0]
print(Y_df)

# Flatten the target into a 1-D NumPy array (it is an ndarray, not a Python list)
Y_df = np.array(Y_df).reshape(-1)
print(X_df.shape,Y_df.shape)

# Correlate the target with each of the first 1000 predictor columns.
# list_corr_df1 collects the absolute value of each Pearson coefficient.
list_titles = X_df.columns
list_corr_df1 = []
for col in list_titles[0:1000]:
    print(col)
    # Compute the coefficient once and reuse it for both the list and the log line
    # (np.corrcoef returns a 2x2 matrix; [0][1] is the cross-correlation entry).
    corr = np.corrcoef(Y_df, X_df[col])[0][1]
    list_corr_df1.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(col) + ': ' + str(corr))

0       872073747
1       251996626
2       800147014
3       369040633
4       921658086
          ...    
9995    989516791
9996     27634483
9997    750006193
9998    192802361
9999    977376776
Name: 0, Length: 10000, dtype: int32
(10000, 99999) (10000,)
1
Correlation matrix for column 0 and and column1: 0.009214686148022405
2
Correlation matrix for column 0 and and column2: -0.004699783904039144
3
Correlation matrix for column 0 and and column3: -0.002340682167029814
4
Correlation matrix for column 0 and and column4: 0.006211659613111897
5
Correlation matrix for column 0 and and column5: 0.008764603805628537
6
Correlation matrix for column 0 and and column6: -0.01541585269586246
7
Correlation matrix for column 0 and and column7: -0.014414214959192975
8
Correlation matrix for column 0 and and column8: 0.007030649427337994
9
Correlation matrix for column 0 and and column9: -0.020632535980945412
10
Correlation matrix for column 0 and and column10: 0.004395901380280515
11
Correlation 

Correlation matrix for column 0 and and column162: 0.006053835748215617
163
Correlation matrix for column 0 and and column163: -0.0011485799062097494
164
Correlation matrix for column 0 and and column164: -0.010670618891757395
165
Correlation matrix for column 0 and and column165: 0.0024446264894044836
166
Correlation matrix for column 0 and and column166: -0.01661426306312305
167
Correlation matrix for column 0 and and column167: -0.006189435812169962
168
Correlation matrix for column 0 and and column168: -0.014311086811888215
169
Correlation matrix for column 0 and and column169: -0.00468614250313615
170
Correlation matrix for column 0 and and column170: 0.007427883217591787
171
Correlation matrix for column 0 and and column171: 0.0146069968752276
172
Correlation matrix for column 0 and and column172: 0.010486903281722469
173
Correlation matrix for column 0 and and column173: 0.00659219028734809
174
Correlation matrix for column 0 and and column174: -0.016140625173025187
175
Correlat

Correlation matrix for column 0 and and column366: -0.006948751432275262
367
Correlation matrix for column 0 and and column367: -0.0132474376663599
368
Correlation matrix for column 0 and and column368: -0.0014796331085645618
369
Correlation matrix for column 0 and and column369: 0.006713717629576004
370
Correlation matrix for column 0 and and column370: 0.00011156860240021158
371
Correlation matrix for column 0 and and column371: 0.004992895379231731
372
Correlation matrix for column 0 and and column372: -7.181595021480045e-06
373
Correlation matrix for column 0 and and column373: 0.01652908219450314
374
Correlation matrix for column 0 and and column374: -0.008286831792443312
375
Correlation matrix for column 0 and and column375: -0.0017227871660079367
376
Correlation matrix for column 0 and and column376: -0.005139346244419526
377
Correlation matrix for column 0 and and column377: 0.0005259825647747002
378
Correlation matrix for column 0 and and column378: -0.008038446500309037
379
C

Correlation matrix for column 0 and and column600: -0.002450860829938135
601
Correlation matrix for column 0 and and column601: 0.010873444571382737
602
Correlation matrix for column 0 and and column602: -0.007012857082526718
603
Correlation matrix for column 0 and and column603: -0.0006559434585875003
604
Correlation matrix for column 0 and and column604: -0.003090344748838689
605
Correlation matrix for column 0 and and column605: 2.6265345947353333e-07
606
Correlation matrix for column 0 and and column606: 0.011302098227770351
607
Correlation matrix for column 0 and and column607: 0.0023501023858363604
608
Correlation matrix for column 0 and and column608: 0.0159633902888563
609
Correlation matrix for column 0 and and column609: 0.006582234102554061
610
Correlation matrix for column 0 and and column610: -0.012613950742923206
611
Correlation matrix for column 0 and and column611: -0.019169716637810794
612
Correlation matrix for column 0 and and column612: 0.002912876994351365
613
Corr

Correlation matrix for column 0 and and column869: 0.0067127012871213134
870
Correlation matrix for column 0 and and column870: 0.0022245022352111645
871
Correlation matrix for column 0 and and column871: 0.00038827017858548686
872
Correlation matrix for column 0 and and column872: -0.014832109308119734
873
Correlation matrix for column 0 and and column873: -0.014486146210055294
874
Correlation matrix for column 0 and and column874: 0.0010291272735348318
875
Correlation matrix for column 0 and and column875: 0.0033195348506298164
876
Correlation matrix for column 0 and and column876: -0.002851677757818628
877
Correlation matrix for column 0 and and column877: -0.006448493683756261
878
Correlation matrix for column 0 and and column878: -0.008340858087023645
879
Correlation matrix for column 0 and and column879: -0.0013503943607910318
880
Correlation matrix for column 0 and and column880: -0.005616763626756485
881
Correlation matrix for column 0 and and column881: 0.013801089440042907
88

To see whether the correlations between the different parameters indeed increase with the data size, the original dataframe is compared to a subset dataframe which only takes the first z rows of the original dataframe. If the correlations in the original dataframe are higher than in the smaller dataframe, this would indicate that the bigger the data size, the more frequent the spurious correlations.

In [38]:
# Subset frame: keep only the first z rows of the original dataframe
df2 = df.head(z)
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99990,99991,99992,99993,99994,99995,99996,99997,99998,99999
0,872073747,903989380,399023275,322626451,903868902,422889755,897827922,805199782,229175345,511026146,...,133433026,825454701,766832076,10835451,206857799,931519124,445493385,13627321,377941084,817978136
1,251996626,669423058,491030335,854142032,533412632,626001454,75446068,592738491,887562369,714723181,...,313840734,324561207,408764051,863633024,314889605,43419079,419684045,477406756,372310181,778881042
2,800147014,270278478,983887160,610627664,829790184,928872588,489648784,827890245,735445785,465357517,...,608689400,471126319,676389541,976022363,864628482,900497500,233353796,703336368,306857246,156934970
3,369040633,924040758,366930105,877373262,218460705,846898430,400966572,800624566,794508885,801324080,...,549279389,321644961,732831867,633996286,275403484,751542273,599847548,772548265,392615921,228802836
4,921658086,345862113,778982241,822135816,560833148,581721136,50110312,814896665,444392277,937644294,...,430882688,555197511,324569834,310602591,743285371,439774371,201151497,781762801,439004191,139856025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,681827308,993769623,748500846,880349312,281196327,667749991,601285751,443090504,487856962,399579716,...,409052583,190118616,101402125,871373827,611851918,950656379,708072600,43077036,41144198,379249405
996,627954677,613420423,721230472,500996008,486878176,446455796,375230509,376750873,166377903,157493493,...,798718031,472193464,845074054,118360442,169690367,68142071,889305981,960047508,858289129,98144765
997,550699866,111410306,841756363,827401903,637233704,168056529,626458043,383535784,747102310,708422105,...,331355846,705374592,726797332,289851624,707528179,546286137,817608175,965876524,7138435,28713759
998,942423984,782420020,703099967,462299842,852726621,291056987,697330155,484474831,357348272,861698445,...,732877085,329826432,885677993,206279618,968955870,572671296,372475105,759615135,411603355,496028750


In [39]:
# Predictors: every column of the subset frame except column 0
X_df2 = df2.drop(columns=0)

# Target: column 0 of the subset frame
Y_df2 = df2[0]
print(Y_df2)

# Flatten the target into a 1-D NumPy array (it is an ndarray, not a Python list)
Y_df2 = np.array(Y_df2).reshape(-1)
print(X_df2.shape,Y_df2.shape)

list_titles = X_df2.columns

# Correlate the target with each of the first 1000 predictor columns.
# list_corr_df2 collects the absolute value of each Pearson coefficient.
list_corr_df2 = []
for col in list_titles[0:1000]:
    print(col)
    # Compute the coefficient once and reuse it for both the list and the log line
    # (np.corrcoef returns a 2x2 matrix; [0][1] is the cross-correlation entry).
    corr = np.corrcoef(Y_df2, X_df2[col])[0][1]
    list_corr_df2.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(col) + ': ' + str(corr))

0      872073747
1      251996626
2      800147014
3      369040633
4      921658086
         ...    
995    681827308
996    627954677
997    550699866
998    942423984
999    104242555
Name: 0, Length: 1000, dtype: int32
(1000, 99999) (1000,)
1
Correlation matrix for column 0 and and column1: 0.012098248479574395
2
Correlation matrix for column 0 and and column2: -0.05338237498813939
3
Correlation matrix for column 0 and and column3: 0.017237391127503325
4
Correlation matrix for column 0 and and column4: -0.029553926292561285
5
Correlation matrix for column 0 and and column5: 0.04249394065802384
6
Correlation matrix for column 0 and and column6: -0.011873210384704598
7
Correlation matrix for column 0 and and column7: -0.004540719982220262
8
Correlation matrix for column 0 and and column8: 0.016629996573800886
9
Correlation matrix for column 0 and and column9: -0.00174830351065248
10
Correlation matrix for column 0 and and column10: 0.01840297971283176
11
Correlation matrix for column

Correlation matrix for column 0 and and column350: -0.010405171172410514
351
Correlation matrix for column 0 and and column351: -0.007488530647778652
352
Correlation matrix for column 0 and and column352: -0.060366017589713786
353
Correlation matrix for column 0 and and column353: -0.032827222072635914
354
Correlation matrix for column 0 and and column354: 0.028978997995679705
355
Correlation matrix for column 0 and and column355: -0.01350955175635475
356
Correlation matrix for column 0 and and column356: 0.04623377609814413
357
Correlation matrix for column 0 and and column357: -0.035932922298957344
358
Correlation matrix for column 0 and and column358: 0.050965274925222126
359
Correlation matrix for column 0 and and column359: 0.030878362972411248
360
Correlation matrix for column 0 and and column360: 0.009376971498979115
361
Correlation matrix for column 0 and and column361: -0.00013509887270518224
362
Correlation matrix for column 0 and and column362: 0.019214390822429695
363
Corre

690
Correlation matrix for column 0 and and column690: -0.013048915235015501
691
Correlation matrix for column 0 and and column691: -0.012005384257473134
692
Correlation matrix for column 0 and and column692: 0.029853652985879227
693
Correlation matrix for column 0 and and column693: 0.004107214879273798
694
Correlation matrix for column 0 and and column694: -0.004539625248131697
695
Correlation matrix for column 0 and and column695: 0.025999032013101477
696
Correlation matrix for column 0 and and column696: -0.0573920500499813
697
Correlation matrix for column 0 and and column697: 0.07241570826692245
698
Correlation matrix for column 0 and and column698: -0.023510022528965814
699
Correlation matrix for column 0 and and column699: 0.06505006429938887
700
Correlation matrix for column 0 and and column700: -0.019890020929558196
701
Correlation matrix for column 0 and and column701: 0.01725419775432365
702
Correlation matrix for column 0 and and column702: -0.017762492019744926
703
Correl

We now compare, column by column, which of the two correlation lists has the larger absolute correlation.

In [40]:
# Column-by-column comparison of the absolute correlations of the two frames.
a = 0  # columns where the full frame (list_corr_df1) has the larger |correlation|
b = 0  # columns where the subset frame (list_corr_df2) has the larger |correlation|

# zip() pairs the two lists instead of indexing a hard-coded range(0, 1000),
# so the cell no longer raises IndexError when fewer than 1000 correlations
# were collected (e.g. when y is set below 1001).
for corr_df1, corr_df2 in zip(list_corr_df1, list_corr_df2):
    if abs(corr_df1) > abs(corr_df2):
        a+=1
    elif abs(corr_df1) < abs(corr_df2):
        b+=1
    else:
        # Tie (or a NaN comparison): counted for neither side; a blank line is emitted
        print()

print(a)
print(b)

177
823


In [48]:
# Share of columns whose absolute correlation with column 0 exceeds the cut-off.
threshold = 0.01

# a: number of entries above the cut-off in the full frame's correlation list
a = sum(1 for corr in list_corr_df1 if abs(corr) > threshold)

# b: number of entries above the cut-off in the subset frame's correlation list
b = sum(1 for corr in list_corr_df2 if abs(corr) > threshold)

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df2:",(b/len(list_corr_df2)*100),"%")

Percentage of correlations in df1: 33.4 %
Percentage of correlations in df2: 75.3 %


It is clearly seen that the lower the number of observations, the higher the frequency of spurious correlations (here counted as absolute correlations above 0.01). However, the number of spurious correlations present in the bigger DataFrame is still very significant.

To counter spurious correlations, random projection can be used.

## Random Projection eps = 0.1

In [42]:
# Project the original frame (df) onto a lower-dimensional space with a
# Gaussian random projection. n_components is left at its default, so the
# target dimension is derived by scikit-learn from the number of rows and eps.
# random_state is fixed so the projection matrix, and hence df_new, is
# reproducible across kernel restarts.
transformer = random_projection.GaussianRandomProjection(eps = 0.1, random_state = 42)
df_new = pd.DataFrame(transformer.fit_transform(df))
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7884,7885,7886,7887,7888,7889,7890,7891,7892,7893
0,1.484225e+09,1.118921e+09,-2.539242e+07,2.858366e+09,1.485842e+09,3.361930e+08,8.336228e+08,2.972502e+09,-2.677288e+09,-1.083932e+09,...,4.720754e+08,-7.882851e+08,-5.738120e+08,2.087300e+09,3.583251e+09,-1.144161e+09,-1.709217e+09,-2.926024e+08,-1.467306e+08,1.811614e+09
1,1.199804e+09,-1.651561e+08,-1.022132e+09,3.327215e+09,2.616317e+07,-9.187875e+08,-6.767529e+07,2.450458e+09,-1.679263e+09,-9.509068e+08,...,1.361902e+09,-1.589388e+08,-8.832224e+08,2.119889e+09,8.216936e+08,1.448752e+09,-4.679885e+09,1.645671e+09,2.985250e+09,9.570400e+08
2,2.454021e+09,3.443582e+07,-1.620048e+09,2.758605e+09,5.713138e+08,-1.068899e+09,-9.429875e+08,1.214977e+09,-3.258432e+09,1.257014e+09,...,1.102435e+09,-1.519321e+09,2.381756e+09,-1.639367e+08,6.296378e+08,-2.687804e+08,-6.894021e+07,-2.018966e+09,6.953583e+08,2.672956e+09
3,-2.461729e+08,-1.137156e+09,-6.394306e+08,2.675966e+09,-7.912780e+08,8.355362e+08,-5.486596e+08,5.030936e+09,-2.242501e+09,3.573773e+08,...,1.812725e+08,6.523175e+08,9.685745e+08,3.160620e+09,1.017633e+09,5.871103e+08,-1.881100e+09,-1.028822e+09,1.349449e+09,2.079865e+09
4,1.547519e+09,3.356537e+08,4.750168e+08,2.504782e+09,-6.828670e+08,-4.807127e+08,2.373320e+09,1.781261e+09,-1.448402e+09,-1.631405e+08,...,2.644208e+08,-2.761625e+08,3.904447e+09,2.473243e+09,2.219692e+09,-7.146055e+08,-3.142641e+09,-1.400697e+09,1.214324e+09,-4.036532e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.227441e+09,-1.006630e+09,-3.878772e+08,1.151258e+09,-1.526170e+08,-4.956061e+08,1.067981e+09,1.648397e+09,-1.170723e+09,-9.387459e+08,...,-2.042586e+09,-5.165548e+08,5.761787e+08,1.330836e+09,2.624516e+08,-2.675253e+08,-1.593776e+09,-6.169894e+08,1.817740e+09,1.435026e+09
9996,1.809993e+09,1.857508e+09,-8.716341e+08,1.674449e+09,-5.986082e+08,9.719698e+08,-3.540289e+08,3.517571e+09,-2.814293e+09,-7.144502e+08,...,1.528135e+09,-3.296717e+08,-4.239193e+08,8.359306e+08,1.360881e+09,9.303900e+08,-2.445314e+09,-3.997697e+08,6.027276e+08,3.681683e+09
9997,2.150359e+09,-5.129259e+08,4.738295e+08,4.574023e+09,1.259924e+09,-1.736499e+09,-1.308518e+08,2.352243e+09,-3.500654e+09,3.072573e+08,...,4.146384e+08,-1.188204e+09,7.186123e+08,5.981392e+08,1.677422e+09,1.140628e+09,-2.166499e+09,-2.630649e+08,5.073642e+08,9.522612e+07
9998,2.907970e+09,5.210407e+08,3.061927e+08,4.677330e+08,2.222110e+09,2.107896e+09,-1.050973e+09,4.377731e+09,-2.120965e+09,-3.615958e+08,...,-1.342809e+09,-2.691882e+09,4.061916e+08,1.656485e+09,1.574762e+09,-2.138112e+09,-1.245322e+09,-1.419236e+08,3.199001e+09,2.223739e+09


In [43]:
# Predictors: every projected column except column 0
X_df_new = df_new.drop(columns=0)

# Target: projected column 0
Y_df_new = df_new[0]
print(Y_df_new)

# Flatten the target into a 1-D NumPy array (it is an ndarray, not a Python list)
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

# Correlate the target with every remaining projected column.
# list_corr_df_new collects the absolute value of each Pearson coefficient.
list_corr_df_new = []
for col in list_titles:
    print(col)
    # Compute the coefficient once and reuse it for both the list and the log line
    # (np.corrcoef returns a 2x2 matrix; [0][1] is the cross-correlation entry).
    corr = np.corrcoef(Y_df_new, X_df_new[col])[0][1]
    list_corr_df_new.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(col) + ': ' + str(corr))

0       1.484225e+09
1       1.199804e+09
2       2.454021e+09
3      -2.461729e+08
4       1.547519e+09
            ...     
9995    2.227441e+09
9996    1.809993e+09
9997    2.150359e+09
9998    2.907970e+09
9999    2.193476e+09
Name: 0, Length: 10000, dtype: float64
(10000, 7893) (10000,)
1
Correlation matrix for column 0 and and column1: -0.004409411897096787
2
Correlation matrix for column 0 and and column2: 0.009498281154865363
3
Correlation matrix for column 0 and and column3: 0.020610983550470998
4
Correlation matrix for column 0 and and column4: 0.014659360604565987
5
Correlation matrix for column 0 and and column5: 0.0093525507041477
6
Correlation matrix for column 0 and and column6: 0.00034774733992954266
7
Correlation matrix for column 0 and and column7: -0.002454025326846769
8
Correlation matrix for column 0 and and column8: 0.011094174298420918
9
Correlation matrix for column 0 and and column9: -0.01958104920860702
10
Correlation matrix for column 0 and and column10: -0.0

Correlation matrix for column 0 and and column163: -0.0009143525516508765
164
Correlation matrix for column 0 and and column164: -0.0008286256825067659
165
Correlation matrix for column 0 and and column165: 0.012321624161193777
166
Correlation matrix for column 0 and and column166: -0.004202132707499154
167
Correlation matrix for column 0 and and column167: 0.011903238734209736
168
Correlation matrix for column 0 and and column168: 0.0025706793942515222
169
Correlation matrix for column 0 and and column169: 0.00423114936879858
170
Correlation matrix for column 0 and and column170: -0.021458472842614165
171
Correlation matrix for column 0 and and column171: 0.009316311700314986
172
Correlation matrix for column 0 and and column172: 0.007141591762869485
173
Correlation matrix for column 0 and and column173: -0.010814855208838415
174
Correlation matrix for column 0 and and column174: -0.009198702401564562
175
Correlation matrix for column 0 and and column175: -0.00038261170804892286
176
C

Correlation matrix for column 0 and and column349: -0.0003565889475765898
350
Correlation matrix for column 0 and and column350: 0.002212793721584673
351
Correlation matrix for column 0 and and column351: -0.008178706431871918
352
Correlation matrix for column 0 and and column352: 0.017201422514269742
353
Correlation matrix for column 0 and and column353: 0.0021364199071842943
354
Correlation matrix for column 0 and and column354: 0.00220664523825943
355
Correlation matrix for column 0 and and column355: 0.0006033690960652226
356
Correlation matrix for column 0 and and column356: -0.008196223873260576
357
Correlation matrix for column 0 and and column357: 0.014584590061745863
358
Correlation matrix for column 0 and and column358: 0.006435137842075486
359
Correlation matrix for column 0 and and column359: 0.005450621924566075
360
Correlation matrix for column 0 and and column360: 0.015889543301854717
361
Correlation matrix for column 0 and and column361: 0.0010215490608340486
362
Correl

Correlation matrix for column 0 and and column515: -0.0019927437081863333
516
Correlation matrix for column 0 and and column516: 0.006536285746277714
517
Correlation matrix for column 0 and and column517: -0.010325179685710265
518
Correlation matrix for column 0 and and column518: 0.009815452358534347
519
Correlation matrix for column 0 and and column519: -0.004318250547809351
520
Correlation matrix for column 0 and and column520: 0.0004876502626068109
521
Correlation matrix for column 0 and and column521: 0.007757270012863389
522
Correlation matrix for column 0 and and column522: 0.0016109090751307725
523
Correlation matrix for column 0 and and column523: 0.010271626361887793
524
Correlation matrix for column 0 and and column524: -0.011225345212111272
525
Correlation matrix for column 0 and and column525: -0.010929892569516608
526
Correlation matrix for column 0 and and column526: 0.011273366693770307
527
Correlation matrix for column 0 and and column527: -0.01151298271305594
528
Corr

683
Correlation matrix for column 0 and and column683: 0.0024892511727459057
684
Correlation matrix for column 0 and and column684: 0.012008159012962283
685
Correlation matrix for column 0 and and column685: 0.0043852001248863605
686
Correlation matrix for column 0 and and column686: 0.006124033641172733
687
Correlation matrix for column 0 and and column687: 0.0027754983682849238
688
Correlation matrix for column 0 and and column688: 0.00959150740553858
689
Correlation matrix for column 0 and and column689: 0.010197386213335343
690
Correlation matrix for column 0 and and column690: 0.00886006219801557
691
Correlation matrix for column 0 and and column691: 0.0016744549244166731
692
Correlation matrix for column 0 and and column692: 0.0029226701346998624
693
Correlation matrix for column 0 and and column693: -0.007727331330196842
694
Correlation matrix for column 0 and and column694: 0.016128935477464554
695
Correlation matrix for column 0 and and column695: -0.00807694405935388
696
Corr

Correlation matrix for column 0 and and column851: -0.013041301675969315
852
Correlation matrix for column 0 and and column852: -0.01333349328461918
853
Correlation matrix for column 0 and and column853: 0.0024241904355732044
854
Correlation matrix for column 0 and and column854: 0.007444890803842488
855
Correlation matrix for column 0 and and column855: -0.012934548149480165
856
Correlation matrix for column 0 and and column856: 0.004162618835079776
857
Correlation matrix for column 0 and and column857: 0.005400026903963135
858
Correlation matrix for column 0 and and column858: -0.0053738419472473455
859
Correlation matrix for column 0 and and column859: -0.006773304341086519
860
Correlation matrix for column 0 and and column860: 0.012729994135393599
861
Correlation matrix for column 0 and and column861: 0.015697303989463608
862
Correlation matrix for column 0 and and column862: 0.010333978529896263
863
Correlation matrix for column 0 and and column863: 0.008808066017242317
864
Correl

1016
Correlation matrix for column 0 and and column1016: -0.005373850160018177
1017
Correlation matrix for column 0 and and column1017: -0.009608087941744878
1018
Correlation matrix for column 0 and and column1018: 0.00019522897530461017
1019
Correlation matrix for column 0 and and column1019: -0.006447053657957929
1020
Correlation matrix for column 0 and and column1020: 0.011294815113692097
1021
Correlation matrix for column 0 and and column1021: -0.005365215865436388
1022
Correlation matrix for column 0 and and column1022: -0.0014886204545179536
1023
Correlation matrix for column 0 and and column1023: -0.002990062694989928
1024
Correlation matrix for column 0 and and column1024: -0.00015310118146627514
1025
Correlation matrix for column 0 and and column1025: 0.01106910963315845
1026
Correlation matrix for column 0 and and column1026: 0.0009039660853410625
1027
Correlation matrix for column 0 and and column1027: 0.0023177150815366196
1028
Correlation matrix for column 0 and and column

Correlation matrix for column 0 and and column1183: -0.0030995161452928954
1184
Correlation matrix for column 0 and and column1184: 0.00845922192060211
1185
Correlation matrix for column 0 and and column1185: 0.023051554101876637
1186
Correlation matrix for column 0 and and column1186: 0.0076997842899759325
1187
Correlation matrix for column 0 and and column1187: 0.006908412857692226
1188
Correlation matrix for column 0 and and column1188: 0.0018653321794978594
1189
Correlation matrix for column 0 and and column1189: -0.015001129169689348
1190
Correlation matrix for column 0 and and column1190: 0.014397342328498996
1191
Correlation matrix for column 0 and and column1191: 0.002724011456293261
1192
Correlation matrix for column 0 and and column1192: -0.0016838579047763276
1193
Correlation matrix for column 0 and and column1193: 0.00257679950550614
1194
Correlation matrix for column 0 and and column1194: 0.0011931459625632466
1195
Correlation matrix for column 0 and and column1195: 0.0047

Correlation matrix for column 0 and and column1328: 0.010962785066910983
1329
Correlation matrix for column 0 and and column1329: -0.0025802557957403044
1330
Correlation matrix for column 0 and and column1330: -0.006489810012116174
1331
Correlation matrix for column 0 and and column1331: -0.003251663938459666
1332
Correlation matrix for column 0 and and column1332: -0.007812786459486382
1333
Correlation matrix for column 0 and and column1333: -0.00855378071433425
1334
Correlation matrix for column 0 and and column1334: -0.009349350820018287
1335
Correlation matrix for column 0 and and column1335: 0.012045347958880905
1336
Correlation matrix for column 0 and and column1336: -0.005838733342230888
1337
Correlation matrix for column 0 and and column1337: 0.004914769748347394
1338
Correlation matrix for column 0 and and column1338: 0.00035141086630127087
1339
Correlation matrix for column 0 and and column1339: -0.015195411462425324
1340
Correlation matrix for column 0 and and column1340: -0

1469
Correlation matrix for column 0 and and column1469: 0.02118398490629896
1470
Correlation matrix for column 0 and and column1470: 0.023692323600969997
1471
Correlation matrix for column 0 and and column1471: -0.014772182295384422
1472
Correlation matrix for column 0 and and column1472: 0.012193873716263858
1473
Correlation matrix for column 0 and and column1473: -0.00569989676568887
1474
Correlation matrix for column 0 and and column1474: 0.019100554283062423
1475
Correlation matrix for column 0 and and column1475: 0.0001465086073176985
1476
Correlation matrix for column 0 and and column1476: 0.019819999397761058
1477
Correlation matrix for column 0 and and column1477: 0.006070534133162462
1478
Correlation matrix for column 0 and and column1478: -0.00996801015074573
1479
Correlation matrix for column 0 and and column1479: -0.006696506288917616
1480
Correlation matrix for column 0 and and column1480: -0.0010118789829637843
1481
Correlation matrix for column 0 and and column1481: 0.0

Correlation matrix for column 0 and and column1610: -0.021910067563633732
1611
Correlation matrix for column 0 and and column1611: -0.0031669814374410406
1612
Correlation matrix for column 0 and and column1612: -0.008013712010275047
1613
Correlation matrix for column 0 and and column1613: -0.003701748779263242
1614
Correlation matrix for column 0 and and column1614: -0.000625502281943168
1615
Correlation matrix for column 0 and and column1615: -0.009870056798442016
1616
Correlation matrix for column 0 and and column1616: -0.002168099938409071
1617
Correlation matrix for column 0 and and column1617: -0.01923285857519455
1618
Correlation matrix for column 0 and and column1618: -0.01687546426107496
1619
Correlation matrix for column 0 and and column1619: 0.01787686350545867
1620
Correlation matrix for column 0 and and column1620: -0.007419014306571185
1621
Correlation matrix for column 0 and and column1621: -0.006632772314660683
1622
Correlation matrix for column 0 and and column1622: 0.0

Correlation matrix for column 0 and and column1797: 0.006221173299984424
1798
Correlation matrix for column 0 and and column1798: 0.008624006203932845
1799
Correlation matrix for column 0 and and column1799: -0.003593750642693915
1800
Correlation matrix for column 0 and and column1800: 0.014913383712496982
1801
Correlation matrix for column 0 and and column1801: 0.0017623961244240816
1802
Correlation matrix for column 0 and and column1802: -0.0033309281017258026
1803
Correlation matrix for column 0 and and column1803: 0.0006407212014109479
1804
Correlation matrix for column 0 and and column1804: -0.002517830626431481
1805
Correlation matrix for column 0 and and column1805: 0.002548137739626428
1806
Correlation matrix for column 0 and and column1806: -0.011900106146336036
1807
Correlation matrix for column 0 and and column1807: -0.0031772969946683455
1808
Correlation matrix for column 0 and and column1808: 0.009017054913674048
1809
Correlation matrix for column 0 and and column1809: 0.0

Correlation matrix for column 0 and and column1991: 0.0005887345144783509
1992
Correlation matrix for column 0 and and column1992: -0.000823850725127335
1993
Correlation matrix for column 0 and and column1993: 0.002743248401857458
1994
Correlation matrix for column 0 and and column1994: -0.007521212973967929
1995
Correlation matrix for column 0 and and column1995: 0.0003349842725084161
1996
Correlation matrix for column 0 and and column1996: -0.012638569094968456
1997
Correlation matrix for column 0 and and column1997: -0.0032910629287163924
1998
Correlation matrix for column 0 and and column1998: -0.02473433207878576
1999
Correlation matrix for column 0 and and column1999: -0.012962187464464967
2000
Correlation matrix for column 0 and and column2000: -0.016904121782740884
2001
Correlation matrix for column 0 and and column2001: 0.005367549001667362
2002
Correlation matrix for column 0 and and column2002: -0.003180757937617517
2003
Correlation matrix for column 0 and and column2003: 0.

Correlation matrix for column 0 and and column2175: 0.002297902502594304
2176
Correlation matrix for column 0 and and column2176: -0.014047386510418147
2177
Correlation matrix for column 0 and and column2177: 0.019449578232026067
2178
Correlation matrix for column 0 and and column2178: 0.0019757040283769757
2179
Correlation matrix for column 0 and and column2179: 0.021198863538380033
2180
Correlation matrix for column 0 and and column2180: -0.020269671747463722
2181
Correlation matrix for column 0 and and column2181: 0.01705907838759604
2182
Correlation matrix for column 0 and and column2182: 0.008103137747232418
2183
Correlation matrix for column 0 and and column2183: -0.0053327969400604
2184
Correlation matrix for column 0 and and column2184: -0.001464316380301221
2185
Correlation matrix for column 0 and and column2185: -0.00574992047776929
2186
Correlation matrix for column 0 and and column2186: 0.009362744764637351
2187
Correlation matrix for column 0 and and column2187: -0.0107044

2336
Correlation matrix for column 0 and and column2336: 0.010116146507616773
2337
Correlation matrix for column 0 and and column2337: 0.016206399817505508
2338
Correlation matrix for column 0 and and column2338: -0.02110879456705317
2339
Correlation matrix for column 0 and and column2339: 0.010474474796715812
2340
Correlation matrix for column 0 and and column2340: -0.019365749497311263
2341
Correlation matrix for column 0 and and column2341: -0.015248837905369074
2342
Correlation matrix for column 0 and and column2342: 0.007203488517173141
2343
Correlation matrix for column 0 and and column2343: -0.004875401223770958
2344
Correlation matrix for column 0 and and column2344: -0.00740802375752169
2345
Correlation matrix for column 0 and and column2345: -0.0027214798585436305
2346
Correlation matrix for column 0 and and column2346: 0.001421235166997576
2347
Correlation matrix for column 0 and and column2347: 0.007895155088375086
2348
Correlation matrix for column 0 and and column2348: -0

Correlation matrix for column 0 and and column2491: -0.00496725571121678
2492
Correlation matrix for column 0 and and column2492: -0.008655803279732503
2493
Correlation matrix for column 0 and and column2493: -0.004826005710675435
2494
Correlation matrix for column 0 and and column2494: 0.0012956046092906601
2495
Correlation matrix for column 0 and and column2495: -0.0006897862382945876
2496
Correlation matrix for column 0 and and column2496: 0.001587001861458405
2497
Correlation matrix for column 0 and and column2497: 0.019058602442372338
2498
Correlation matrix for column 0 and and column2498: 0.0254124197461826
2499
Correlation matrix for column 0 and and column2499: -0.01740049118237916
2500
Correlation matrix for column 0 and and column2500: 0.011707439153137714
2501
Correlation matrix for column 0 and and column2501: 0.0031816255302821064
2502
Correlation matrix for column 0 and and column2502: -0.04530045248827363
2503
Correlation matrix for column 0 and and column2503: 0.006541

2645
Correlation matrix for column 0 and and column2645: 0.003551904302886096
2646
Correlation matrix for column 0 and and column2646: -0.015788091740913935
2647
Correlation matrix for column 0 and and column2647: 0.0035688849364628006
2648
Correlation matrix for column 0 and and column2648: 0.0029854209718057603
2649
Correlation matrix for column 0 and and column2649: 0.0036643496772149446
2650
Correlation matrix for column 0 and and column2650: -0.019821300674230245
2651
Correlation matrix for column 0 and and column2651: 0.0030109695206889534
2652
Correlation matrix for column 0 and and column2652: -0.003354327708263191
2653
Correlation matrix for column 0 and and column2653: 0.0007026094990828948
2654
Correlation matrix for column 0 and and column2654: -0.0015986383586927863
2655
Correlation matrix for column 0 and and column2655: 0.003032011880139796
2656
Correlation matrix for column 0 and and column2656: -0.002820034694399259
2657
Correlation matrix for column 0 and and column26

Correlation matrix for column 0 and and column2832: 0.0030317670830762016
2833
Correlation matrix for column 0 and and column2833: -0.007295732131402378
2834
Correlation matrix for column 0 and and column2834: 0.010214434831570205
2835
Correlation matrix for column 0 and and column2835: -0.0004349496833509417
2836
Correlation matrix for column 0 and and column2836: -0.001799652946144551
2837
Correlation matrix for column 0 and and column2837: 0.0048640223221882665
2838
Correlation matrix for column 0 and and column2838: -0.02178125591497732
2839
Correlation matrix for column 0 and and column2839: -0.00045105978997209205
2840
Correlation matrix for column 0 and and column2840: 0.02288276258951811
2841
Correlation matrix for column 0 and and column2841: 0.008109522554116467
2842
Correlation matrix for column 0 and and column2842: -0.017338140905070795
2843
Correlation matrix for column 0 and and column2843: 0.003403520230040676
2844
Correlation matrix for column 0 and and column2844: 0.0

Correlation matrix for column 0 and and column3004: 0.001810386615107151
3005
Correlation matrix for column 0 and and column3005: -0.004942020575764493
3006
Correlation matrix for column 0 and and column3006: -0.0047235011511890124
3007
Correlation matrix for column 0 and and column3007: 0.004588306086556676
3008
Correlation matrix for column 0 and and column3008: -0.0008226813432228193
3009
Correlation matrix for column 0 and and column3009: -0.0025140576198166725
3010
Correlation matrix for column 0 and and column3010: -0.007910655095693876
3011
Correlation matrix for column 0 and and column3011: 0.0005962418102975213
3012
Correlation matrix for column 0 and and column3012: -0.00011345294235572714
3013
Correlation matrix for column 0 and and column3013: 0.01310913462868372
3014
Correlation matrix for column 0 and and column3014: 0.0017735490762642715
3015
Correlation matrix for column 0 and and column3015: 0.004328457046778057
3016
Correlation matrix for column 0 and and column3016: 

3169
Correlation matrix for column 0 and and column3169: 0.009288119591279183
3170
Correlation matrix for column 0 and and column3170: -0.01031400669078475
3171
Correlation matrix for column 0 and and column3171: 0.002994348763463964
3172
Correlation matrix for column 0 and and column3172: -0.00020871158827565542
3173
Correlation matrix for column 0 and and column3173: -0.007330066193087131
3174
Correlation matrix for column 0 and and column3174: -0.0021316688590357546
3175
Correlation matrix for column 0 and and column3175: 0.000948638284720704
3176
Correlation matrix for column 0 and and column3176: -0.0020815744124822095
3177
Correlation matrix for column 0 and and column3177: -0.03486552100584616
3178
Correlation matrix for column 0 and and column3178: 0.0019150046238194842
3179
Correlation matrix for column 0 and and column3179: -0.014543587674575848
3180
Correlation matrix for column 0 and and column3180: 0.0007035896001408049
3181
Correlation matrix for column 0 and and column31

Correlation matrix for column 0 and and column3348: 0.010799814670733364
3349
Correlation matrix for column 0 and and column3349: 0.0026795999100306775
3350
Correlation matrix for column 0 and and column3350: 0.006984490900984946
3351
Correlation matrix for column 0 and and column3351: 6.339324690950868e-05
3352
Correlation matrix for column 0 and and column3352: 0.001549069673946255
3353
Correlation matrix for column 0 and and column3353: -0.003035856734974402
3354
Correlation matrix for column 0 and and column3354: 0.0019685033499108285
3355
Correlation matrix for column 0 and and column3355: 0.01579095273053973
3356
Correlation matrix for column 0 and and column3356: 0.0026728385657064594
3357
Correlation matrix for column 0 and and column3357: 0.003373317612071645
3358
Correlation matrix for column 0 and and column3358: 0.006457052235177099
3359
Correlation matrix for column 0 and and column3359: -0.0037996712954602692
3360
Correlation matrix for column 0 and and column3360: 0.0145

Correlation matrix for column 0 and and column3576: -0.0024998743808941864
3577
Correlation matrix for column 0 and and column3577: 0.015007823774179927
3578
Correlation matrix for column 0 and and column3578: 0.015381121950468578
3579
Correlation matrix for column 0 and and column3579: 0.0036248383029196356
3580
Correlation matrix for column 0 and and column3580: -0.011053762122134696
3581
Correlation matrix for column 0 and and column3581: 0.0015392243143352686
3582
Correlation matrix for column 0 and and column3582: 0.023245704027444015
3583
Correlation matrix for column 0 and and column3583: 0.011409036185115183
3584
Correlation matrix for column 0 and and column3584: 0.004769282010929442
3585
Correlation matrix for column 0 and and column3585: -0.003081672871510333
3586
Correlation matrix for column 0 and and column3586: 0.02611461450707696
3587
Correlation matrix for column 0 and and column3587: -0.002403287569737664
3588
Correlation matrix for column 0 and and column3588: -0.008

Correlation matrix for column 0 and and column3789: 0.011769352723954261
3790
Correlation matrix for column 0 and and column3790: -0.0052386267086644255
3791
Correlation matrix for column 0 and and column3791: 0.008682746723325906
3792
Correlation matrix for column 0 and and column3792: -0.002768394880948254
3793
Correlation matrix for column 0 and and column3793: -0.00636726681129241
3794
Correlation matrix for column 0 and and column3794: -0.010846004795234848
3795
Correlation matrix for column 0 and and column3795: -0.010575657347745938
3796
Correlation matrix for column 0 and and column3796: -0.002273752651321998
3797
Correlation matrix for column 0 and and column3797: 0.006963828255791653
3798
Correlation matrix for column 0 and and column3798: -0.021521007511161327
3799
Correlation matrix for column 0 and and column3799: -0.0017524970021451222
3800
Correlation matrix for column 0 and and column3800: 0.012938409372806083
3801
Correlation matrix for column 0 and and column3801: -0.

Correlation matrix for column 0 and and column3998: -0.003973396649952739
3999
Correlation matrix for column 0 and and column3999: 0.002578593393165467
4000
Correlation matrix for column 0 and and column4000: -0.00267626510834983
4001
Correlation matrix for column 0 and and column4001: -0.0022241217582209603
4002
Correlation matrix for column 0 and and column4002: -0.005868979087718161
4003
Correlation matrix for column 0 and and column4003: 0.0038827307320516216
4004
Correlation matrix for column 0 and and column4004: 0.004603030221393688
4005
Correlation matrix for column 0 and and column4005: 0.015093853044596766
4006
Correlation matrix for column 0 and and column4006: -0.01596317531459701
4007
Correlation matrix for column 0 and and column4007: 0.010512880044930286
4008
Correlation matrix for column 0 and and column4008: 0.008948892225613103
4009
Correlation matrix for column 0 and and column4009: -0.00536193466466599
4010
Correlation matrix for column 0 and and column4010: 0.00325

Correlation matrix for column 0 and and column4208: -0.00258270837138731
4209
Correlation matrix for column 0 and and column4209: 0.01788721797016808
4210
Correlation matrix for column 0 and and column4210: 0.011226601442168756
4211
Correlation matrix for column 0 and and column4211: -0.011154397168900543
4212
Correlation matrix for column 0 and and column4212: -0.0016852629574495446
4213
Correlation matrix for column 0 and and column4213: -0.002816641868901231
4214
Correlation matrix for column 0 and and column4214: 0.017828205863970985
4215
Correlation matrix for column 0 and and column4215: -0.011444377146249655
4216
Correlation matrix for column 0 and and column4216: 0.010651957030035925
4217
Correlation matrix for column 0 and and column4217: 0.0009167631335411328
4218
Correlation matrix for column 0 and and column4218: 0.0025930662546735173
4219
Correlation matrix for column 0 and and column4219: 0.007374179445579627
4220
Correlation matrix for column 0 and and column4220: 0.0189

Correlation matrix for column 0 and and column4410: -0.0020186623080394573
4411
Correlation matrix for column 0 and and column4411: 0.011012848255295768
4412
Correlation matrix for column 0 and and column4412: 0.001643503693005629
4413
Correlation matrix for column 0 and and column4413: 0.01807228997444149
4414
Correlation matrix for column 0 and and column4414: 0.007739628550444308
4415
Correlation matrix for column 0 and and column4415: 0.007902924567104284
4416
Correlation matrix for column 0 and and column4416: -0.020788546234281818
4417
Correlation matrix for column 0 and and column4417: -0.004199861325223674
4418
Correlation matrix for column 0 and and column4418: -0.009962852162247652
4419
Correlation matrix for column 0 and and column4419: -0.005884614508140365
4420
Correlation matrix for column 0 and and column4420: -0.0020925716119416316
4421
Correlation matrix for column 0 and and column4421: -3.973100629780571e-05
4422
Correlation matrix for column 0 and and column4422: -0.

Correlation matrix for column 0 and and column4615: -0.015140173475134572
4616
Correlation matrix for column 0 and and column4616: -0.00732526417887385
4617
Correlation matrix for column 0 and and column4617: 0.008639363585461432
4618
Correlation matrix for column 0 and and column4618: 0.004199455742245186
4619
Correlation matrix for column 0 and and column4619: 0.007854104229544613
4620
Correlation matrix for column 0 and and column4620: -0.013752210877019801
4621
Correlation matrix for column 0 and and column4621: -0.008010915393652548
4622
Correlation matrix for column 0 and and column4622: 0.012716855269330833
4623
Correlation matrix for column 0 and and column4623: 0.01575108670169143
4624
Correlation matrix for column 0 and and column4624: 0.007953332763678557
4625
Correlation matrix for column 0 and and column4625: 0.0056549970061718965
4626
Correlation matrix for column 0 and and column4626: -0.006708864522886793
4627
Correlation matrix for column 0 and and column4627: -0.02184

Correlation matrix for column 0 and and column4819: -0.003551896997658325
4820
Correlation matrix for column 0 and and column4820: -0.002163419485242815
4821
Correlation matrix for column 0 and and column4821: -0.009482624392012567
4822
Correlation matrix for column 0 and and column4822: 0.001705998612121278
4823
Correlation matrix for column 0 and and column4823: 0.005050567558926735
4824
Correlation matrix for column 0 and and column4824: -0.0027013886295454144
4825
Correlation matrix for column 0 and and column4825: -0.0007902685287538316
4826
Correlation matrix for column 0 and and column4826: 0.013147742752782323
4827
Correlation matrix for column 0 and and column4827: 0.007128470545025819
4828
Correlation matrix for column 0 and and column4828: -0.010986987085688703
4829
Correlation matrix for column 0 and and column4829: -0.0029503558359384734
4830
Correlation matrix for column 0 and and column4830: -0.000623102941460811
4831
Correlation matrix for column 0 and and column4831: -

Correlation matrix for column 0 and and column5024: 0.014451267740576717
5025
Correlation matrix for column 0 and and column5025: -0.007766991495379341
5026
Correlation matrix for column 0 and and column5026: -0.007864743663383142
5027
Correlation matrix for column 0 and and column5027: -0.006548530036588151
5028
Correlation matrix for column 0 and and column5028: -0.0070379945087129655
5029
Correlation matrix for column 0 and and column5029: 0.016820595207719228
5030
Correlation matrix for column 0 and and column5030: -0.006300409413498949
5031
Correlation matrix for column 0 and and column5031: -0.02265435294128945
5032
Correlation matrix for column 0 and and column5032: -0.014365302477159184
5033
Correlation matrix for column 0 and and column5033: -0.008724904567355266
5034
Correlation matrix for column 0 and and column5034: 0.007416526867433153
5035
Correlation matrix for column 0 and and column5035: -0.006168755937290277
5036
Correlation matrix for column 0 and and column5036: -0.

Correlation matrix for column 0 and and column5225: 0.01567187670355952
5226
Correlation matrix for column 0 and and column5226: 0.006817041240051072
5227
Correlation matrix for column 0 and and column5227: -0.01853914015894721
5228
Correlation matrix for column 0 and and column5228: -0.008256454892451554
5229
Correlation matrix for column 0 and and column5229: -0.021321845963369973
5230
Correlation matrix for column 0 and and column5230: 0.020125252525005154
5231
Correlation matrix for column 0 and and column5231: 0.01096937453016478
5232
Correlation matrix for column 0 and and column5232: 0.011420831150022261
5233
Correlation matrix for column 0 and and column5233: 0.002644734732856934
5234
Correlation matrix for column 0 and and column5234: 0.007952861665977197
5235
Correlation matrix for column 0 and and column5235: 0.0032768367055228385
5236
Correlation matrix for column 0 and and column5236: -0.011886498677817142
5237
Correlation matrix for column 0 and and column5237: 0.00734888

Correlation matrix for column 0 and and column5431: 0.006722219802140969
5432
Correlation matrix for column 0 and and column5432: -0.009720712792912669
5433
Correlation matrix for column 0 and and column5433: -0.02271156881066139
5434
Correlation matrix for column 0 and and column5434: -0.0067651540664706265
5435
Correlation matrix for column 0 and and column5435: -0.010344633490397511
5436
Correlation matrix for column 0 and and column5436: -0.005733573019188676
5437
Correlation matrix for column 0 and and column5437: 0.00347820798587045
5438
Correlation matrix for column 0 and and column5438: 0.006823573125611889
5439
Correlation matrix for column 0 and and column5439: -0.0022201507443477793
5440
Correlation matrix for column 0 and and column5440: 0.0023476499205988834
5441
Correlation matrix for column 0 and and column5441: 0.0007700401322651567
5442
Correlation matrix for column 0 and and column5442: -0.008839243957205452
5443
Correlation matrix for column 0 and and column5443: 0.0

Correlation matrix for column 0 and and column5627: 0.0037992540770424147
5628
Correlation matrix for column 0 and and column5628: 0.0009565453092763049
5629
Correlation matrix for column 0 and and column5629: -0.00695496817014463
5630
Correlation matrix for column 0 and and column5630: 0.005087377112037753
5631
Correlation matrix for column 0 and and column5631: -0.004040028046399919
5632
Correlation matrix for column 0 and and column5632: -0.02754878371936455
5633
Correlation matrix for column 0 and and column5633: 0.007487433184956338
5634
Correlation matrix for column 0 and and column5634: -0.002310171958052055
5635
Correlation matrix for column 0 and and column5635: 0.022761435308057288
5636
Correlation matrix for column 0 and and column5636: 0.0069367120766268865
5637
Correlation matrix for column 0 and and column5637: 0.0010563761832994884
5638
Correlation matrix for column 0 and and column5638: 0.0006161022028726588
5639
Correlation matrix for column 0 and and column5639: -0.00

Correlation matrix for column 0 and and column5833: 0.019082123919065094
5834
Correlation matrix for column 0 and and column5834: 0.015607429598119879
5835
Correlation matrix for column 0 and and column5835: -0.004494858716593879
5836
Correlation matrix for column 0 and and column5836: -0.007730333819003086
5837
Correlation matrix for column 0 and and column5837: 0.011293446425362424
5838
Correlation matrix for column 0 and and column5838: -0.0031680060473306374
5839
Correlation matrix for column 0 and and column5839: 0.002917202255476485
5840
Correlation matrix for column 0 and and column5840: 0.0063722706388953075
5841
Correlation matrix for column 0 and and column5841: -0.005162821062706501
5842
Correlation matrix for column 0 and and column5842: -0.0010399040937780657
5843
Correlation matrix for column 0 and and column5843: 0.02694686471431802
5844
Correlation matrix for column 0 and and column5844: -0.010752828252468833
5845
Correlation matrix for column 0 and and column5845: -0.0

Correlation matrix for column 0 and and column6030: -0.004850091194862913
6031
Correlation matrix for column 0 and and column6031: 0.003864205482947801
6032
Correlation matrix for column 0 and and column6032: 0.010218075123756828
6033
Correlation matrix for column 0 and and column6033: -0.003755355187606516
6034
Correlation matrix for column 0 and and column6034: 0.006342986400465454
6035
Correlation matrix for column 0 and and column6035: -0.021814424343582925
6036
Correlation matrix for column 0 and and column6036: 0.0022484470186709704
6037
Correlation matrix for column 0 and and column6037: -0.007462834090124719
6038
Correlation matrix for column 0 and and column6038: 0.012095674553867711
6039
Correlation matrix for column 0 and and column6039: -0.011623235996522466
6040
Correlation matrix for column 0 and and column6040: -0.007713168198841888
6041
Correlation matrix for column 0 and and column6041: -0.007134802201841861
6042
Correlation matrix for column 0 and and column6042: 0.00

Correlation matrix for column 0 and and column6231: -0.01681677207157913
6232
Correlation matrix for column 0 and and column6232: -0.0030685609195902964
6233
Correlation matrix for column 0 and and column6233: 0.01745598923111271
6234
Correlation matrix for column 0 and and column6234: 0.0154505898337805
6235
Correlation matrix for column 0 and and column6235: -0.006945174889366607
6236
Correlation matrix for column 0 and and column6236: -0.0041383929890596366
6237
Correlation matrix for column 0 and and column6237: -0.017984128365841335
6238
Correlation matrix for column 0 and and column6238: 0.012059487696470583
6239
Correlation matrix for column 0 and and column6239: -0.009583808660307393
6240
Correlation matrix for column 0 and and column6240: -0.01911756656230998
6241
Correlation matrix for column 0 and and column6241: 0.007492811819964044
6242
Correlation matrix for column 0 and and column6242: -0.0076331608447137685
6243
Correlation matrix for column 0 and and column6243: -0.005

Correlation matrix for column 0 and and column6422: -0.008770870612843666
6423
Correlation matrix for column 0 and and column6423: 0.01399944725588224
6424
Correlation matrix for column 0 and and column6424: 0.005280154669890698
6425
Correlation matrix for column 0 and and column6425: 0.007498611639338176
6426
Correlation matrix for column 0 and and column6426: 0.0016830520442206883
6427
Correlation matrix for column 0 and and column6427: -0.01411248678595781
6428
Correlation matrix for column 0 and and column6428: 0.016748026248794425
6429
Correlation matrix for column 0 and and column6429: 0.007985421251190004
6430
Correlation matrix for column 0 and and column6430: -0.009104834926132548
6431
Correlation matrix for column 0 and and column6431: -0.001816042746574365
6432
Correlation matrix for column 0 and and column6432: 0.00486778777724801
6433
Correlation matrix for column 0 and and column6433: -0.01298466032519063
6434
Correlation matrix for column 0 and and column6434: -0.0022007

Correlation matrix for column 0 and and column6573: -0.006189514938732442
6574
Correlation matrix for column 0 and and column6574: 0.0037714646289698497
6575
Correlation matrix for column 0 and and column6575: 0.0014624845757092004
6576
Correlation matrix for column 0 and and column6576: -0.00948740969772005
6577
Correlation matrix for column 0 and and column6577: -0.009305219688126656
6578
Correlation matrix for column 0 and and column6578: 0.016338825748547572
6579
Correlation matrix for column 0 and and column6579: 0.010494684601815038
6580
Correlation matrix for column 0 and and column6580: 0.007385216385600962
6581
Correlation matrix for column 0 and and column6581: 0.021315368791572198
6582
Correlation matrix for column 0 and and column6582: 0.00021944011074175428
6583
Correlation matrix for column 0 and and column6583: -0.0032595404823751437
6584
Correlation matrix for column 0 and and column6584: 0.0006977342955227959
6585
Correlation matrix for column 0 and and column6585: -0.

6760
Correlation matrix for column 0 and and column6760: 0.006465924425810984
6761
Correlation matrix for column 0 and and column6761: -0.01228548929215868
6762
Correlation matrix for column 0 and and column6762: 0.008535993052324059
6763
Correlation matrix for column 0 and and column6763: 0.0163877602647634
6764
Correlation matrix for column 0 and and column6764: -0.008833822625305564
6765
Correlation matrix for column 0 and and column6765: 0.0014770308019166982
6766
Correlation matrix for column 0 and and column6766: -0.00112864763161927
6767
Correlation matrix for column 0 and and column6767: 0.0008880890192622305
6768
Correlation matrix for column 0 and and column6768: 0.0021776936095388617
6769
Correlation matrix for column 0 and and column6769: 0.009155609174986252
6770
Correlation matrix for column 0 and and column6770: 0.002181167022666197
6771
Correlation matrix for column 0 and and column6771: 0.01228911160967444
6772
Correlation matrix for column 0 and and column6772: 0.0029

Correlation matrix for column 0 and and column6960: 0.009695246885681125
6961
Correlation matrix for column 0 and and column6961: 0.013327988371800617
6962
Correlation matrix for column 0 and and column6962: -0.0006751442968122664
6963
Correlation matrix for column 0 and and column6963: -0.005666037449288408
6964
Correlation matrix for column 0 and and column6964: 0.005504082532547758
6965
Correlation matrix for column 0 and and column6965: -0.018582049731468556
6966
Correlation matrix for column 0 and and column6966: 0.00437351335822155
6967
Correlation matrix for column 0 and and column6967: 0.008123458410351246
6968
Correlation matrix for column 0 and and column6968: 0.006150619096402385
6969
Correlation matrix for column 0 and and column6969: 0.018327026832901067
6970
Correlation matrix for column 0 and and column6970: 0.003283191916120434
6971
Correlation matrix for column 0 and and column6971: -0.006402682173376263
6972
Correlation matrix for column 0 and and column6972: 0.007552

Correlation matrix for column 0 and and column7154: -0.01524291654827234
7155
Correlation matrix for column 0 and and column7155: 0.012725761517045336
7156
Correlation matrix for column 0 and and column7156: 0.002576750569072479
7157
Correlation matrix for column 0 and and column7157: -0.002079427598890466
7158
Correlation matrix for column 0 and and column7158: -0.008601468953267731
7159
Correlation matrix for column 0 and and column7159: -0.004326031415986032
7160
Correlation matrix for column 0 and and column7160: -0.00500293210539157
7161
Correlation matrix for column 0 and and column7161: -0.00648966443593062
7162
Correlation matrix for column 0 and and column7162: -0.015202775247417916
7163
Correlation matrix for column 0 and and column7163: -0.008205458098243665
7164
Correlation matrix for column 0 and and column7164: -0.003508590406402105
7165
Correlation matrix for column 0 and and column7165: -0.0036840448228729647
7166
Correlation matrix for column 0 and and column7166: 0.00

7351
Correlation matrix for column 0 and and column7351: -0.011817375766325034
7352
Correlation matrix for column 0 and and column7352: -0.007860256685090139
7353
Correlation matrix for column 0 and and column7353: -0.001973620639864472
7354
Correlation matrix for column 0 and and column7354: 0.015611504108021108
7355
Correlation matrix for column 0 and and column7355: -0.008786018179955316
7356
Correlation matrix for column 0 and and column7356: 0.01607379024334613
7357
Correlation matrix for column 0 and and column7357: -5.3032385364474296e-05
7358
Correlation matrix for column 0 and and column7358: 0.004466531952469458
7359
Correlation matrix for column 0 and and column7359: -0.008358233946580656
7360
Correlation matrix for column 0 and and column7360: 0.012415819867919922
7361
Correlation matrix for column 0 and and column7361: -0.010626640700365385
7362
Correlation matrix for column 0 and and column7362: -0.006893454844027953
7363
Correlation matrix for column 0 and and column7363

7544
Correlation matrix for column 0 and and column7544: -0.0032658964033803655
7545
Correlation matrix for column 0 and and column7545: 0.0016460810190193661
7546
Correlation matrix for column 0 and and column7546: 0.013434902343676124
7547
Correlation matrix for column 0 and and column7547: -0.0034283064324295465
7548
Correlation matrix for column 0 and and column7548: -0.00045794307736641036
7549
Correlation matrix for column 0 and and column7549: 0.005549526899775775
7550
Correlation matrix for column 0 and and column7550: 0.006516651940284835
7551
Correlation matrix for column 0 and and column7551: 0.013050601420749111
7552
Correlation matrix for column 0 and and column7552: -0.017082621976001362
7553
Correlation matrix for column 0 and and column7553: 0.009321929685523034
7554
Correlation matrix for column 0 and and column7554: -0.016061082531266608
7555
Correlation matrix for column 0 and and column7555: 0.01580437672154991
7556
Correlation matrix for column 0 and and column7556

Correlation matrix for column 0 and and column7729: -0.008245608200089452
7730
Correlation matrix for column 0 and and column7730: -0.002394280069265348
7731
Correlation matrix for column 0 and and column7731: 0.0029138463505334502
7732
Correlation matrix for column 0 and and column7732: 0.013006024877016601
7733
Correlation matrix for column 0 and and column7733: 0.015500571723371541
7734
Correlation matrix for column 0 and and column7734: -0.0007242370052085976
7735
Correlation matrix for column 0 and and column7735: 0.010125178729321158
7736
Correlation matrix for column 0 and and column7736: -0.008852225148502741
7737
Correlation matrix for column 0 and and column7737: -0.007896830930731846
7738
Correlation matrix for column 0 and and column7738: 0.005747833475829244
7739
Correlation matrix for column 0 and and column7739: -0.005140903517978155
7740
Correlation matrix for column 0 and and column7740: -0.00852691636913782
7741
Correlation matrix for column 0 and and column7741: -0.0

Correlation matrix for column 0 and and column7882: -0.008048088232228042
7883
Correlation matrix for column 0 and and column7883: -0.008426223608824102
7884
Correlation matrix for column 0 and and column7884: -0.012358056296045644
7885
Correlation matrix for column 0 and and column7885: -0.0038542076878090407
7886
Correlation matrix for column 0 and and column7886: -0.01891831959089615
7887
Correlation matrix for column 0 and and column7887: -0.0006867795860150746
7888
Correlation matrix for column 0 and and column7888: -0.02277312654670127
7889
Correlation matrix for column 0 and and column7889: 0.0025973614463652007
7890
Correlation matrix for column 0 and and column7890: 0.0027139482238806504
7891
Correlation matrix for column 0 and and column7891: 0.008652789107552907
7892
Correlation matrix for column 0 and and column7892: -0.006378133539866617
7893
Correlation matrix for column 0 and and column7893: -0.006545819584630416


In [44]:
# Absolute correlation above which a value is counted in the tallies below.
CORR_THRESHOLD = 0.01


def count_above_threshold(correlations, threshold=CORR_THRESHOLD):
    """Return how many entries of `correlations` exceed `threshold` in absolute value.

    Parameters
    ----------
    correlations : iterable of numbers
        Correlation coefficients to inspect.
    threshold : float, optional
        Strict lower bound on the absolute value for an entry to be counted.

    Returns
    -------
    int
        Number of entries with ``abs(value) > threshold``.
    """
    return sum(1 for value in correlations if abs(value) > threshold)


def as_percentage(count, total):
    """Return ``count / total * 100``, or NaN when `total` is 0.

    An empty list has no defined share, so NaN is reported instead of
    letting the division raise ZeroDivisionError.
    """
    if not total:
        return float("nan")
    return count / total * 100


# `list_corr_df1` and `list_corr_df_new` are not defined in this cell; they must
# already exist in the kernel when it runs.
# `a` and `b` keep their original names in case other cells read them.
a = count_above_threshold(list_corr_df1)
b = count_above_threshold(list_corr_df_new)

# Share of values whose absolute correlation exceeds CORR_THRESHOLD, per list.
print("Percentage of correlations in df1:",(as_percentage(a, len(list_corr_df1))),"%")

print("Percentage of correlations in df_new:",(as_percentage(b, len(list_corr_df_new))),"%")

Percentage of correlations in df1: 33.4 %
Percentage of correlations in df_new: 33.24464715570759 %


## Random Projection eps = 0.5

In [15]:
# Reduce the dimensionality of `df` with a Gaussian random projection.
# n_components is left at its default, so scikit-learn derives the target
# dimension from `eps` and the number of rows in `df`.
# NOTE(review): the original comment said "df1", but the code projects `df` —
# confirm which frame is intended.
# random_state fixes the randomly drawn projection matrix so that re-running
# this cell reproduces the same `df_new` (and the statistics computed from it).
transformer = random_projection.GaussianRandomProjection(eps = 0.5, random_state = 0)
df_new = pd.DataFrame(transformer.fit_transform(df))
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,432,433,434,435,436,437,438,439,440,441
0,124.813898,-77.062034,1028.711768,1241.781127,1414.969347,373.729019,600.543184,-198.945551,-174.884503,403.608534,...,-1010.688622,608.518498,311.191329,-1758.007622,-758.862976,398.838897,-1390.472223,-136.772879,71.347151,1014.607342
1,418.766121,227.033525,479.483236,968.188954,1713.623245,463.906041,443.394693,697.579767,-102.549741,-445.013160,...,-1914.251156,237.357206,-757.919782,-1498.980189,-229.309622,742.769650,-856.168598,-406.863973,-363.379399,289.569611
2,-287.595514,1305.522782,891.937076,108.672619,1262.362157,763.379691,687.674580,1278.239764,66.749717,463.203579,...,-831.878056,1158.287179,-592.481247,-1338.775018,-1116.775658,331.200977,-633.747011,-174.506789,-61.435405,-139.832681
3,579.368302,1246.617330,1119.152304,691.824120,1229.786483,163.305810,961.481169,714.011166,543.452601,-70.765770,...,-1899.047253,559.903713,-202.701659,-839.533335,-909.299150,269.919311,-609.846911,-277.069024,-1174.517449,491.012534
4,47.608396,638.886379,1028.260062,1301.643296,1519.807878,313.108469,0.936830,132.960820,-701.579870,952.920662,...,-1066.372263,620.065443,-69.450308,-1411.783838,-1163.503924,696.214705,-443.986161,80.754473,-462.713720,759.432423
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-313.987322,625.254891,38.650039,870.652573,1372.389163,270.440624,944.257623,618.924414,465.250422,-763.246220,...,-1241.163987,165.419883,-628.784610,-1587.797399,-1077.183157,627.672811,-823.842017,-701.149400,-585.102884,-75.815818
9996,841.590441,227.685654,766.684707,619.703611,797.761216,633.641032,400.211812,740.278161,-281.013819,-523.565316,...,-832.104646,302.989714,-137.704940,-962.476422,-1104.080213,1741.380619,-1233.218197,-299.162175,-248.402580,509.533531
9997,119.855555,371.397069,160.872070,1311.845742,1671.565629,-338.074010,538.515152,192.279089,288.815507,-610.311267,...,-1348.086721,270.932457,-356.623012,-1615.729988,-621.435789,385.996574,-1141.932790,-545.219344,-1282.514926,992.688622
9998,818.448151,-175.015753,367.743852,-593.880692,982.232684,-616.312886,1064.070871,172.403689,-84.664273,-861.876671,...,-463.504042,1096.968829,-942.104912,-1000.120799,-1195.682454,746.817487,-602.128279,-470.124459,-417.063286,59.374353


In [16]:
# Correlate column 0 of the reduced DataFrame with each of the other reduced columns.

# Assigning X to all columns except 0
X_df_new = df_new.drop(columns=0)

# Assigning Y to column 0
Y_df_new = df_new[0]
print(Y_df_new)

# Flatten Y into a 1-D NumPy array (not a Python list)
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

# Absolute correlation coefficient between Y and each of (at most) the first 1000 columns of X;
# the slice simply returns every column when X has fewer than 1000.
list_corr_df_new = []
for i in list_titles[0:1000]:
    print(i)
    # np.corrcoef returns a 2x2 matrix; [0][1] is the Y-vs-column coefficient.
    # Compute it once and reuse it for both the stored value and the printed line.
    corr = np.corrcoef(Y_df_new, X_df_new[i])[0][1]
    list_corr_df_new.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(corr))

0        124.813898
1        418.766121
2       -287.595514
3        579.368302
4         47.608396
           ...     
9995    -313.987322
9996     841.590441
9997     119.855555
9998     818.448151
9999    1436.121850
Name: 0, Length: 10000, dtype: float64
(10000, 441) (10000,)
1
Correlation matrix for column 0 and and column1: 0.004878477996461379
2
Correlation matrix for column 0 and and column2: -0.0004659120228969102
3
Correlation matrix for column 0 and and column3: -0.008028825462568868
4
Correlation matrix for column 0 and and column4: 0.025203445708758283
5
Correlation matrix for column 0 and and column5: 0.012319348849483006
6
Correlation matrix for column 0 and and column6: 0.015835261833887164
7
Correlation matrix for column 0 and and column7: 0.001475080498439664
8
Correlation matrix for column 0 and and column8: -0.0027197361053428685
9
Correlation matrix for column 0 and and column9: -0.003737709778494682
10
Correlation matrix for column 0 and and column10: 0.0168399267

Correlation matrix for column 0 and and column207: 0.004027256500001956
208
Correlation matrix for column 0 and and column208: -0.002822840571250798
209
Correlation matrix for column 0 and and column209: 0.0005002673959242869
210
Correlation matrix for column 0 and and column210: 0.01857968954293253
211
Correlation matrix for column 0 and and column211: 0.0065839299669945795
212
Correlation matrix for column 0 and and column212: 0.01242118225417362
213
Correlation matrix for column 0 and and column213: 0.004682710327568398
214
Correlation matrix for column 0 and and column214: -0.017696562839076772
215
Correlation matrix for column 0 and and column215: 0.007257965935245272
216
Correlation matrix for column 0 and and column216: -0.0023380229729881995
217
Correlation matrix for column 0 and and column217: -0.0048194766094375105
218
Correlation matrix for column 0 and and column218: -0.0033386733084786488
219
Correlation matrix for column 0 and and column219: -0.0070449433578014045
220
Co

Correlation matrix for column 0 and and column406: 0.006749137211032894
407
Correlation matrix for column 0 and and column407: -0.005733513370668201
408
Correlation matrix for column 0 and and column408: -0.01842510606840736
409
Correlation matrix for column 0 and and column409: -0.014173570910048791
410
Correlation matrix for column 0 and and column410: 0.00872112107634756
411
Correlation matrix for column 0 and and column411: 0.019129019378496124
412
Correlation matrix for column 0 and and column412: 0.010072978922185472
413
Correlation matrix for column 0 and and column413: 0.0035630959865401734
414
Correlation matrix for column 0 and and column414: -0.01215069149240006
415
Correlation matrix for column 0 and and column415: -0.008231117022014706
416
Correlation matrix for column 0 and and column416: -0.020688410422960712
417
Correlation matrix for column 0 and and column417: -0.013396644574405005
418
Correlation matrix for column 0 and and column418: -0.014241964879047034
419
Correl

In [17]:
# Report, for each list of stored correlations, the share of entries whose
# magnitude exceeds a fixed threshold.
# Relies on list_corr_df1 and list_corr_df_new having been built by earlier cells.

# Magnitude above which a correlation is counted; defined once so both counts
# are guaranteed to use the same cut-off.
CORR_THRESHOLD = 0.01

# a / b: number of entries above the threshold in each list.
a = sum(1 for corr in list_corr_df1 if abs(corr) > CORR_THRESHOLD)
b = sum(1 for corr in list_corr_df_new if abs(corr) > CORR_THRESHOLD)

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df_new:",(b/len(list_corr_df_new)*100),"%")

Percentage of correlations in df1: 31.7 %
Percentage of correlations in df_new: 35.147392290249435 %
