# Spurious Correlations of Highly Dimensional Big Data

This notebook aims to show how PCA and random projection can mitigate the problem of spurious correlations in Big Data.

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.utils import shuffle

## DataFrame Creation

In [2]:
# User-tunable sizes for the synthetic data set:
#   x -> number of rows for df1
#   y -> number of columns for df1
#   z -> number of rows for df2
x, y, z = 5000, 200000, 500

In [3]:
# Create a dataframe with x rows and y columns of uniform random values in [0, 1).
# A seeded generator is used so that Restart Kernel -> Run All regenerates the
# same data (and hence the same correlations) on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.random((x, y)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,199990,199991,199992,199993,199994,199995,199996,199997,199998,199999
0,0.423299,0.286238,0.234228,0.795308,0.730319,0.302544,0.290978,0.659416,0.042346,0.066195,...,0.097518,0.433754,0.783097,0.064996,0.103200,0.523102,0.776857,0.502613,0.982487,0.841418
1,0.816381,0.944832,0.161626,0.675295,0.787008,0.485738,0.526805,0.330176,0.825826,0.429349,...,0.855780,0.858136,0.999160,0.474237,0.917249,0.259441,0.306124,0.498124,0.028439,0.442533
2,0.807397,0.957439,0.550174,0.056663,0.544137,0.300858,0.537433,0.277585,0.916641,0.372928,...,0.067944,0.303812,0.396414,0.076644,0.832944,0.281719,0.333250,0.450563,0.892495,0.024085
3,0.300597,0.611597,0.749131,0.177751,0.055142,0.951217,0.237980,0.625118,0.278371,0.499877,...,0.177218,0.030760,0.268046,0.500260,0.410495,0.682665,0.539649,0.218426,0.325206,0.564303
4,0.470083,0.582246,0.947994,0.090257,0.825287,0.620829,0.816415,0.969734,0.566136,0.708307,...,0.940796,0.581998,0.261179,0.874734,0.650099,0.244954,0.951440,0.445642,0.346142,0.161107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.820738,0.550353,0.298106,0.622982,0.639182,0.608662,0.904491,0.891266,0.344757,0.734384,...,0.473797,0.489832,0.639098,0.805287,0.213338,0.930841,0.027141,0.894313,0.054942,0.724669
4996,0.390798,0.949957,0.306250,0.942024,0.822388,0.156229,0.989132,0.130325,0.469961,0.755121,...,0.235458,0.413498,0.519252,0.319559,0.739329,0.215618,0.392133,0.508277,0.851650,0.183547
4997,0.817749,0.930968,0.206794,0.709779,0.886916,0.305799,0.322042,0.660400,0.022350,0.398636,...,0.898117,0.043189,0.559994,0.529916,0.230522,0.281673,0.762757,0.194825,0.319025,0.158733
4998,0.763710,0.847058,0.831562,0.872308,0.504674,0.505779,0.143535,0.591167,0.238092,0.289526,...,0.433385,0.023830,0.481487,0.059562,0.476516,0.855799,0.227713,0.026806,0.684966,0.713247


To assess the correlations between the different parameters, the correlation between column 0 and each of the next 1000 columns (columns 1 to 1000) is computed.

In [4]:
# Assigning X to all columns except 0
X_df = df.drop(columns=0)

# Assigning Y to column 0
Y_df = df[0]
print(Y_df)

# Convert Y from a pandas Series to a flat 1-D NumPy array
Y_df = np.array(Y_df).reshape(-1)
print(X_df.shape,Y_df.shape)

list_titles = X_df.columns

# Absolute correlation between column 0 and each of the first 1000 columns of X
list_corr_df1 = []
for i in list_titles[0:1000]:
    print(i)
    # Compute the coefficient once and reuse it for both the list and the message
    corr = np.corrcoef(Y_df, X_df[i])[0][1]
    list_corr_df1.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(corr))

0       0.423299
1       0.816381
2       0.807397
3       0.300597
4       0.470083
          ...   
4995    0.820738
4996    0.390798
4997    0.817749
4998    0.763710
4999    0.596814
Name: 0, Length: 5000, dtype: float64
(5000, 199999) (5000,)
1
Correlation matrix for column 0 and and column1: -0.023674487663404558
2
Correlation matrix for column 0 and and column2: 0.0024602240951482252
3
Correlation matrix for column 0 and and column3: 0.019127639978898298
4
Correlation matrix for column 0 and and column4: -0.001358779073649799
5
Correlation matrix for column 0 and and column5: -0.02859560680124811
6
Correlation matrix for column 0 and and column6: 0.0033081764448351716
7
Correlation matrix for column 0 and and column7: -0.012871042715802576
8
Correlation matrix for column 0 and and column8: -0.005731196885269
9
Correlation matrix for column 0 and and column9: -0.0018780037353041017
10
Correlation matrix for column 0 and and column10: 0.003067916161214379
11
Correlation matrix for

Correlation matrix for column 0 and and column128: -0.012754060987775897
129
Correlation matrix for column 0 and and column129: -0.009861659768511877
130
Correlation matrix for column 0 and and column130: -0.0166716021781955
131
Correlation matrix for column 0 and and column131: -0.025448917014304434
132
Correlation matrix for column 0 and and column132: -0.00018371119379497874
133
Correlation matrix for column 0 and and column133: 0.011780393483371851
134
Correlation matrix for column 0 and and column134: -0.00655091042428319
135
Correlation matrix for column 0 and and column135: -0.021810128071412502
136
Correlation matrix for column 0 and and column136: -0.01743145509855765
137
Correlation matrix for column 0 and and column137: 0.005372292066733121
138
Correlation matrix for column 0 and and column138: -0.005517800972585276
139
Correlation matrix for column 0 and and column139: -0.001979272245911774
140
Correlation matrix for column 0 and and column140: 0.0018502493323597322
141
Cor

Correlation matrix for column 0 and and column348: -0.023141196601250615
349
Correlation matrix for column 0 and and column349: -0.0007020183470651767
350
Correlation matrix for column 0 and and column350: 0.033684412582218826
351
Correlation matrix for column 0 and and column351: 0.004474383300552564
352
Correlation matrix for column 0 and and column352: -0.0018275325315574857
353
Correlation matrix for column 0 and and column353: -0.01766670853763151
354
Correlation matrix for column 0 and and column354: 0.006232556968205553
355
Correlation matrix for column 0 and and column355: -0.0033209466223572764
356
Correlation matrix for column 0 and and column356: -0.005697790324442088
357
Correlation matrix for column 0 and and column357: -0.0027654459248461125
358
Correlation matrix for column 0 and and column358: -0.029132091045753126
359
Correlation matrix for column 0 and and column359: 0.013645105290063433
360
Correlation matrix for column 0 and and column360: 0.011229989991052604
361
C

Correlation matrix for column 0 and and column558: -0.0015261320063953009
559
Correlation matrix for column 0 and and column559: -0.017463091268634618
560
Correlation matrix for column 0 and and column560: -0.01996894264397212
561
Correlation matrix for column 0 and and column561: -0.0039317642777051826
562
Correlation matrix for column 0 and and column562: -0.01208516057540781
563
Correlation matrix for column 0 and and column563: 0.001530229829949594
564
Correlation matrix for column 0 and and column564: 0.01384710294800407
565
Correlation matrix for column 0 and and column565: 0.0038792372100657367
566
Correlation matrix for column 0 and and column566: 0.012424684165657576
567
Correlation matrix for column 0 and and column567: 0.007483296467191756
568
Correlation matrix for column 0 and and column568: 0.011959985745760418
569
Correlation matrix for column 0 and and column569: 0.011568103858822485
570
Correlation matrix for column 0 and and column570: 0.015775295158143993
571
Correla

Correlation matrix for column 0 and and column741: -0.002646064511885265
742
Correlation matrix for column 0 and and column742: 0.002574888097928651
743
Correlation matrix for column 0 and and column743: -0.010181374143833204
744
Correlation matrix for column 0 and and column744: -0.0019815037109874945
745
Correlation matrix for column 0 and and column745: -0.006472944222141554
746
Correlation matrix for column 0 and and column746: -0.036018826528920404
747
Correlation matrix for column 0 and and column747: -0.0030148496946163263
748
Correlation matrix for column 0 and and column748: -0.01823145666997569
749
Correlation matrix for column 0 and and column749: -0.009070102706086663
750
Correlation matrix for column 0 and and column750: 0.011859712860496231
751
Correlation matrix for column 0 and and column751: -0.006120600508215846
752
Correlation matrix for column 0 and and column752: 0.032182325879069815
753
Correlation matrix for column 0 and and column753: -0.002054470038708867
754
C

919
Correlation matrix for column 0 and and column919: -0.012428597133833692
920
Correlation matrix for column 0 and and column920: -0.001467077921158457
921
Correlation matrix for column 0 and and column921: 0.01673725268482326
922
Correlation matrix for column 0 and and column922: -0.002832712150973878
923
Correlation matrix for column 0 and and column923: -0.024892774117140495
924
Correlation matrix for column 0 and and column924: -1.329239415378071e-05
925
Correlation matrix for column 0 and and column925: -0.02388480391072896
926
Correlation matrix for column 0 and and column926: -0.014730322410695246
927
Correlation matrix for column 0 and and column927: 0.0043996569173473925
928
Correlation matrix for column 0 and and column928: 0.022243424700390098
929
Correlation matrix for column 0 and and column929: -0.0005134164429096733
930
Correlation matrix for column 0 and and column930: -0.019422831213219735
931
Correlation matrix for column 0 and and column931: -0.004771412363673308
9

To see whether the correlations between the different parameters do indeed increase with the data size, the original dataframe is compared to a subset dataframe that contains only the first z rows of the original dataframe. If the correlations in the original dataframe are higher than in the smaller dataframe, this would indicate that the bigger the data size, the more frequent the spurious correlations.

In [5]:
# Reduced dataframe: keep only the first z rows of the original dataframe
df2 = df.head(z)
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,199990,199991,199992,199993,199994,199995,199996,199997,199998,199999
0,0.423299,0.286238,0.234228,0.795308,0.730319,0.302544,0.290978,0.659416,0.042346,0.066195,...,0.097518,0.433754,0.783097,0.064996,0.103200,0.523102,0.776857,0.502613,0.982487,0.841418
1,0.816381,0.944832,0.161626,0.675295,0.787008,0.485738,0.526805,0.330176,0.825826,0.429349,...,0.855780,0.858136,0.999160,0.474237,0.917249,0.259441,0.306124,0.498124,0.028439,0.442533
2,0.807397,0.957439,0.550174,0.056663,0.544137,0.300858,0.537433,0.277585,0.916641,0.372928,...,0.067944,0.303812,0.396414,0.076644,0.832944,0.281719,0.333250,0.450563,0.892495,0.024085
3,0.300597,0.611597,0.749131,0.177751,0.055142,0.951217,0.237980,0.625118,0.278371,0.499877,...,0.177218,0.030760,0.268046,0.500260,0.410495,0.682665,0.539649,0.218426,0.325206,0.564303
4,0.470083,0.582246,0.947994,0.090257,0.825287,0.620829,0.816415,0.969734,0.566136,0.708307,...,0.940796,0.581998,0.261179,0.874734,0.650099,0.244954,0.951440,0.445642,0.346142,0.161107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.070558,0.310220,0.337681,0.062526,0.229351,0.333282,0.430442,0.490893,0.248938,0.575667,...,0.967857,0.913401,0.278726,0.818273,0.025178,0.617795,0.344884,0.884806,0.017540,0.357151
496,0.305009,0.021939,0.844048,0.872949,0.856412,0.052878,0.729409,0.159357,0.125030,0.034820,...,0.214619,0.113046,0.010925,0.974699,0.277901,0.450778,0.093641,0.486517,0.196321,0.480738
497,0.138977,0.946115,0.263319,0.463415,0.934393,0.012179,0.414217,0.954714,0.828337,0.080086,...,0.767921,0.450336,0.491417,0.124749,0.012162,0.271981,0.674531,0.034007,0.214759,0.439488
498,0.157673,0.473864,0.754880,0.859819,0.047726,0.495484,0.138676,0.183798,0.963274,0.504788,...,0.833294,0.698497,0.012803,0.089010,0.648844,0.328009,0.773669,0.143708,0.303681,0.485604


In [6]:
# Assigning X to all columns except 0
X_df2 = df2.drop(columns=0)

# Assigning Y to column 0
Y_df2 = df2[0]
print(Y_df2)

# Convert Y from a pandas Series to a flat 1-D NumPy array
Y_df2 = np.array(Y_df2).reshape(-1)
print(X_df2.shape,Y_df2.shape)

list_titles = X_df2.columns

# Absolute correlation between column 0 and each of the first 1000 columns of X
list_corr_df2 = []
for i in list_titles[0:1000]:
    print(i)
    # Compute the coefficient once and reuse it for both the list and the message
    corr = np.corrcoef(Y_df2, X_df2[i])[0][1]
    list_corr_df2.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(corr))

0      0.423299
1      0.816381
2      0.807397
3      0.300597
4      0.470083
         ...   
495    0.070558
496    0.305009
497    0.138977
498    0.157673
499    0.784291
Name: 0, Length: 500, dtype: float64
(500, 199999) (500,)
1
Correlation matrix for column 0 and and column1: -0.03837356253675092
2
Correlation matrix for column 0 and and column2: -0.0702605216229628
3
Correlation matrix for column 0 and and column3: 0.04980978984250686
4
Correlation matrix for column 0 and and column4: 0.015479673434670863
5
Correlation matrix for column 0 and and column5: -0.04817087078166751
6
Correlation matrix for column 0 and and column6: -0.049945807123268585
7
Correlation matrix for column 0 and and column7: -0.015273824559109318
8
Correlation matrix for column 0 and and column8: 0.021230433123800723
9
Correlation matrix for column 0 and and column9: -0.03599109950212162
10
Correlation matrix for column 0 and and column10: 0.04127301687919084
11
Correlation matrix for column 0 and and co

Correlation matrix for column 0 and and column298: -0.007741210327523189
299
Correlation matrix for column 0 and and column299: 0.013750253557489966
300
Correlation matrix for column 0 and and column300: 0.026481085086001024
301
Correlation matrix for column 0 and and column301: 0.03145748217692528
302
Correlation matrix for column 0 and and column302: -0.06419452868800984
303
Correlation matrix for column 0 and and column303: -0.05317537120199276
304
Correlation matrix for column 0 and and column304: 0.06204772303835815
305
Correlation matrix for column 0 and and column305: 0.038525648635533746
306
Correlation matrix for column 0 and and column306: 0.07399292561767633
307
Correlation matrix for column 0 and and column307: 0.012748492406753045
308
Correlation matrix for column 0 and and column308: -0.014588759226985104
309
Correlation matrix for column 0 and and column309: 0.09565161169652509
310
Correlation matrix for column 0 and and column310: 0.01013088816679389
311
Correlation mat

Correlation matrix for column 0 and and column625: -0.0680638505622894
626
Correlation matrix for column 0 and and column626: -0.07464851957535373
627
Correlation matrix for column 0 and and column627: 0.07454684045602272
628
Correlation matrix for column 0 and and column628: 0.02086850572154128
629
Correlation matrix for column 0 and and column629: 0.02221317212438399
630
Correlation matrix for column 0 and and column630: 0.02315013578747094
631
Correlation matrix for column 0 and and column631: 0.04126644659154679
632
Correlation matrix for column 0 and and column632: 0.015600684258481769
633
Correlation matrix for column 0 and and column633: -0.011636575896346995
634
Correlation matrix for column 0 and and column634: -0.012511810335114412
635
Correlation matrix for column 0 and and column635: -0.03314448192093279
636
Correlation matrix for column 0 and and column636: -0.039521392948167755
637
Correlation matrix for column 0 and and column637: -0.019042048120437215
638
Correlation ma

Correlation matrix for column 0 and and column931: 0.003056651416140797
932
Correlation matrix for column 0 and and column932: 0.012161507077517947
933
Correlation matrix for column 0 and and column933: 0.056435631996693485
934
Correlation matrix for column 0 and and column934: 0.0453058503541508
935
Correlation matrix for column 0 and and column935: 0.06847329347613741
936
Correlation matrix for column 0 and and column936: 0.008222124889551109
937
Correlation matrix for column 0 and and column937: -0.023897411192747034
938
Correlation matrix for column 0 and and column938: -0.007762272209255093
939
Correlation matrix for column 0 and and column939: 0.019867557828126478
940
Correlation matrix for column 0 and and column940: -0.07616666755811373
941
Correlation matrix for column 0 and and column941: -0.04870749994926663
942
Correlation matrix for column 0 and and column942: -0.07716720319175953
943
Correlation matrix for column 0 and and column943: 0.06551006951054035
944
Correlation ma

We now compare, column by column, which of the two correlation lists holds the higher absolute correlation.

In [7]:
# Count, column by column, which frame has the larger absolute correlation
# with column 0:
#   a -> entries where list_corr_df1 is larger
#   b -> entries where list_corr_df2 is larger
a = 0
b = 0

# Pair the two lists directly instead of indexing a hard-coded 1000 entries,
# so the comparison follows however many correlations were actually computed.
# zip() would silently truncate on a length mismatch, so fail loudly instead.
assert len(list_corr_df1) == len(list_corr_df2), "correlation lists differ in length"
for corr_df1, corr_df2 in zip(list_corr_df1, list_corr_df2):
    if abs(corr_df1) > abs(corr_df2):
        a += 1
    elif abs(corr_df1) < abs(corr_df2):
        b += 1
    else:
        # Neither value is strictly larger; keep the blank line the cell has always printed
        print()

print(a)
print(b)

188
812


In [8]:
# Number of entries in each correlation list whose absolute value exceeds 0.01
# (a for list_corr_df1, b for list_corr_df2), reported below as percentages.
a = len([corr for corr in list_corr_df1 if abs(corr) > 0.01])
b = len([corr for corr in list_corr_df2 if abs(corr) > 0.01])

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df2:",(b/len(list_corr_df2)*100),"%")

Percentage of correlations in df1: 47.099999999999994 %
Percentage of correlations in df2: 81.5 %


It is clearly seen that the lower the number of observations, the higher the frequency of spurious correlations (counted here as absolute correlations above 0.01). However, the number of spurious correlations present in the bigger DataFrame is still very significant.

To counter spurious correlations, random projection can be used.

## Random Projection eps = 0.1

In [9]:
# Pass df1 through a Gaussian random projection to create a new reduced DataFrame.
# n_components is left at its default, so scikit-learn derives the output
# dimension from eps and the number of samples.
# random_state is fixed so the projection matrix, and therefore df_new,
# is identical on every run.
transformer = random_projection.GaussianRandomProjection(eps = 0.1, random_state = 42)
df_new = pd.DataFrame(transformer.fit_transform(df))
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7290,7291,7292,7293,7294,7295,7296,7297,7298,7299
0,5.766567,-4.889632,-4.702120,1.307096,-2.414485,-2.065801,-1.908218,7.961795,-2.195116,-0.066366,...,-0.331560,-2.301461,2.529825,-6.184975,0.129095,-1.195118,-3.523420,-4.338822,2.929693,-7.833502
1,7.131347,0.163587,-6.512746,0.793245,-2.281540,-0.923426,-0.663614,8.198581,-2.720432,2.307675,...,3.027777,-6.114709,1.472086,-6.147909,0.909241,4.434591,-5.630393,-6.780180,1.863719,-7.834982
2,4.495013,-0.856660,-6.337663,3.029170,-3.898417,0.081511,-2.755826,4.757968,-2.871083,0.134491,...,-2.032124,-2.769185,4.891874,-5.292402,0.161873,-0.280675,-5.233155,-5.192933,2.308383,-9.789810
3,4.444541,1.037400,-5.442085,2.722761,-4.062293,-1.552553,-3.616512,5.211884,-1.218724,1.420966,...,1.018994,1.964310,4.992268,-0.909396,-0.363439,5.713594,-2.617629,-3.665217,-1.134185,-6.115157
4,4.358603,-1.415615,-4.839354,1.936785,-0.324032,-0.652117,-1.818697,9.603056,0.339122,2.833288,...,-0.846993,-3.902029,4.545431,-3.179185,1.572433,3.078469,-3.365681,-3.790453,1.657212,-8.318195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,1.824281,-0.918467,-2.237494,3.809783,-2.995322,-4.411524,-2.748356,6.051334,-0.872530,0.075218,...,4.181151,-0.436020,3.767781,-3.498543,0.967227,2.508539,-4.284625,-4.155103,3.794947,-11.551496
4996,4.830840,-2.807417,-6.213595,3.888364,-2.634474,-0.595258,-1.489736,7.398967,-3.565732,1.937408,...,0.086305,-2.019002,4.473074,-5.446273,0.920734,0.506524,-1.981894,-4.063968,0.937120,-6.177902
4997,5.553483,-0.669705,-4.221611,0.935523,-1.205050,-1.241929,-1.238122,4.586572,-1.670419,0.511413,...,2.047117,-3.217178,2.271793,-4.265673,-1.167718,3.052647,-5.674940,-3.739194,4.071546,-9.085544
4998,3.575370,-0.897258,-4.732456,3.078309,-4.209655,-2.882806,-0.970757,6.215809,-3.167527,1.490568,...,0.528118,-2.795458,2.720226,-2.540079,-1.277226,3.142652,-2.546461,-3.635866,4.119996,-8.107902


In [10]:
# Assigning X to all columns except 0
X_df_new = df_new.drop(columns=0)

# Assigning Y to column 0
Y_df_new = df_new[0]
print(Y_df_new)

# Convert Y from a pandas Series to a flat 1-D NumPy array
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

# Absolute correlation between column 0 and every remaining projected column
list_corr_df_new = []
for i in list_titles:
    print(i)
    # Compute the coefficient once and reuse it for both the list and the message
    corr = np.corrcoef(Y_df_new, X_df_new[i])[0][1]
    list_corr_df_new.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(corr))

0       5.766567
1       7.131347
2       4.495013
3       4.444541
4       4.358603
          ...   
4995    1.824281
4996    4.830840
4997    5.553483
4998    3.575370
4999    2.442799
Name: 0, Length: 5000, dtype: float64
(5000, 7299) (5000,)
1
Correlation matrix for column 0 and and column1: 0.019453136087301936
2
Correlation matrix for column 0 and and column2: -0.009126139086362926
3
Correlation matrix for column 0 and and column3: -0.01951891384835207
4
Correlation matrix for column 0 and and column4: 0.005268112168752908
5
Correlation matrix for column 0 and and column5: -0.0026164478976958834
6
Correlation matrix for column 0 and and column6: 0.00816269111625594
7
Correlation matrix for column 0 and and column7: 0.003537355275122217
8
Correlation matrix for column 0 and and column8: -0.007167141353879834
9
Correlation matrix for column 0 and and column9: -0.004854367323485965
10
Correlation matrix for column 0 and and column10: 0.02852054032863329
11
Correlation matrix for col

Correlation matrix for column 0 and and column178: -0.006703503135482321
179
Correlation matrix for column 0 and and column179: 0.007814055349521652
180
Correlation matrix for column 0 and and column180: 0.006955107855228042
181
Correlation matrix for column 0 and and column181: 0.001880081314512529
182
Correlation matrix for column 0 and and column182: -0.0004400060976005044
183
Correlation matrix for column 0 and and column183: 0.025574975844700594
184
Correlation matrix for column 0 and and column184: 0.017365548093126078
185
Correlation matrix for column 0 and and column185: 0.029677525991407033
186
Correlation matrix for column 0 and and column186: -0.005496428547341737
187
Correlation matrix for column 0 and and column187: -0.028431990621683088
188
Correlation matrix for column 0 and and column188: -0.01340592867851144
189
Correlation matrix for column 0 and and column189: -0.017854188993028326
190
Correlation matrix for column 0 and and column190: -0.00043153127121923763
191
Cor

376
Correlation matrix for column 0 and and column376: -0.01814334161723724
377
Correlation matrix for column 0 and and column377: 0.0025207863238552114
378
Correlation matrix for column 0 and and column378: -0.008695170650958749
379
Correlation matrix for column 0 and and column379: -0.010397215266917486
380
Correlation matrix for column 0 and and column380: 0.0015211144680424165
381
Correlation matrix for column 0 and and column381: 0.012863746867760517
382
Correlation matrix for column 0 and and column382: -0.00220308008820233
383
Correlation matrix for column 0 and and column383: -0.011617632239884827
384
Correlation matrix for column 0 and and column384: 0.005002621641674446
385
Correlation matrix for column 0 and and column385: -0.01111797067073554
386
Correlation matrix for column 0 and and column386: -0.022113384375584702
387
Correlation matrix for column 0 and and column387: 0.0021059295264798123
388
Correlation matrix for column 0 and and column388: 0.01818362606079603
389
Co

613
Correlation matrix for column 0 and and column613: 0.017834057411547678
614
Correlation matrix for column 0 and and column614: -0.021239157626482482
615
Correlation matrix for column 0 and and column615: 0.021867465423447974
616
Correlation matrix for column 0 and and column616: 0.004194734819212525
617
Correlation matrix for column 0 and and column617: -0.004282857142063247
618
Correlation matrix for column 0 and and column618: -0.04233755585119796
619
Correlation matrix for column 0 and and column619: -0.01678700170522958
620
Correlation matrix for column 0 and and column620: -0.010293118471727038
621
Correlation matrix for column 0 and and column621: 0.02274156364629139
622
Correlation matrix for column 0 and and column622: -0.0032238133895689195
623
Correlation matrix for column 0 and and column623: -0.004791209583011109
624
Correlation matrix for column 0 and and column624: -0.010382699418623015
625
Correlation matrix for column 0 and and column625: -0.007870542620482994
626
C

Correlation matrix for column 0 and and column833: 0.0024232979724772763
834
Correlation matrix for column 0 and and column834: 0.017777545945516195
835
Correlation matrix for column 0 and and column835: 0.0005362526283057539
836
Correlation matrix for column 0 and and column836: 0.03287108030962218
837
Correlation matrix for column 0 and and column837: -0.0019752517864070904
838
Correlation matrix for column 0 and and column838: -0.009321557785231587
839
Correlation matrix for column 0 and and column839: 0.021143223141285722
840
Correlation matrix for column 0 and and column840: 0.028403680739285425
841
Correlation matrix for column 0 and and column841: 0.018895592034122125
842
Correlation matrix for column 0 and and column842: 0.007698539105845558
843
Correlation matrix for column 0 and and column843: 0.003668963272210053
844
Correlation matrix for column 0 and and column844: 0.0051966967545705196
845
Correlation matrix for column 0 and and column845: -0.03182023927885285
846
Correla

Correlation matrix for column 0 and and column1036: -0.011042076953553704
1037
Correlation matrix for column 0 and and column1037: -8.319437193377527e-05
1038
Correlation matrix for column 0 and and column1038: 0.03198050807111558
1039
Correlation matrix for column 0 and and column1039: 0.01355047242131509
1040
Correlation matrix for column 0 and and column1040: -0.0008308254916300884
1041
Correlation matrix for column 0 and and column1041: -0.01571303720035168
1042
Correlation matrix for column 0 and and column1042: -0.003148539197732566
1043
Correlation matrix for column 0 and and column1043: -0.001452672320265014
1044
Correlation matrix for column 0 and and column1044: 0.01561820975942769
1045
Correlation matrix for column 0 and and column1045: -0.01825151840762614
1046
Correlation matrix for column 0 and and column1046: 0.00215723092261994
1047
Correlation matrix for column 0 and and column1047: 0.006738014918547191
1048
Correlation matrix for column 0 and and column1048: -0.010747

Correlation matrix for column 0 and and column1372: -0.033872807190399166
1373
Correlation matrix for column 0 and and column1373: 0.010564032103628017
1374
Correlation matrix for column 0 and and column1374: -0.004277929635821114
1375
Correlation matrix for column 0 and and column1375: -0.019468040294280523
1376
Correlation matrix for column 0 and and column1376: 0.000496945905553479
1377
Correlation matrix for column 0 and and column1377: 0.006719883542133139
1378
Correlation matrix for column 0 and and column1378: 0.005473618363392245
1379
Correlation matrix for column 0 and and column1379: 0.022504501867368507
1380
Correlation matrix for column 0 and and column1380: -0.015441732579578274
1381
Correlation matrix for column 0 and and column1381: 0.003326599096643864
1382
Correlation matrix for column 0 and and column1382: 0.009679668720887801
1383
Correlation matrix for column 0 and and column1383: 0.0044437431090743025
1384
Correlation matrix for column 0 and and column1384: -0.0179

1600
Correlation matrix for column 0 and and column1600: -0.011937171652354507
1601
Correlation matrix for column 0 and and column1601: 0.003301512294251754
1602
Correlation matrix for column 0 and and column1602: 0.011028970801549555
1603
Correlation matrix for column 0 and and column1603: -0.018042181888158253
1604
Correlation matrix for column 0 and and column1604: -0.010373151028022977
1605
Correlation matrix for column 0 and and column1605: 0.00010758814325871389
1606
Correlation matrix for column 0 and and column1606: -0.012304083767302262
1607
Correlation matrix for column 0 and and column1607: -0.010507963076668382
1608
Correlation matrix for column 0 and and column1608: -0.021830798153254588
1609
Correlation matrix for column 0 and and column1609: 0.0008494770931244082
1610
Correlation matrix for column 0 and and column1610: -0.018832388870639594
1611
Correlation matrix for column 0 and and column1611: 0.003425854178098741
1612
Correlation matrix for column 0 and and column161

Correlation matrix for column 0 and and column1809: -0.02984417705125171
1810
Correlation matrix for column 0 and and column1810: 0.019487309009348752
1811
Correlation matrix for column 0 and and column1811: 0.025836284821308203
1812
Correlation matrix for column 0 and and column1812: 0.009466682059819119
1813
Correlation matrix for column 0 and and column1813: -0.013843525547552958
1814
Correlation matrix for column 0 and and column1814: -0.0048530331092902475
1815
Correlation matrix for column 0 and and column1815: 0.002035210564871877
1816
Correlation matrix for column 0 and and column1816: 0.020975512983015265
1817
Correlation matrix for column 0 and and column1817: 0.03495373635916613
1818
Correlation matrix for column 0 and and column1818: 0.002574488318951016
1819
Correlation matrix for column 0 and and column1819: 0.008220116030766722
1820
Correlation matrix for column 0 and and column1820: 0.008571904956110852
1821
Correlation matrix for column 0 and and column1821: -0.0046498

Correlation matrix for column 0 and and column2012: 0.01594237800622273
2013
Correlation matrix for column 0 and and column2013: 0.026385741303300927
2014
Correlation matrix for column 0 and and column2014: -0.020366585933564912
2015
Correlation matrix for column 0 and and column2015: -0.0152250864752371
2016
Correlation matrix for column 0 and and column2016: 0.001157611066228304
2017
Correlation matrix for column 0 and and column2017: -0.012793336912628406
2018
Correlation matrix for column 0 and and column2018: 0.008857128278509258
2019
Correlation matrix for column 0 and and column2019: -0.02828467340410665
2020
Correlation matrix for column 0 and and column2020: -0.006316555776889375
2021
Correlation matrix for column 0 and and column2021: -0.01808408280795138
2022
Correlation matrix for column 0 and and column2022: -0.008266084610627424
2023
Correlation matrix for column 0 and and column2023: -0.012354555641150736
2024
Correlation matrix for column 0 and and column2024: -0.017749

Correlation matrix for column 0 and and column2221: 0.006573470243063905
2222
Correlation matrix for column 0 and and column2222: 0.010092426230587528
2223
Correlation matrix for column 0 and and column2223: -0.006856214915988172
2224
Correlation matrix for column 0 and and column2224: -0.01619197384511496
2225
Correlation matrix for column 0 and and column2225: 0.009675448298063383
2226
Correlation matrix for column 0 and and column2226: 0.007189742198481308
2227
Correlation matrix for column 0 and and column2227: 0.007859379561199484
2228
Correlation matrix for column 0 and and column2228: 0.005913873752017443
2229
Correlation matrix for column 0 and and column2229: -0.002695788749208028
2230
Correlation matrix for column 0 and and column2230: -0.007214066688594596
2231
Correlation matrix for column 0 and and column2231: -0.008341356163268612
2232
Correlation matrix for column 0 and and column2232: 0.00925179981055464
2233
Correlation matrix for column 0 and and column2233: 0.0311778

Correlation matrix for column 0 and and column2438: -0.005387212819603526
2439
Correlation matrix for column 0 and and column2439: 0.009922678394572318
2440
Correlation matrix for column 0 and and column2440: -0.01878282835624451
2441
Correlation matrix for column 0 and and column2441: -0.021514096848847048
2442
Correlation matrix for column 0 and and column2442: -0.02010499756259731
2443
Correlation matrix for column 0 and and column2443: -0.004622032434721941
2444
Correlation matrix for column 0 and and column2444: -0.002946122177286721
2445
Correlation matrix for column 0 and and column2445: 0.015286673826824547
2446
Correlation matrix for column 0 and and column2446: -0.025172820559498967
2447
Correlation matrix for column 0 and and column2447: 0.004089243446699169
2448
Correlation matrix for column 0 and and column2448: -0.008001118602134469
2449
Correlation matrix for column 0 and and column2449: -0.007050728855718345
2450
Correlation matrix for column 0 and and column2450: 0.007

Correlation matrix for column 0 and and column2649: -0.007354887178188973
2650
Correlation matrix for column 0 and and column2650: -0.002875115686811272
2651
Correlation matrix for column 0 and and column2651: -0.0027857562860548572
2652
Correlation matrix for column 0 and and column2652: -0.01657116392469023
2653
Correlation matrix for column 0 and and column2653: 0.010353747645210295
2654
Correlation matrix for column 0 and and column2654: -0.0012162209546295762
2655
Correlation matrix for column 0 and and column2655: 0.0011576367769980276
2656
Correlation matrix for column 0 and and column2656: -0.004841037162598854
2657
Correlation matrix for column 0 and and column2657: -0.009920342443755585
2658
Correlation matrix for column 0 and and column2658: 0.01986782389384825
2659
Correlation matrix for column 0 and and column2659: 0.020686644198611555
2660
Correlation matrix for column 0 and and column2660: 0.0010995698904628316
2661
Correlation matrix for column 0 and and column2661: 0.0

2848
Correlation matrix for column 0 and and column2848: 0.0054315150113045975
2849
Correlation matrix for column 0 and and column2849: 0.0007087548878086371
2850
Correlation matrix for column 0 and and column2850: 0.0021695067723560223
2851
Correlation matrix for column 0 and and column2851: -0.0034904280472756975
2852
Correlation matrix for column 0 and and column2852: -0.01336444664997794
2853
Correlation matrix for column 0 and and column2853: 0.01683682895161907
2854
Correlation matrix for column 0 and and column2854: -0.01089521620271012
2855
Correlation matrix for column 0 and and column2855: -0.03209884301336148
2856
Correlation matrix for column 0 and and column2856: 0.011818479395093735
2857
Correlation matrix for column 0 and and column2857: 0.005072562732227545
2858
Correlation matrix for column 0 and and column2858: 0.01002878296806388
2859
Correlation matrix for column 0 and and column2859: -0.010192084443797511
2860
Correlation matrix for column 0 and and column2860: 0.0

Correlation matrix for column 0 and and column3039: 0.035077599747796055
3040
Correlation matrix for column 0 and and column3040: 0.003638304407836562
3041
Correlation matrix for column 0 and and column3041: 0.01861731281623434
3042
Correlation matrix for column 0 and and column3042: -0.0027432387497823693
3043
Correlation matrix for column 0 and and column3043: 0.01591017923405337
3044
Correlation matrix for column 0 and and column3044: -0.0008794736436590888
3045
Correlation matrix for column 0 and and column3045: -0.022371210734545136
3046
Correlation matrix for column 0 and and column3046: 0.0016176116938684342
3047
Correlation matrix for column 0 and and column3047: -0.0020827827465135676
3048
Correlation matrix for column 0 and and column3048: 0.017180901737368588
3049
Correlation matrix for column 0 and and column3049: -0.016725918964008123
3050
Correlation matrix for column 0 and and column3050: -0.013141243883697345
3051
Correlation matrix for column 0 and and column3051: -0.0

Correlation matrix for column 0 and and column3233: -0.0299724906533241
3234
Correlation matrix for column 0 and and column3234: -0.0036688076357466928
3235
Correlation matrix for column 0 and and column3235: -0.0018249119403632378
3236
Correlation matrix for column 0 and and column3236: -0.009175637891205995
3237
Correlation matrix for column 0 and and column3237: 0.015844358718110414
3238
Correlation matrix for column 0 and and column3238: -0.013254855822492504
3239
Correlation matrix for column 0 and and column3239: 0.0034495349564233673
3240
Correlation matrix for column 0 and and column3240: 0.0012119036571736816
3241
Correlation matrix for column 0 and and column3241: -0.004710871229057715
3242
Correlation matrix for column 0 and and column3242: 0.00412475654362828
3243
Correlation matrix for column 0 and and column3243: -0.0002291359419785704
3244
Correlation matrix for column 0 and and column3244: -0.018717297900541635
3245
Correlation matrix for column 0 and and column3245: -0

Correlation matrix for column 0 and and column3422: -0.0062269238124347944
3423
Correlation matrix for column 0 and and column3423: -0.020561973605235373
3424
Correlation matrix for column 0 and and column3424: 0.011235784663670256
3425
Correlation matrix for column 0 and and column3425: 0.01751997436218751
3426
Correlation matrix for column 0 and and column3426: -0.02977062782723322
3427
Correlation matrix for column 0 and and column3427: 0.0038172038127933807
3428
Correlation matrix for column 0 and and column3428: 0.006556448550731854
3429
Correlation matrix for column 0 and and column3429: -0.01030773415905076
3430
Correlation matrix for column 0 and and column3430: -0.012704015675443507
3431
Correlation matrix for column 0 and and column3431: -0.0038109442043353123
3432
Correlation matrix for column 0 and and column3432: 0.012219062006065347
3433
Correlation matrix for column 0 and and column3433: -0.00353515776603383
3434
Correlation matrix for column 0 and and column3434: -0.012

Correlation matrix for column 0 and and column3603: -0.039123567459823205
3604
Correlation matrix for column 0 and and column3604: 0.013638087400385591
3605
Correlation matrix for column 0 and and column3605: -0.008537154340028259
3606
Correlation matrix for column 0 and and column3606: -0.008089849471333638
3607
Correlation matrix for column 0 and and column3607: 0.0068034576247456725
3608
Correlation matrix for column 0 and and column3608: 0.004616691883619842
3609
Correlation matrix for column 0 and and column3609: -0.033614539739780964
3610
Correlation matrix for column 0 and and column3610: 0.0003042746731644225
3611
Correlation matrix for column 0 and and column3611: 0.013274628303379547
3612
Correlation matrix for column 0 and and column3612: 0.005678396810965934
3613
Correlation matrix for column 0 and and column3613: 0.00344512677432168
3614
Correlation matrix for column 0 and and column3614: -0.019128925774445096
3615
Correlation matrix for column 0 and and column3615: 0.0252

Correlation matrix for column 0 and and column3792: -0.013246985602073975
3793
Correlation matrix for column 0 and and column3793: 0.0022009429740241005
3794
Correlation matrix for column 0 and and column3794: 0.01040228037406993
3795
Correlation matrix for column 0 and and column3795: 0.02691789916733623
3796
Correlation matrix for column 0 and and column3796: 0.01012984309943885
3797
Correlation matrix for column 0 and and column3797: -0.0014765123832202857
3798
Correlation matrix for column 0 and and column3798: 0.0036954565018473278
3799
Correlation matrix for column 0 and and column3799: 0.00881997992546221
3800
Correlation matrix for column 0 and and column3800: -0.0037413958681166088
3801
Correlation matrix for column 0 and and column3801: 0.004796730046476354
3802
Correlation matrix for column 0 and and column3802: -0.004060231187742843
3803
Correlation matrix for column 0 and and column3803: -0.01420629462113186
3804
Correlation matrix for column 0 and and column3804: -0.00228

Correlation matrix for column 0 and and column3978: 0.011677893064297167
3979
Correlation matrix for column 0 and and column3979: -0.029571290213148707
3980
Correlation matrix for column 0 and and column3980: -0.008467151258370392
3981
Correlation matrix for column 0 and and column3981: 0.014255317558220895
3982
Correlation matrix for column 0 and and column3982: -0.01564403577628371
3983
Correlation matrix for column 0 and and column3983: -0.0017742716862722881
3984
Correlation matrix for column 0 and and column3984: 0.0033231955990030205
3985
Correlation matrix for column 0 and and column3985: -0.010547596336317083
3986
Correlation matrix for column 0 and and column3986: -0.0028263079287610612
3987
Correlation matrix for column 0 and and column3987: -0.007141302648766372
3988
Correlation matrix for column 0 and and column3988: -0.028506502915161283
3989
Correlation matrix for column 0 and and column3989: 0.001118962508753631
3990
Correlation matrix for column 0 and and column3990: 0.

Correlation matrix for column 0 and and column4181: 0.019790314278039164
4182
Correlation matrix for column 0 and and column4182: 0.02286097646244228
4183
Correlation matrix for column 0 and and column4183: 0.0013413475520807994
4184
Correlation matrix for column 0 and and column4184: -0.002624125461089737
4185
Correlation matrix for column 0 and and column4185: -0.0017933017300826553
4186
Correlation matrix for column 0 and and column4186: 0.0032390084790182515
4187
Correlation matrix for column 0 and and column4187: -0.024128137380332332
4188
Correlation matrix for column 0 and and column4188: -0.0035844197933360975
4189
Correlation matrix for column 0 and and column4189: 0.010073835186002059
4190
Correlation matrix for column 0 and and column4190: -0.009805302154038985
4191
Correlation matrix for column 0 and and column4191: 0.013570999884010039
4192
Correlation matrix for column 0 and and column4192: 0.0029708437028693117
4193
Correlation matrix for column 0 and and column4193: -0.

Correlation matrix for column 0 and and column4371: 0.01441540399980289
4372
Correlation matrix for column 0 and and column4372: 0.009060033020555155
4373
Correlation matrix for column 0 and and column4373: 0.0023642072545706325
4374
Correlation matrix for column 0 and and column4374: -0.009155327987570933
4375
Correlation matrix for column 0 and and column4375: -0.02182253071269134
4376
Correlation matrix for column 0 and and column4376: 0.004325858439429419
4377
Correlation matrix for column 0 and and column4377: -0.009425722568867038
4378
Correlation matrix for column 0 and and column4378: -0.0109174305902762
4379
Correlation matrix for column 0 and and column4379: 0.02029266982275422
4380
Correlation matrix for column 0 and and column4380: 0.009045594654218315
4381
Correlation matrix for column 0 and and column4381: -0.0031645873824743325
4382
Correlation matrix for column 0 and and column4382: 0.00856656301700797
4383
Correlation matrix for column 0 and and column4383: -0.02320906

Correlation matrix for column 0 and and column4557: -0.0007992158746654319
4558
Correlation matrix for column 0 and and column4558: -0.003700389044026708
4559
Correlation matrix for column 0 and and column4559: -0.023231551017539064
4560
Correlation matrix for column 0 and and column4560: 0.007094016997211415
4561
Correlation matrix for column 0 and and column4561: -0.009104014452854433
4562
Correlation matrix for column 0 and and column4562: -0.0034952319773506706
4563
Correlation matrix for column 0 and and column4563: 0.0007780575492291306
4564
Correlation matrix for column 0 and and column4564: 0.010056192810931891
4565
Correlation matrix for column 0 and and column4565: 0.020443451920379843
4566
Correlation matrix for column 0 and and column4566: -0.017982957474444495
4567
Correlation matrix for column 0 and and column4567: 0.029118927392344406
4568
Correlation matrix for column 0 and and column4568: -0.014165504380130124
4569
Correlation matrix for column 0 and and column4569: 0.

Correlation matrix for column 0 and and column4742: 0.02062842329787683
4743
Correlation matrix for column 0 and and column4743: 0.033220538533459326
4744
Correlation matrix for column 0 and and column4744: -0.018322015446044847
4745
Correlation matrix for column 0 and and column4745: -0.03283732728339695
4746
Correlation matrix for column 0 and and column4746: -0.013060260729176206
4747
Correlation matrix for column 0 and and column4747: 0.008353013603350275
4748
Correlation matrix for column 0 and and column4748: 0.006205159329060627
4749
Correlation matrix for column 0 and and column4749: 0.0032205147106206615
4750
Correlation matrix for column 0 and and column4750: -0.01740428234438073
4751
Correlation matrix for column 0 and and column4751: -0.007992109029728358
4752
Correlation matrix for column 0 and and column4752: -0.003264787511411607
4753
Correlation matrix for column 0 and and column4753: -0.02473719317846939
4754
Correlation matrix for column 0 and and column4754: -0.01305

Correlation matrix for column 0 and and column4932: 0.0074487279191572655
4933
Correlation matrix for column 0 and and column4933: 0.021194224036883865
4934
Correlation matrix for column 0 and and column4934: 0.013345682330790642
4935
Correlation matrix for column 0 and and column4935: -0.024926638514802257
4936
Correlation matrix for column 0 and and column4936: 0.030002019210886955
4937
Correlation matrix for column 0 and and column4937: -0.019752271825461604
4938
Correlation matrix for column 0 and and column4938: 0.007705275791712488
4939
Correlation matrix for column 0 and and column4939: 0.0029413674961997582
4940
Correlation matrix for column 0 and and column4940: -0.00806230293104932
4941
Correlation matrix for column 0 and and column4941: 0.0013112318069093878
4942
Correlation matrix for column 0 and and column4942: 0.02407964785421506
4943
Correlation matrix for column 0 and and column4943: -0.004545641358066638
4944
Correlation matrix for column 0 and and column4944: 0.01043

Correlation matrix for column 0 and and column5128: -0.040856791508140235
5129
Correlation matrix for column 0 and and column5129: 0.014018996324864003
5130
Correlation matrix for column 0 and and column5130: -0.017915360933166233
5131
Correlation matrix for column 0 and and column5131: -0.004934080013713775
5132
Correlation matrix for column 0 and and column5132: -0.005947725083744615
5133
Correlation matrix for column 0 and and column5133: 0.006680139180470876
5134
Correlation matrix for column 0 and and column5134: -0.005176213037376293
5135
Correlation matrix for column 0 and and column5135: -0.000664543501990375
5136
Correlation matrix for column 0 and and column5136: 0.018216636107836602
5137
Correlation matrix for column 0 and and column5137: -0.004440949835858367
5138
Correlation matrix for column 0 and and column5138: 0.009292715727664358
5139
Correlation matrix for column 0 and and column5139: 0.023440988188144225
5140
Correlation matrix for column 0 and and column5140: -0.00

Correlation matrix for column 0 and and column5315: 0.0044182118873805805
5316
Correlation matrix for column 0 and and column5316: 0.03444944815312355
5317
Correlation matrix for column 0 and and column5317: 0.006455637771272517
5318
Correlation matrix for column 0 and and column5318: -0.008172477888540189
5319
Correlation matrix for column 0 and and column5319: -0.001953999334517549
5320
Correlation matrix for column 0 and and column5320: -0.02990902591255545
5321
Correlation matrix for column 0 and and column5321: -0.0095252898044536
5322
Correlation matrix for column 0 and and column5322: -0.023658238111865147
5323
Correlation matrix for column 0 and and column5323: -0.00812248343330229
5324
Correlation matrix for column 0 and and column5324: 0.00981763905409954
5325
Correlation matrix for column 0 and and column5325: -0.002634683511852254
5326
Correlation matrix for column 0 and and column5326: -0.007173379476327309
5327
Correlation matrix for column 0 and and column5327: 0.0023355

5506
Correlation matrix for column 0 and and column5506: 0.002500653847572964
5507
Correlation matrix for column 0 and and column5507: -0.0075321139804821035
5508
Correlation matrix for column 0 and and column5508: 0.0008543438737412386
5509
Correlation matrix for column 0 and and column5509: 0.01680715026380264
5510
Correlation matrix for column 0 and and column5510: -0.006087517651565272
5511
Correlation matrix for column 0 and and column5511: -0.005961807224039696
5512
Correlation matrix for column 0 and and column5512: 0.019432698970754284
5513
Correlation matrix for column 0 and and column5513: -0.005934422624931349
5514
Correlation matrix for column 0 and and column5514: 0.010566795758530928
5515
Correlation matrix for column 0 and and column5515: -0.0012890236853762638
5516
Correlation matrix for column 0 and and column5516: -2.2720046876066626e-05
5517
Correlation matrix for column 0 and and column5517: 0.010593211908680079
5518
Correlation matrix for column 0 and and column551

Correlation matrix for column 0 and and column5697: -0.01077288401140441
5698
Correlation matrix for column 0 and and column5698: -0.019148178252688698
5699
Correlation matrix for column 0 and and column5699: -0.02477989294717923
5700
Correlation matrix for column 0 and and column5700: -0.019150929686264234
5701
Correlation matrix for column 0 and and column5701: 0.02501849691348124
5702
Correlation matrix for column 0 and and column5702: 0.002067202940548267
5703
Correlation matrix for column 0 and and column5703: -0.0009521412644876206
5704
Correlation matrix for column 0 and and column5704: -0.02630976777908745
5705
Correlation matrix for column 0 and and column5705: -0.008251251826926086
5706
Correlation matrix for column 0 and and column5706: 0.005644099448466375
5707
Correlation matrix for column 0 and and column5707: -0.012495039530333102
5708
Correlation matrix for column 0 and and column5708: -0.00891670643115415
5709
Correlation matrix for column 0 and and column5709: 0.00923

Correlation matrix for column 0 and and column5896: 0.02288160220465776
5897
Correlation matrix for column 0 and and column5897: 0.014232246918305576
5898
Correlation matrix for column 0 and and column5898: 0.03477046398323567
5899
Correlation matrix for column 0 and and column5899: 0.024449775414890455
5900
Correlation matrix for column 0 and and column5900: 0.03643420071325494
5901
Correlation matrix for column 0 and and column5901: -0.001221978621425401
5902
Correlation matrix for column 0 and and column5902: 0.021437106298628942
5903
Correlation matrix for column 0 and and column5903: -0.020526466306017604
5904
Correlation matrix for column 0 and and column5904: 0.004670123213919324
5905
Correlation matrix for column 0 and and column5905: -0.037424502368588214
5906
Correlation matrix for column 0 and and column5906: 0.008556147493476206
5907
Correlation matrix for column 0 and and column5907: -0.021242442728503237
5908
Correlation matrix for column 0 and and column5908: -0.00345682

Correlation matrix for column 0 and and column6119: 0.015052549677452324
6120
Correlation matrix for column 0 and and column6120: -0.011330843913487945
6121
Correlation matrix for column 0 and and column6121: 0.006990052684605521
6122
Correlation matrix for column 0 and and column6122: -0.004219214538590434
6123
Correlation matrix for column 0 and and column6123: 0.017612977481193226
6124
Correlation matrix for column 0 and and column6124: 0.00234944829753721
6125
Correlation matrix for column 0 and and column6125: -0.0029204091354503078
6126
Correlation matrix for column 0 and and column6126: 0.0011413342404543165
6127
Correlation matrix for column 0 and and column6127: 0.008014829270478382
6128
Correlation matrix for column 0 and and column6128: -0.015000692396098596
6129
Correlation matrix for column 0 and and column6129: -0.0032440289902087964
6130
Correlation matrix for column 0 and and column6130: 0.0015206660039328891
6131
Correlation matrix for column 0 and and column6131: 0.00

6363
Correlation matrix for column 0 and and column6363: -0.01649708773811579
6364
Correlation matrix for column 0 and and column6364: -0.00711013130371475
6365
Correlation matrix for column 0 and and column6365: -0.013388656514698423
6366
Correlation matrix for column 0 and and column6366: -0.013329893944027211
6367
Correlation matrix for column 0 and and column6367: 0.01286611443013006
6368
Correlation matrix for column 0 and and column6368: 0.005061700537217271
6369
Correlation matrix for column 0 and and column6369: -0.004399743753393196
6370
Correlation matrix for column 0 and and column6370: -0.005483736493760836
6371
Correlation matrix for column 0 and and column6371: -0.004771359106738682
6372
Correlation matrix for column 0 and and column6372: -0.0031726943589384404
6373
Correlation matrix for column 0 and and column6373: 0.001940990475977588
6374
Correlation matrix for column 0 and and column6374: -0.010304332177975719
6375
Correlation matrix for column 0 and and column6375: 

6600
Correlation matrix for column 0 and and column6600: 0.007691723327371986
6601
Correlation matrix for column 0 and and column6601: -0.007459107250981553
6602
Correlation matrix for column 0 and and column6602: -0.003934080542984127
6603
Correlation matrix for column 0 and and column6603: -0.005781673594213781
6604
Correlation matrix for column 0 and and column6604: -0.013627270753922017
6605
Correlation matrix for column 0 and and column6605: 0.011713231764129637
6606
Correlation matrix for column 0 and and column6606: -0.02117454753578176
6607
Correlation matrix for column 0 and and column6607: -0.0073562954767310024
6608
Correlation matrix for column 0 and and column6608: -0.00709611244138588
6609
Correlation matrix for column 0 and and column6609: -0.008019834991271903
6610
Correlation matrix for column 0 and and column6610: 0.005100086731253177
6611
Correlation matrix for column 0 and and column6611: -0.017061206531605477
6612
Correlation matrix for column 0 and and column6612:

Correlation matrix for column 0 and and column6842: -0.016950168433402375
6843
Correlation matrix for column 0 and and column6843: -0.009491044669607714
6844
Correlation matrix for column 0 and and column6844: 0.004404224951708284
6845
Correlation matrix for column 0 and and column6845: 0.010244807975479641
6846
Correlation matrix for column 0 and and column6846: -0.004588659347516055
6847
Correlation matrix for column 0 and and column6847: -0.021809322853494212
6848
Correlation matrix for column 0 and and column6848: 0.00661833589822355
6849
Correlation matrix for column 0 and and column6849: -0.013884755659224033
6850
Correlation matrix for column 0 and and column6850: -0.009187053820484195
6851
Correlation matrix for column 0 and and column6851: 0.0236749758327453
6852
Correlation matrix for column 0 and and column6852: -0.011291916717751777
6853
Correlation matrix for column 0 and and column6853: 0.0009774871183014762
6854
Correlation matrix for column 0 and and column6854: -0.0239

Correlation matrix for column 0 and and column7091: 0.016591377140143415
7092
Correlation matrix for column 0 and and column7092: -0.03591893426406862
7093
Correlation matrix for column 0 and and column7093: -0.025914196921053848
7094
Correlation matrix for column 0 and and column7094: 0.0015127056715013474
7095
Correlation matrix for column 0 and and column7095: -0.012727220871994209
7096
Correlation matrix for column 0 and and column7096: 0.01933286520368229
7097
Correlation matrix for column 0 and and column7097: 0.009635474268174106
7098
Correlation matrix for column 0 and and column7098: 0.0183572204604787
7099
Correlation matrix for column 0 and and column7099: -0.01769204153668559
7100
Correlation matrix for column 0 and and column7100: 0.019230728719110912
7101
Correlation matrix for column 0 and and column7101: 0.0007199669260956614
7102
Correlation matrix for column 0 and and column7102: -0.02455207932815014
7103
Correlation matrix for column 0 and and column7103: -0.02402286

In [11]:
# Count how many entries of each correlation list exceed the threshold in
# magnitude, then report that count as a percentage of the list length.
threshold = 0.01  # magnitude above which a correlation is counted

# a: number of entries in list_corr_df1 with |value| > threshold
a = sum(1 for corr in list_corr_df1 if abs(corr) > threshold)

# b: number of entries in list_corr_df_new with |value| > threshold
b = sum(1 for corr in list_corr_df_new if abs(corr) > threshold)

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df_new:",(b/len(list_corr_df_new)*100),"%")

Percentage of correlations in df1: 47.099999999999994 %
Percentage of correlations in df_new: 48.07507877791478 %


## Random Projection eps = 0.5

In [12]:
# Fit a Gaussian random projection (eps = 0.5) on `df`, transform it, and
# wrap the projected array in a new DataFrame.
transformer = random_projection.GaussianRandomProjection(eps=0.5)
projected_values = transformer.fit_transform(df)
df_new = pd.DataFrame(projected_values)
df_new

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,398,399,400,401,402,403,404,405,406,407
0,-8.629941,-1.265537,15.487725,-10.779927,18.561604,-3.391940,7.267702,-2.028013,27.986498,15.408553,...,-13.697259,-14.877993,-21.551644,19.340911,13.778751,49.931803,-37.622600,0.937428,-7.463711,-9.687514
1,-12.439863,3.933432,8.829969,4.241479,14.488950,-0.049451,-18.869497,-11.082897,37.524516,11.505659,...,-11.738396,-12.472992,-20.130305,17.757440,9.675622,46.153002,-31.508683,4.177262,-17.031040,-11.010520
2,-4.867101,-6.756456,21.754916,3.180779,19.998210,-5.908674,10.151688,-1.440249,29.753864,20.495831,...,-3.283173,-3.792217,-19.867172,28.286943,27.155625,29.168476,-34.765819,3.086872,-0.875922,-1.909711
3,-10.780331,4.596899,13.191428,10.051551,26.749470,-4.216214,-4.601400,-9.267346,19.546493,8.876526,...,6.157830,-11.563185,-30.395220,29.835699,14.097705,29.836473,-23.138226,-2.434577,-10.070075,-26.143900
4,-9.463675,5.615619,-0.038439,6.817649,13.304266,3.466693,-2.246526,4.580938,23.670755,13.491025,...,-1.723470,-22.025860,-19.687756,28.094187,13.068718,35.250452,-38.686700,7.192691,-1.052316,-9.713851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,-8.636917,5.585686,12.959052,-7.155043,21.735511,0.214469,-11.946037,6.590320,38.638227,20.099393,...,-5.871749,-7.378012,-24.329721,32.180573,17.882009,30.000774,-29.201036,2.471314,-4.327141,-8.275181
4996,-23.356470,13.209732,20.191494,9.932011,25.920700,-21.069446,-8.055062,-13.564569,30.590608,13.962384,...,-14.616067,-13.483318,-19.337891,21.090477,10.828681,40.837312,-22.149126,5.315488,-5.595803,-4.076890
4997,-14.743987,6.632898,9.511119,17.185716,5.883583,-13.147554,6.055436,-4.905196,27.485486,23.645821,...,-2.702286,-26.403701,-20.995660,11.746160,7.331648,38.522860,-30.202183,0.296541,-2.319545,1.638461
4998,-3.136446,1.485153,18.386571,-3.254491,11.862612,-8.994281,-5.422773,-18.125227,22.651998,13.602148,...,-5.765000,-1.288352,-26.089154,23.245410,19.113972,31.342267,-32.927140,1.000428,-1.000819,-23.799221


In [13]:
# Predictors: every column of df_new except column 0
X_df_new = df_new.drop(columns=0)

# Target: column 0 of df_new
Y_df_new = df_new[0]
print(Y_df_new)

# Flatten the target into a 1-D NumPy array (not a Python list) for np.corrcoef
Y_df_new = np.array(Y_df_new).reshape(-1)
print(X_df_new.shape,Y_df_new.shape)

list_titles = X_df_new.columns

# For (at most) the first 1000 remaining columns, store the absolute
# correlation coefficient with column 0 and print the signed value.
list_corr_df_new = []
for i in list_titles[0:1000]:
    print(i)
    # Compute the coefficient once and reuse it for both the stored value
    # and the printed line; [0][1] is the off-diagonal entry of the 2x2 matrix.
    corr = np.corrcoef(Y_df_new, X_df_new[i])[0][1]
    list_corr_df_new.append(abs(corr))
    print('Correlation matrix for column 0 and and column' + str(i) + ': ' + str(corr))

0       -8.629941
1      -12.439863
2       -4.867101
3      -10.780331
4       -9.463675
          ...    
4995    -8.636917
4996   -23.356470
4997   -14.743987
4998    -3.136446
4999   -11.760641
Name: 0, Length: 5000, dtype: float64
(5000, 407) (5000,)
1
Correlation matrix for column 0 and and column1: -0.004673418642172778
2
Correlation matrix for column 0 and and column2: -0.002435154057182532
3
Correlation matrix for column 0 and and column3: 0.008580721076215
4
Correlation matrix for column 0 and and column4: 0.020890477961258824
5
Correlation matrix for column 0 and and column5: -0.006016029187055303
6
Correlation matrix for column 0 and and column6: -0.005049553431315706
7
Correlation matrix for column 0 and and column7: 0.010701594476297826
8
Correlation matrix for column 0 and and column8: -0.003911362280946965
9
Correlation matrix for column 0 and and column9: 0.005573354131393368
10
Correlation matrix for column 0 and and column10: -0.011138980988864441
11
Correlation matr

Correlation matrix for column 0 and and column190: 0.010822645566926341
191
Correlation matrix for column 0 and and column191: -0.011094417587677976
192
Correlation matrix for column 0 and and column192: 0.001670999657985725
193
Correlation matrix for column 0 and and column193: -0.0032519865700896796
194
Correlation matrix for column 0 and and column194: 0.004106026553728409
195
Correlation matrix for column 0 and and column195: -0.011062189732749519
196
Correlation matrix for column 0 and and column196: -0.011551472613718125
197
Correlation matrix for column 0 and and column197: -0.01520331593470054
198
Correlation matrix for column 0 and and column198: -0.0038477719179257222
199
Correlation matrix for column 0 and and column199: 0.01504989155345542
200
Correlation matrix for column 0 and and column200: -0.02914813869065471
201
Correlation matrix for column 0 and and column201: -0.014087165694263689
202
Correlation matrix for column 0 and and column202: 0.02784860960194801
203
Correl

Correlation matrix for column 0 and and column393: 0.01637417372310432
394
Correlation matrix for column 0 and and column394: -0.0022093862243806905
395
Correlation matrix for column 0 and and column395: 0.01824786389214537
396
Correlation matrix for column 0 and and column396: -0.001502816183273011
397
Correlation matrix for column 0 and and column397: 0.003926355916344475
398
Correlation matrix for column 0 and and column398: -0.0017492666231270142
399
Correlation matrix for column 0 and and column399: -0.013272566443975405
400
Correlation matrix for column 0 and and column400: -0.023743356848615116
401
Correlation matrix for column 0 and and column401: -0.018832287313233755
402
Correlation matrix for column 0 and and column402: -0.02934781882024839
403
Correlation matrix for column 0 and and column403: 0.003712000050155227
404
Correlation matrix for column 0 and and column404: -0.011837180227090592
405
Correlation matrix for column 0 and and column405: 0.001347729253609763
406
Corre

In [14]:
# Count how many entries of each correlation list exceed the threshold in
# magnitude, then report that count as a percentage of the list length.
threshold = 0.01  # magnitude above which a correlation is counted

# a: number of entries in list_corr_df1 with |value| > threshold
a = sum(1 for corr in list_corr_df1 if abs(corr) > threshold)

# b: number of entries in list_corr_df_new with |value| > threshold
b = sum(1 for corr in list_corr_df_new if abs(corr) > threshold)

print("Percentage of correlations in df1:",(a/len(list_corr_df1)*100),"%")

print("Percentage of correlations in df_new:",(b/len(list_corr_df_new)*100),"%")

Percentage of correlations in df1: 47.099999999999994 %
Percentage of correlations in df_new: 49.385749385749385 %
