In [4]:
import pandas as pd
import os
import numpy as np

def preprocess(
        dietary_url='https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DSQTOT.XPT', 
        nutrition_url='https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DR1TOT.XPT', 
        dental_url='https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_OHXDEN.XPT'):
    '''
    @description: 
        Load, merge and clean the Dietary Supplement, Nutrient Intakes, and
        Dental Health tables. Each table is read from a local copy under
        ./dataset when one exists, otherwise from its URL. The three tables
        are inner-joined on SEQN, selected missing values are filled with
        fixed codes, the coronal caries / sealant columns are dropped, and
        finally every row that still contains a missing value is removed.
    @param: 
        dietary_url: location of the dietary supplement table (P_DSQTOT)
        nutrition_url: location of the nutrient intake table (P_DR1TOT)
        dental_url: location of the dental health table (P_OHXDEN)
    @return: 
        Pre-processed Pandas DataFrame without any missing values
    '''
    # please be patient for data download
    # Each local cache path must name the same file as the URL it stands in
    # for; otherwise the two tables swap roles whenever a cached copy exists
    # and the column order of the merged frame changes.
    dietary_path = './dataset/P_DSQTOT.XPT'
    nutrition_path = './dataset/P_DR1TOT.XPT'
    dental_path = './dataset/P_OHXDEN.XPT'

    df_dietary = pd.read_sas(dietary_path if os.path.exists(dietary_path) else dietary_url)
    df_nutrition = pd.read_sas(nutrition_path if os.path.exists(nutrition_path) else nutrition_url)
    df_dental = pd.read_sas(dental_path if os.path.exists(dental_path) else dental_url)
    # row count of the nutrient intake table, printed as a quick sanity check
    print(len(df_nutrition))
    # default (inner) joins: only SEQN values present in all three tables survive
    df_data = pd.merge(df_dietary, df_nutrition, on='SEQN')
    df_data = pd.merge(df_data, df_dental, on='SEQN')

    # fill the missing precise quantitative or special dietary data with 0
    attrs_set_0 = ['DR1SKY'] + [
        attr for attr in df_data.columns
        if attr.startswith(('DSQ', 'DRQS', 'DRD'))
    ]
    df_data[attrs_set_0] = df_data[attrs_set_0].fillna(0)

    # drop Coronal Caries and Sealants attributes from dental data
    dropped_suffixes = ('CTC', 'CSC', 'RTC', 'RSC', 'SE')
    attrs_keep = [
        attr for attr in df_data.columns
        if not (attr.startswith('OHX') and attr.endswith(dropped_suffixes))
    ]
    # explicit copy so the assignments below write to an independent frame
    df_data = df_data[attrs_keep].copy()

    # The fills below use plain assignment rather than
    # `df.loc[:, col].fillna(..., inplace=True)`: the latter mutates an
    # intermediate Series and is not guaranteed to reach the DataFrame.

    # fill missing "dental implant or not?" with "no"
    df_data['OHXIMP'] = df_data['OHXIMP'].fillna(2)
    # fill missing "how often add salt?" with "don't know"
    df_data['DBD100'] = df_data['DBD100'].fillna(9)

    # fill missing "Root Caries, Non-carious Lesion, Root Caries Restoration, Non-carious Lesion Restoration" with "not detected"
    root_attrs = ['OHXRCAR', 'OHXRCARO', 'OHXRRES', 'OHXRRESO']
    df_data[root_attrs] = df_data[root_attrs].fillna(0)

    # anything still missing at this point removes the whole row
    df_data = df_data.dropna()

    return df_data

# Run the whole pipeline once with the default sources (local copies under
# ./dataset when present, otherwise the default URLs); the cells below all
# work from this merged, cleaned frame.
final_data = preprocess()

  df[x] = v


14300


Total Nutritional columns = 169

In [5]:
# NOTE(review): `nutritions` is not used by any cell visible here; kept in
# case a later cell reads it.
nutritions = []

# Keep the respondent id plus every column from DSQTIODI through DRD370V.
# `.loc` label slices include both end points and follow the column order of
# `final_data`.
df_nutritions = pd.concat(
    [final_data.loc[:, "SEQN"], final_data.loc[:, "DSQTIODI":"DRD370V"]],
    axis=1,
)
df_nutritions.info()
print(df_nutritions.columns[1:10])

# Write the CSV first so the export exists regardless of where the notebook
# runs; the browser download is a Colab-only convenience.
df_nutritions.to_csv('filename.csv')
try:
    from google.colab import files
except ImportError:
    # Not running on Google Colab: the file simply stays on local disk.
    print("google.colab is not available; 'filename.csv' was written locally")
else:
    files.download('filename.csv')


<class 'pandas.core.frame.DataFrame'>
Int64Index: 11196 entries, 0 to 13770
Columns: 169 entries, SEQN to DRD370V
dtypes: float64(169)
memory usage: 14.5 MB
Index(['DSQTIODI', 'WTDRD1PP', 'WTDR2DPP', 'DR1DRSTZ', 'DR1EXMER', 'DRABF',
       'DRDINT', 'DR1DBIH', 'DR1DAY'],
      dtype='object')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
# Collect the name of every column whose entries are all exactly zero
zero_columns = [
    column_name
    for column_name in df_nutritions.columns
    if df_nutritions[column_name].eq(0).all()
]

print(zero_columns)

[]


In [None]:
# Sort every column into one of four buckets by how many distinct values it
# holds: 1 -> constant, 2 -> binary, 3 to 5 -> "teritionary", anything else
# -> continuous.
binary_df_columns = []
teritionary_df_columns = []
constant_df_columns = []
continuous_variables = []
for column_name in df_nutritions.columns:
    n_distinct = len(set(df_nutritions[column_name]))
    if n_distinct == 1:
        bucket = constant_df_columns
    elif n_distinct == 2:
        bucket = binary_df_columns
    elif 3 <= n_distinct <= 5:
        bucket = teritionary_df_columns
    else:
        bucket = continuous_variables
    bucket.append(column_name)

# Report each bucket followed by its size
for group in (teritionary_df_columns, binary_df_columns,
              continuous_variables, constant_df_columns):
    print(group)
    print(len(group))

In [10]:
# Restrict the frame to the columns bucketed as continuous, then print their
# pairwise correlation matrix
df_continuous_variables = df_nutritions.loc[:, continuous_variables]
correlation_table = df_continuous_variables.corr()
print(correlation_table)

              SEQN  DSQTIODI  WTDRD1PP  WTDR2DPP  DR1EXMER   DR1DBIH  \
SEQN      1.000000 -0.010717 -0.008671 -0.005479 -0.006885 -0.017866   
DSQTIODI -0.010717  1.000000  0.031351  0.033538 -0.002526  0.005043   
WTDRD1PP -0.008671  0.031351  1.000000  0.851316 -0.009085  0.013480   
WTDR2DPP -0.005479  0.033538  0.851316  1.000000 -0.013420  0.012748   
DR1EXMER -0.006885 -0.002526 -0.009085 -0.013420  1.000000 -0.022591   
...            ...       ...       ...       ...       ...       ...   
DRD370QQ  0.009271  0.013037  0.016810  0.013104 -0.002407 -0.004775   
DRD370RQ  0.006941  0.005110  0.001981  0.010649  0.001553 -0.007384   
DRD370SQ  0.000015  0.007721  0.029909  0.029119  0.022051 -0.001167   
DRD370TQ -0.010563  0.005063 -0.023035 -0.018998 -0.025110  0.004224   
DRD370UQ  0.012191  0.006881 -0.017724 -0.013687 -0.000019 -0.016019   

            DR1DAY   DR1LANG  DR1MRESP   DR1HELP  ...  DRD370KQ  DRD370LQ  \
SEQN      0.010836  0.009930 -0.001306  0.002598  ... -0.0

In [11]:
# describe() reports per-column summary statistics (count, mean, std, min,
# quartiles, max) for the continuous columns; it does not report correlations.
df_continuous_variables.describe()

Unnamed: 0,SEQN,DSQTIODI,WTDRD1PP,WTDR2DPP,DR1EXMER,DR1DBIH,DR1DAY,DR1LANG,DR1MRESP,DR1HELP,...,DRD370KQ,DRD370LQ,DRD370MQ,DRD370NQ,DRD370OQ,DRD370QQ,DRD370RQ,DRD370SQ,DRD370TQ,DRD370UQ
count,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,...,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0,11196.0
mean,117149.814219,20.236194,26606.907687,26821.09,72.837442,12.085834,4.547517,1.12433,1.272329,10.677653,...,0.033405,0.006431,0.568775,0.07851,0.020096,0.009468,0.035906,0.009736,0.329046,0.068328
std,4494.112384,143.309663,35513.322355,42790.0,21.41261,12.931712,2.200283,0.467901,1.111259,3.319837,...,0.301182,0.172089,1.546367,0.678384,0.366103,0.221448,0.323222,0.13824,1.363461,0.585421
min,109263.0,0.0,805.552485,5.397605e-79,14.0,-42.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,113286.75,0.0,8109.894171,5247.106,73.0,3.0,2.0,1.0,1.0,12.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,117220.5,0.0,15065.105598,13399.89,81.0,9.0,5.0,1.0,1.0,12.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,121071.5,0.0,30541.012895,31255.7,86.0,19.0,6.0,1.0,1.0,12.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,124821.0,12500.0,538404.429708,702943.6,94.0,91.0,7.0,6.0,77.0,12.0,...,12.0,12.0,25.0,24.0,30.0,20.0,10.0,5.0,30.0,30.0


In [12]:
# Pairwise correlations between the continuous columns.
corr_raw = df_continuous_variables.corr()

# Zero the diagonal on an explicit, writable copy of the matrix. Writing
# through `DataFrame.values` only works while pandas hands back a writable
# view of the frame's data, which is not guaranteed (with Copy-on-Write the
# array is read-only).
corr_values = corr_raw.to_numpy(copy=True)
# every column correlates perfectly with itself; blank that out so the
# highlighting below picks real column pairs
np.fill_diagonal(corr_values, 0)
df_corr = pd.DataFrame(corr_values, index=corr_raw.index, columns=corr_raw.columns)

# per column: strongest positive correlation in yellow, most negative in blue
df_corr.style.highlight_max(color='yellow').highlight_min(color='blue')


Unnamed: 0,SEQN,DSQTIODI,WTDRD1PP,WTDR2DPP,DR1EXMER,DR1DBIH,DR1DAY,DR1LANG,DR1MRESP,DR1HELP,DR1TNUMF,DR1TKCAL,DR1TPROT,DR1TCARB,DR1TSUGR,DR1TFIBE,DR1TTFAT,DR1TSFAT,DR1TMFAT,DR1TPFAT,DR1TCHOL,DR1TATOC,DR1TATOA,DR1TRET,DR1TVARA,DR1TACAR,DR1TBCAR,DR1TCRYP,DR1TLYCO,DR1TLZ,DR1TVB1,DR1TVB2,DR1TNIAC,DR1TVB6,DR1TFOLA,DR1TFA,DR1TFF,DR1TFDFE,DR1TCHL,DR1TVB12,DR1TB12A,DR1TVC,DR1TVD,DR1TVK,DR1TCALC,DR1TPHOS,DR1TMAGN,DR1TIRON,DR1TZINC,DR1TCOPP,DR1TSODI,DR1TPOTA,DR1TSELE,DR1TCAFF,DR1TTHEO,DR1TALCO,DR1TMOIS,DR1TS040,DR1TS060,DR1TS080,DR1TS100,DR1TS120,DR1TS140,DR1TS160,DR1TS180,DR1TM161,DR1TM181,DR1TM201,DR1TM221,DR1TP182,DR1TP183,DR1TP184,DR1TP204,DR1TP205,DR1TP225,DR1TP226,DR1_320Z,DR1_330Z,DR1BWATZ,DRD350AQ,DRD350BQ,DRD350CQ,DRD350DQ,DRD350EQ,DRD350FQ,DRD350GQ,DRD350HQ,DRD350IQ,DRD370AQ,DRD370BQ,DRD370CQ,DRD370DQ,DRD370EQ,DRD370FQ,DRD370GQ,DRD370HQ,DRD370IQ,DRD370KQ,DRD370LQ,DRD370MQ,DRD370NQ,DRD370OQ,DRD370QQ,DRD370RQ,DRD370SQ,DRD370TQ,DRD370UQ
SEQN,0.0,-0.010717,-0.008671,-0.005479,-0.006885,-0.017866,0.010836,0.00993,-0.001306,0.002598,-0.010735,-0.01218,-0.002621,-0.015686,-0.010504,-0.019688,-0.009047,-0.002947,-0.014601,-0.00997,0.012001,-0.018595,-0.009654,0.004119,0.000586,-0.010657,-0.003988,0.010546,-0.014146,0.009139,-0.012421,-0.011728,-0.009908,-0.007988,-0.004879,-0.005331,-0.002168,-0.005223,0.000937,0.005418,-0.002445,0.002143,-0.001898,0.004588,-0.00211,-0.011929,-0.016677,-0.009636,-0.000258,-0.009814,-0.014603,-0.01653,0.003609,-0.001802,-0.019581,-0.001772,-0.007525,0.010447,0.012492,-0.000518,0.008145,-0.001264,0.009266,-0.006086,-0.004649,-0.003233,-0.014828,-0.014588,-0.007447,-0.010146,-0.006997,-0.002355,-0.001377,0.008471,0.005706,-0.000941,-0.002779,-0.005491,0.002041,0.009804,0.008733,0.010232,-0.000366,-0.011805,-0.015524,0.007601,-0.006634,-0.004827,0.019305,0.009968,-0.000303,-0.00629,-0.007704,0.005248,-0.003659,-0.008695,0.00758,-0.003038,-0.00334,0.001121,-0.004931,-0.016038,0.009271,0.006941,1.5e-05,-0.010563,0.012191
DSQTIODI,-0.010717,0.0,0.031351,0.033538,-0.002526,0.005043,-0.002097,-0.013252,-0.017335,0.024581,0.077243,0.012889,0.013286,0.013062,-0.002851,0.039528,0.009847,-0.002051,0.014221,0.018836,-0.004323,0.026669,0.009739,0.000249,0.013338,0.010151,0.022958,0.000511,0.001135,0.029247,0.01154,0.010834,0.006671,0.011653,0.040851,0.003336,0.066073,0.02929,0.011443,-0.00547,-0.002821,0.008273,-0.001872,0.031694,0.004462,0.012078,0.039478,0.018961,0.006868,0.0349,0.015081,0.028313,0.005322,0.031766,0.002938,0.002071,0.047234,-0.00241,-0.001424,0.000422,-0.001936,-0.000794,-0.005975,-0.001809,-0.00294,-0.009483,0.015166,0.011945,0.010897,0.01801,0.026458,-0.002427,-0.009978,0.004113,-0.002164,0.003792,0.039497,0.04651,0.002368,0.004917,-0.000506,0.00023,0.011269,0.005253,0.000275,0.001941,0.005288,0.002336,0.002518,0.016465,-0.002967,0.008747,0.010916,0.00837,0.012699,0.002779,0.004516,0.004129,-0.000177,0.020843,0.013539,0.003986,0.013037,0.00511,0.007721,0.005063,0.006881
WTDRD1PP,-0.008671,0.031351,0.0,0.851316,-0.009085,0.01348,-0.171798,-0.058949,-0.070053,0.09517,0.091523,0.038664,0.070022,0.000972,-0.004996,0.055307,0.043902,0.049003,0.044105,0.024683,0.030901,0.059452,0.060516,0.035196,0.057949,0.038245,0.051317,0.002536,0.026178,0.04326,0.048502,0.099227,0.075497,0.07739,0.051172,0.0162,0.069226,0.040956,0.054907,0.029251,0.04823,-0.005796,-0.004694,0.047528,0.053122,0.063623,0.097862,0.047695,0.050915,0.05643,0.044471,0.078872,0.045034,0.15232,0.033998,0.052752,0.139687,0.076998,0.067372,0.029534,0.060365,0.00783,0.058828,0.037543,0.053911,0.034782,0.043097,0.012363,0.007778,0.022804,0.038131,-0.000815,0.038502,-0.003981,0.010352,0.000257,0.079059,0.091751,0.006079,0.034421,0.014947,-0.004286,0.043788,0.007746,7.7e-05,0.05741,0.011164,0.016036,-0.002964,0.065761,-0.006374,-0.053654,0.036875,0.00776,0.059747,-0.016606,-0.014932,0.028701,-0.007469,0.030659,-0.010189,0.000313,0.01681,0.001981,0.029909,-0.023035,-0.017724
WTDR2DPP,-0.005479,0.033538,0.851316,0.0,-0.01342,0.012748,-0.140516,-0.067339,-0.063797,0.082351,0.099016,0.035706,0.0591,0.003011,-0.000475,0.051696,0.038,0.04211,0.039803,0.021195,0.033882,0.046325,0.034725,0.027988,0.047027,0.023024,0.043495,-0.001061,0.018111,0.046953,0.036008,0.086978,0.059068,0.068813,0.04185,0.00399,0.068523,0.030203,0.054169,0.021032,0.036616,-0.002312,-0.00465,0.048609,0.047386,0.056084,0.087344,0.037373,0.040414,0.048189,0.036068,0.071613,0.035644,0.134503,0.035608,0.054609,0.126822,0.066803,0.056057,0.029787,0.053446,0.00994,0.051724,0.032242,0.043366,0.029949,0.039028,0.009104,0.009548,0.019626,0.032669,-0.00073,0.034966,-0.003315,0.006681,0.002671,0.07795,0.093531,0.002941,0.025776,0.015485,-0.002552,0.036113,-0.00207,-0.000711,0.037317,0.010891,0.01128,0.002332,0.049733,-0.005254,-0.045826,0.032114,-0.001003,0.042687,-0.018948,-0.007079,0.018408,-0.008688,0.033004,-0.006543,-0.000123,0.013104,0.010649,0.029119,-0.018998,-0.013687
DR1EXMER,-0.006885,-0.002526,-0.009085,-0.01342,0.0,-0.022591,-0.008584,0.004746,0.002401,0.018184,-0.022704,0.018029,0.014952,0.01166,0.016023,-0.01499,0.01775,0.020224,0.018929,0.005542,0.009266,-0.004023,-0.004211,0.016735,-0.001894,-0.023021,-0.024573,-0.011928,-0.004553,-0.029125,0.006484,0.020884,0.022677,0.022242,-0.011155,-0.002919,-0.015489,-0.00868,0.012973,0.027459,0.014492,-0.008452,0.010822,-0.029802,0.013788,0.017441,1.9e-05,-0.00445,0.012471,0.002243,0.011865,0.001711,-0.001269,0.003932,0.00287,0.008103,0.014741,0.010079,0.007762,0.002812,0.006728,0.006129,0.015814,0.021152,0.020349,0.020754,0.01852,0.022331,-0.001001,0.00623,-0.00435,0.013579,0.001437,0.002035,0.01026,0.000877,0.012604,-0.003074,0.01858,-0.012603,-0.042738,0.008811,-0.012237,-0.009142,-0.026132,-0.009964,-0.02685,-0.017125,-0.01105,-0.01513,0.009848,0.007821,-0.008038,-0.020508,0.031142,0.006482,0.016893,0.002241,-0.012855,-0.027664,-0.008653,-0.011513,-0.002407,0.001553,0.022051,-0.02511,-1.9e-05
DR1DBIH,-0.017866,0.005043,0.01348,0.012748,-0.022591,0.0,0.018465,-0.041771,0.020695,-0.006555,0.030303,0.01282,0.004395,0.011198,0.010397,0.015855,0.007489,0.005429,0.007415,0.008282,-0.007494,0.020734,0.00521,0.004485,0.011173,0.017367,0.012041,0.001756,0.001282,-0.008055,0.009266,0.012291,-0.002002,-0.000288,0.007238,-0.003616,0.017665,0.003716,0.007602,-0.012829,-0.003932,0.003232,0.00806,-0.012482,0.018004,0.013906,0.028335,0.002733,-0.009104,0.006524,-0.000938,0.023477,-0.003007,0.014607,0.018333,0.018447,0.021081,0.010823,0.010959,0.009648,0.016463,0.004272,0.006997,0.003766,0.000992,-0.015675,0.008556,0.004623,-0.000676,0.008852,0.002654,-0.008258,-0.014182,-0.002534,-0.002308,-0.003505,0.00626,0.040168,-0.032254,0.000397,-0.013558,-0.023746,0.003579,-0.000354,0.007235,-0.011922,-0.01824,-0.012296,-0.010712,0.014374,-0.009259,-0.026435,0.017442,-0.00029,0.018071,0.000974,-0.021761,-0.004062,0.0022,0.003901,-0.004739,0.000334,-0.004775,-0.007384,-0.001167,0.004224,-0.016019
DR1DAY,0.010836,-0.002097,-0.171798,-0.140516,-0.008584,0.018465,0.0,0.04068,-0.013275,-0.009037,-0.036152,0.009685,0.007357,0.021317,-0.004181,0.015007,-0.007141,-0.013525,-0.007338,0.001795,-0.022588,0.003694,0.0113,-0.013686,-0.022292,-0.004238,-0.02056,-0.00453,-0.015326,-0.019728,0.006483,-0.001679,0.020606,-0.000154,0.01482,0.01401,0.008308,0.015226,-0.013632,0.016987,0.017493,-0.012465,-0.008097,-0.019219,0.004141,0.010219,0.005323,-0.00272,0.007617,0.0135,0.022439,-0.017633,0.022284,-0.026363,2.9e-05,0.001584,-0.003829,-0.026843,-0.027703,-0.009451,-0.019209,-0.00229,-0.015985,-0.013059,-0.01051,-0.007588,-0.007251,0.007678,-0.002831,0.002188,-0.003398,0.025355,-0.017665,0.017115,0.00775,0.0177,0.013035,-0.01275,0.028738,-0.022891,0.002545,0.003899,-0.016546,-0.002337,-0.013928,-0.026755,-0.00207,0.021743,-0.008378,-0.012811,-0.017169,-0.005066,-0.028676,-0.005875,-0.016759,-0.003563,-0.012782,0.013106,0.003675,-0.000646,-0.009651,0.002973,0.017593,0.006895,-0.023106,0.01435,0.01381
DR1LANG,0.00993,-0.013252,-0.058949,-0.067339,0.004746,-0.041771,0.04068,0.0,0.001532,-0.043548,0.008256,-0.031211,0.020337,0.004282,-0.044547,0.105082,-0.074108,-0.07708,-0.065731,-0.065544,0.0175,-0.044576,-0.025212,-0.03487,-0.011804,0.033654,0.023135,0.062275,0.004825,0.014109,-0.001215,-0.023099,-0.015891,0.00298,0.030227,-0.01149,0.066164,0.01695,0.036053,-0.014726,-0.023513,0.047255,0.007674,0.013138,-0.011037,0.004783,0.056734,0.005792,0.00939,0.022572,0.006916,0.038262,0.013231,-0.022781,-0.056109,-0.024174,0.01566,-0.071914,-0.068441,-0.055823,-0.070246,-0.042431,-0.070716,-0.067323,-0.080915,-0.037899,-0.065806,-0.060573,-0.039287,-0.068998,-0.03821,-0.018142,-0.005062,0.009913,0.002295,0.020738,0.000776,-0.066651,0.067266,0.010465,0.021191,-0.000317,-0.004788,-0.005268,-0.015494,-0.004944,0.053365,0.046065,-0.034039,-0.022102,0.052162,0.005582,0.006319,0.038041,-0.030587,-0.004754,-0.001074,-0.023136,0.014475,-0.004782,-0.004865,0.077711,-0.009637,-0.008258,-0.007667,0.067203,0.054748
DR1MRESP,-0.001306,-0.017335,-0.070053,-0.063797,0.002401,0.020695,-0.013275,0.001532,0.0,-0.277409,-0.013835,-0.096786,-0.105186,-0.054532,-0.021438,-0.072764,-0.097037,-0.069014,-0.100698,-0.097332,-0.082491,-0.071379,0.006201,0.01154,-0.008033,-0.021682,-0.028645,-0.013569,-0.028777,-0.011686,-0.043616,-0.045963,-0.090236,-0.055651,-0.050173,0.003733,-0.090963,-0.033166,-0.093791,-0.019677,0.003196,0.012157,0.041904,-0.04066,0.003301,-0.063765,-0.095931,-0.024639,-0.05617,-0.074564,-0.109869,-0.068073,-0.102808,-0.112955,-0.006449,-0.05953,-0.151233,-0.018745,-0.015302,-0.014189,-0.019011,-0.019857,-0.033502,-0.075618,-0.077769,-0.069425,-0.099299,-0.06665,-0.021511,-0.09446,-0.095426,-0.025263,-0.073417,-0.033253,-0.036203,-0.035573,-0.104442,-0.051516,-0.077367,-0.022771,-0.019529,-0.017621,-0.027604,-0.026199,-0.018495,-0.032773,-0.031144,-0.020782,0.011635,-0.041366,-0.004014,-0.011165,-0.017866,-0.00764,-0.022159,-0.003069,-0.016795,-0.013038,-0.005889,-0.040763,-0.016752,-0.012575,-0.005759,-0.019516,-0.01319,-0.007149,-0.009657
DR1HELP,0.002598,0.024581,0.09517,0.082351,0.018184,-0.006555,-0.009037,-0.043548,-0.277409,0.0,0.021064,0.04658,0.102313,-0.022991,-0.038739,0.045432,0.055608,0.020971,0.068847,0.062262,0.102154,0.056937,0.012595,-0.027696,0.014055,0.031138,0.062011,0.012016,0.015873,0.059165,0.010087,0.045174,0.086686,0.076447,0.026667,-0.052217,0.112131,-0.00017,0.124879,0.025764,0.000727,0.002227,-0.016973,0.077549,-0.013334,0.044364,0.11335,0.00112,0.052711,0.079014,0.072289,0.091657,0.080647,0.172142,-0.071899,0.097808,0.228199,-0.014001,-0.011488,0.002087,-0.002427,0.001388,0.003386,0.022883,0.032574,0.074069,0.06634,0.052882,0.02635,0.056884,0.077943,0.020386,0.083247,0.040084,0.043645,0.050917,0.155088,0.084186,0.107234,0.041176,0.025925,0.023297,0.043248,0.033081,0.029336,0.049587,0.055618,0.023497,-0.011837,0.061936,0.020738,0.035728,0.036101,0.013721,0.030251,-0.002034,0.031111,0.019079,0.005192,0.063574,0.031823,0.006874,0.00403,0.022025,0.021047,0.01785,0.016757


In [13]:
# Flatten the correlation matrix into a Series indexed by column pair, sort
# it in ascending order, then print it reversed so the largest values come
# first. Every pair shows up twice because the matrix is symmetric.
corr_pairs = df_corr.unstack()
sorted_mat = corr_pairs.sort_values()
print(sorted_mat.iloc[::-1])

DR1TM181  DR1TMFAT    0.998924
DR1TMFAT  DR1TM181    0.998924
DR1TPFAT  DR1TP182    0.997589
DR1TP182  DR1TPFAT    0.997589
DR1TFOLA  DR1TFDFE    0.979111
                        ...   
WTDRD1PP  DR1DAY     -0.171798
DR1BWATZ  DR1_330Z   -0.237833
DR1_330Z  DR1BWATZ   -0.237833
DR1MRESP  DR1HELP    -0.277409
DR1HELP   DR1MRESP   -0.277409
Length: 11449, dtype: float64


In [14]:
# A NaN entry in the correlation matrix is interpreted as no correlation between the two variables
# A correlation of zero implies no linear relationship at all

zero correlation implies no linear relationship at all