## Notebook to combine all feature engineered data and merge it with all non-feature engineered data

In [1]:
import pandas as pd
import os
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.15f}'.format ## see 15 decimal places of the numbers
from functools import reduce

In [2]:
for i in range (2003, 2020):
    
    year=str(i)
    
    PATH_1=os.path.join('..','all_years','CAMS', year ,'feature_engineering')

    # list all the pkl files in the folder
    pkl_files = [file for file in os.listdir(PATH_1) if file.endswith('.pkl')]

    ## read all the pickle files
    dataframes = [pd.read_pickle(os.path.join(PATH_1, file)) for file in pkl_files]

    ## remove the NAME columns from the dataframes. This step was necessary because, functools (below) didnt work
    ## when dataframes had same columns

    for j in range(len(dataframes)):
        if "NAME" in dataframes[j].columns:
            dataframes[j] = dataframes[j].drop(columns="NAME")
   
    #from functools import reduce. merge all the datframes in the list on fips
    df1=reduce(lambda x, y: pd.merge(x, y, on = 'fips'), dataframes)
    df1

    ## get the pm25 above threshold data which is on a different folder
    PATH_2=os.path.join('..','all_years','CAMS', year, 'FE_PM25_'+ year +'.pkl')
    df2=pd.read_pickle(PATH_2)
    df2 = df2.drop('NAME', axis=1)
    df2

    final_df=df1.merge(df2,how='left', on='fips')
    final_df['year']=int(i)
    final_df
    
    ## save in the feature_engineered file
    PATH_3='feature_engineered'
    file=os.path.join(PATH_3, 'fe_'+ year +'.pkl')
    final_df.to_pickle(file)


In [3]:
 # list all the feature engineered data
PATH_4=os.path.join('feature_engineered')
pkl_files = [file for file in os.listdir(PATH_4) if file.endswith('.pkl')]

## read all the pickle files
dataframes = [pd.read_pickle(os.path.join(PATH_4, file)) for file in pkl_files]

In [4]:
df_concat=pd.concat(dataframes)
df_concat = df_concat.drop(['ALAND', 'AWATER','INTPTLAT','INTPTLON'], axis=1) ## not sure how these appeared
df_concat

Unnamed: 0,fips,co above percentile,ethane above percentile,formaldehyde above percentile,hydroxyl above percentile,isoprene above percentile,nitrate above percentile,nitric above percentile,nitrogen dioxide above percentile,Nitrogen above percentile,ozone above percentile,peroxide above percentile,PM10 above percentile,PM10 above threshold,PM1 above percentile,PM2.5 above percentile,propane above percentile,so2 above percentile,Temp above percentile,Temp above threshold,Temp below percentile,Temp below threshold,PM2.5 above threshold,year
0,19175,25.513698630136989,20.821917808219180,14.931506849315069,23.390410958904109,3.116438356164384,12.979452054794521,24.041095890410961,14.554794520547945,9.246575342465754,35.753424657534246,23.356164383561644,28.287671232876711,0.000000000000000,29.178082191780824,27.671232876712327,5.273972602739726,0.000000000000000,38.150684931506852,0.993150684931507,43.664383561643838,23.356164383561644,70.308219178082183,2003
1,19177,30.445205479452053,27.020547945205482,20.650684931506849,20.821917808219180,7.773972602739726,21.130136986301370,24.452054794520546,21.164383561643834,12.328767123287671,35.410958904109592,21.164383561643834,36.643835616438359,0.000000000000000,38.390410958904106,36.232876712328768,10.410958904109590,6.952054794520548,38.184931506849317,0.684931506849315,42.534246575342465,21.472602739726028,75.513698630136986,2003
2,20097,6.780821917808219,9.965753424657533,9.897260273972602,31.678082191780820,0.000000000000000,7.602739726027398,18.801369863013697,1.815068493150685,1.849315068493151,43.390410958904106,30.856164383561648,7.945205479452055,0.000000000000000,7.773972602739726,8.219178082191780,0.479452054794521,0.000000000000000,45.308219178082190,3.938356164383562,35.753424657534246,13.390410958904109,43.595890410958901,2003
3,20109,2.054794520547945,4.726027397260274,2.773972602739726,31.198630136986299,0.000000000000000,1.712328767123288,14.075342465753424,0.821917808219178,1.335616438356164,46.678082191780824,29.075342465753423,4.280821917808219,0.000000000000000,3.972602739726028,4.383561643835616,0.000000000000000,0.000000000000000,40.308219178082197,2.465753424657534,40.787671232876711,16.952054794520549,36.541095890410958,2003
4,20003,22.602739726027394,25.582191780821915,23.527397260273972,25.000000000000000,5.753424657534246,24.075342465753423,26.678082191780820,21.267123287671232,12.671232876712329,40.239726027397261,27.568493150684930,26.541095890410958,0.856164383561644,27.157534246575345,26.335616438356162,10.616438356164384,19.691780821917810,45.958904109589042,2.671232876712329,35.958904109589042,13.458904109589040,71.061643835616437,2003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3228,20119,15.239726027397261,25.547945205479451,5.890410958904110,33.698630136986303,0.000000000000000,3.287671232876712,32.842465753424662,5.547945205479452,8.116438356164384,41.541095890410958,31.917808219178085,18.253424657534246,0.000000000000000,16.335616438356166,18.321917808219180,0.650684931506849,0.000000000000000,42.979452054794521,3.116438356164384,37.191780821917810,12.979452054794521,30.856164383561648,2019
3229,38049,21.643835616438356,30.958904109589042,0.136986301369863,21.438356164383563,0.000000000000000,0.205479452054795,13.253424657534246,18.698630136986303,20.102739726027398,26.643835616438356,10.616438356164384,25.205479452054796,0.000000000000000,27.123287671232877,24.623287671232877,5.547945205479452,15.410958904109590,21.061643835616440,0.000000000000000,59.589041095890416,39.280821917808218,40.410958904109592,2019
3230,72119,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,0.000000000000000,2019
3231,46021,19.623287671232877,28.047945205479451,0.684931506849315,24.486301369863014,0.068493150684932,0.513698630136986,18.013698630136986,7.739726027397261,13.698630136986301,27.328767123287673,13.698630136986301,25.000000000000000,0.000000000000000,25.924657534246577,24.623287671232877,1.849315068493151,0.719178082191781,26.027397260273972,0.034246575342466,57.397260273972606,37.842465753424662,40.958904109589042,2019


In [5]:
df_previous=pd.read_pickle('complete_dataset.pkl')
df_previous

Unnamed: 0,fips,year,MeanLifeExpectency,2m dew point temperature,2m temperature,Black carbon AOD at 550 nm,Dust AOD at 550 nm,Land-sea mask,Mean sea level pressure,Organic matter AOD at 550 nm,PM$_1$,PM$_{2.5}$,PM$_{10}$,Sea salt AOD at 550 nm,Sulphate AOD at 550 nm,Surface geopotential,Surface pressure,Total AOD at 469 nm,Total AOD at 550 nm,Total AOD at 670 nm,Total AOD at 865 nm,Total AOD at 1240 nm,Total column carbon monoxide,Total column ethane,Total column formaldehyde,Total column hydrogen peroxide,Total column hydroxyl radical,Total column isoprene,Total column methane,Total column nitric acid,Total column nitrogen dioxide,Total column nitrogen monoxide,Total column ozone,Total column peroxyacetyl nitrate,Total column propane,Total column sulphur dioxide,Total column water vapour,Carbon monoxide,Dust aerosol (0.03-0.55 µm) mixing ratio,Dust aerosol (0.55-0.9 µm) mixing ratio,Dust aerosol (0.9-20 µm) mixing ratio,Ethane,Formaldehyde,Hydrogen peroxide,Hydrophilic black carbon aerosol mixing ratio,Hydrophilic organic matter aerosol mixing ratio,Hydrophobic black carbon aerosol mixing ratio,Hydrophobic organic matter aerosol mixing ratio,Hydroxyl radical,Isoprene,Nitric acid,Nitrogen dioxide,Nitrogen monoxide,Ozone,Peroxyacetyl nitrate,Propane,Sea salt aerosol (0.03-0.5 µm) mixing ratio,Sea salt aerosol (0.5-5 µm) mixing ratio,Sea salt aerosol (5-20 µm) mixing ratio,Specific humidity,Sulphate aerosol mixing ratio,Sulphur dioxide,Temperature,"Leaf area index, high vegetation","Leaf area index, low vegetation",Snow albedo,Snow depth,Total column water,Vertically integrated mass of dust aerosol (0.03-0.55 µm),Vertically integrated mass of dust aerosol (0.55-9 µm),Vertically integrated mass of dust aerosol (9-20 µm),Vertically integrated mass of hydrophilic black carbon aerosol,Vertically integrated mass of hydrophilic organic matter aerosol,Vertically integrated mass of hydrophobic black carbon aerosol,Vertically integrated mass of hydrophobic organic matter aerosol,Vertically integrated mass of sea salt aerosol (0.03-0.5 µm),Vertically integrated mass of sea salt aerosol (0.5-5 µm),Vertically integrated mass of sea salt aerosol (5-20 µm),Vertically integrated mass of sulphate aerosol,Vertically integrated mass of sulphur dioxide,Relative humidity,Specific humidity.1,10m wind speed,Wet bulb temperature
0,1001,2003,74.628765329999993,285.629269414872169,290.863352214209158,0.007571248154860,0.010371692890225,0.986464531564209,101746.460785242583370,0.097522246908886,0.000000015615338,0.000000019601377,0.000000027192044,0.008606192940150,0.112170314070482,1368.670051439976305,100086.817007783130975,0.288798537560071,0.236241944078782,0.180380990078313,0.123618863685572,0.069164993038643,0.000984550381998,0.000003862356441,0.000003599970514,0.000008836077454,0.000000004853489,0.000001984446001,0.009753834615672,0.000007246513254,0.000004130302193,0.000000542480041,0.006426156201957,0.000008713431766,0.000000740202970,0.000004737026637,25.902900677695214,0.000000230284119,0.000000000654884,0.000000001134462,0.000000000403961,0.000000000705993,0.000000003513651,0.000000000741261,0.000000000224510,0.000000010096395,0.000000000254529,0.000000005847693,0.000000000000028,0.000000014604292,0.000000001474772,0.000000010235872,0.000000000867748,0.000000058145540,0.000000003056157,0.000000000328437,0.000000000023239,0.000000001862196,0.000000000212949,0.010010040534155,0.000000004044040,0.000000009229008,17.605204629052423,3.904460199286052,2.618741604547198,0.879750469058084,0.000000093784908,26.035876613510123,0.000001993471045,0.000004062559380,0.000003433003025,0.000000147754324,0.000003227288298,0.000000653503172,0.000015866768387,0.000000220848889,0.000017570806448,0.000000962454028,0.000011681476149,0.000002886328880,71.152047054515833,0.010074853188795,0.209342152896552,14.185547266162992
1,1003,2003,76.661419230000007,289.075082204243699,292.904504754344714,0.007386436848696,0.012786048677104,0.422456727373741,101714.385671383512090,0.080536809563287,0.000000010517524,0.000000014405518,0.000000020184934,0.013378239581773,0.098994377620422,170.454787516937728,101507.029992429539561,0.258548195567950,0.213081828687413,0.164859066793002,0.116178440206360,0.068677769359272,0.000938742452444,0.000003715951851,0.000002695483373,0.000009696511809,0.000000004901349,0.000000603500308,0.009897973997385,0.000006857057909,0.000002983662698,0.000000444556191,0.006262634883947,0.000006770110044,0.000000668124521,0.000003395163387,28.976961874702152,0.000000181097315,0.000000000693319,0.000000001175466,0.000000000446316,0.000000000603835,0.000000001813065,0.000000000839633,0.000000000174842,0.000000006299133,0.000000000176390,0.000000003192137,0.000000000000043,0.000000003807398,0.000000001664988,0.000000006039749,0.000000000390859,0.000000063545074,0.000000001913777,0.000000000255961,0.000000000080547,0.000000006659332,0.000000004087758,0.012053964031284,0.000000003376943,0.000000004951391,19.898584096705463,2.493748263318578,1.556460793604144,0.879984891251480,0.000000000000000,29.062915465690445,0.000002466103298,0.000005031922454,0.000004136963112,0.000000125085657,0.000002259343014,0.000000656499633,0.000013692095763,0.000000332156531,0.000026746484393,0.000002819174688,0.000010866288941,0.000001623720824,74.399172844073590,0.011615945151359,0.425069173999470,16.735605496466448
2,1005,2003,74.047810630000001,285.841733596869460,291.336096185519921,0.007456527906878,0.011317281497259,0.986926074501393,101751.023443022204447,0.094202817435168,0.000000014401388,0.000000018304776,0.000000025313996,0.009648836533317,0.105918880935702,1263.685437879796154,100218.776953070439049,0.278663173722528,0.228544186494445,0.175283362986200,0.121213904470990,0.068932331694298,0.000971139047240,0.000003810768337,0.000003493466708,0.000008963431171,0.000000004852262,0.000001893919418,0.009766660026615,0.000007104833011,0.000003768096164,0.000000512551830,0.006383518344924,0.000008323164917,0.000000720062676,0.000003584706097,26.556658946025699,0.000000225396546,0.000000000696949,0.000000001213570,0.000000000448433,0.000000000685152,0.000000003130391,0.000000000760942,0.000000000221058,0.000000009478665,0.000000000264652,0.000000005531709,0.000000000000028,0.000000013132761,0.000000001363316,0.000000008074930,0.000000000588739,0.000000055329099,0.000000002668089,0.000000000302966,0.000000000026288,0.000000002109797,0.000000000249691,0.010116411670589,0.000000003173645,0.000000003391913,18.094292469938807,4.174570187840095,2.367511530630567,0.879915804230056,0.000000000000000,26.678455616010211,0.000002183657231,0.000004467258392,0.000003625540118,0.000000146216262,0.000003088513791,0.000000643070878,0.000015246728207,0.000000245350288,0.000019565625472,0.000001077580828,0.000011021879231,0.000001782465496,71.229239958483802,0.010308249362289,0.169872508900564,14.652120108679608
3,1007,2003,73.057987400000002,285.117221444791198,290.369989746752196,0.007462419865064,0.009395796339033,0.986427446209294,101744.124161676227232,0.098690472700501,0.000000016317548,0.000000020307433,0.000000028238688,0.007622676703542,0.115020844565763,1465.598787847873382,99965.021140069555258,0.291671898516200,0.238192235999691,0.181316975737139,0.123444127792394,0.068214863605284,0.000998678357235,0.000003920819959,0.000003582715873,0.000008504147379,0.000000004861835,0.000001871172954,0.009741436446894,0.000007423703281,0.000004769077676,0.000000612072826,0.006478358100203,0.000008976325246,0.000000766435440,0.000005696627855,25.145317334780444,0.000000235334185,0.000000000609200,0.000000001063037,0.000000000377192,0.000000000723610,0.000000003373964,0.000000000699918,0.000000000218154,0.000000010380147,0.000000000233245,0.000000005994225,0.000000000000028,0.000000013479486,0.000000001601430,0.000000013131751,0.000000001689682,0.000000056414364,0.000000003056251,0.000000000358899,0.000000000019447,0.000000001556204,0.000000000164578,0.009712725129444,0.000000004729935,0.000000013603814,17.037290093496722,3.638957935933739,2.754823872966046,0.878014749774689,0.000011574283667,25.289901785165156,0.000001793115476,0.000003660555853,0.000003232456484,0.000000146903744,0.000003387209913,0.000000642778453,0.000015980756564,0.000000196703025,0.000015600967542,0.000000822115559,0.000011901631679,0.000003781823121,72.651517962262062,0.009904532979751,0.247677504482458,13.829282111100266
4,1009,2003,75.053119350000003,284.499985265741145,289.682648936568341,0.007281513555367,0.008532731314996,0.979014066638673,101752.892546598464833,0.098588406606724,0.000000016634353,0.000000020577427,0.000000028665550,0.006934708585955,0.115658964763364,2029.708085049063129,99291.479349611210637,0.290383245403297,0.236996457376135,0.180145848804424,0.122175736413122,0.066979939673203,0.001001629095295,0.000003940059669,0.000003391886198,0.000008036291574,0.000000004870680,0.000001455825865,0.009672508599792,0.000007562329598,0.000005127851415,0.000000645334377,0.006526528523785,0.000009101576113,0.000000785947342,0.000005913161443,24.002328270059678,0.000000243128079,0.000000000565979,0.000000000997541,0.000000000366852,0.000000000742636,0.000000002956875,0.000000000648027,0.000000000216384,0.000000010611162,0.000000000229768,0.000000006212334,0.000000000000031,0.000000010923979,0.000000001704878,0.000000014273459,0.000000002001280,0.000000055735722,0.000000002906905,0.000000000399657,0.000000000017084,0.000000001367524,0.000000000133217,0.009374448422807,0.000000004897891,0.000000013819481,16.199899496941782,3.286418753252266,2.782696514699140,0.874594005175991,0.000146268439547,24.159198736496386,0.000001615365357,0.000003314589592,0.000003036213617,0.000000146816070,0.000003487478775,0.000000623800605,0.000015736496782,0.000000178258749,0.000014094606683,0.000000714870948,0.000011742485568,0.000004065884815,73.001927053727783,0.009520829871887,0.313663384140064,13.082501247882803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52315,56037,2019,78.018891969999999,268.877520643594721,277.901787191187623,0.004447572668915,0.003451011412093,0.992354963979521,101728.279968551723869,0.041300386367054,0.000000005335923,0.000000006986436,0.000000010275853,0.005453571999776,0.023199206535957,21050.679685113027517,78504.848716287262505,0.094917082750167,0.077851784197840,0.060037937518508,0.042029720760866,0.025380803098356,0.000600635606696,0.000002422580521,0.000001110569686,0.000003477352537,0.000000004963488,0.000000111738591,0.007766673497341,0.000003159773773,0.000002211268370,0.000000422028650,0.006791831263302,0.000005037527525,0.000000431923381,0.000001236800654,8.157442605447407,0.000000135233435,0.000000000301368,0.000000000639213,0.000000001674582,0.000000000413873,0.000000000717546,0.000000000341813,0.000000000139838,0.000000004846246,0.000000000050751,0.000000002209570,0.000000000000052,0.000000001232036,0.000000000447900,0.000000008277870,0.000000001935387,0.000000054482406,0.000000001051317,0.000000000111938,0.000000000023342,0.000000001935791,0.000000000215410,0.003747796333939,0.000000001132903,0.000000004655746,3.957965558958222,3.538422076256631,0.536378170765603,0.809210342191357,0.031121132335008,7.932261512636336,0.000000500681546,0.000001122565454,0.000002684661987,0.000000077645757,0.000001693437832,0.000000393823836,0.000007737246361,0.000000168953004,0.000013748361167,0.000000845351148,0.000003309765385,0.000000949466150,59.683907659937070,0.003651965784675,1.605265246419298,0.571281695046519
52316,56039,2019,85.638149490000004,267.983749276156061,274.029863958509281,0.004886699057345,0.001802071901486,0.985416593005752,101726.315068890355178,0.047702406526535,0.000000006090787,0.000000007551662,0.000000010723385,0.005998536443269,0.022471940682464,23557.228264786190266,75982.937825419445289,0.101118369095114,0.082861597930377,0.063712265860080,0.044185611486018,0.025970239503095,0.000574941124339,0.000002344778896,0.000000895435136,0.000003057346386,0.000000004882069,0.000000094252776,0.007496490549075,0.000002428910212,0.000001280820888,0.000000253964486,0.006919423790743,0.000004645643966,0.000000426195600,0.000000438447614,7.320249762011832,0.000000126051777,0.000000000144742,0.000000000252531,0.000000000178171,0.000000000457028,0.000000000717248,0.000000000321785,0.000000000202220,0.000000006177013,0.000000000107593,0.000000002822114,0.000000000000041,0.000000001264543,0.000000000328929,0.000000002819322,0.000000000277159,0.000000061733279,0.000000001017388,0.000000000117618,0.000000000029800,0.000000002468639,0.000000000260899,0.003688190466983,0.000000000875683,0.000000000165686,0.560664797886773,2.670711888609032,1.785488597639217,0.792676062113158,0.096776138949686,7.142217782428033,0.000000326443010,0.000000688971312,0.000000772002994,0.000000088683828,0.000001812836175,0.000000428757218,0.000008221473509,0.000000164259354,0.000013321274004,0.000000884868172,0.000002847300463,0.000000213348938,69.544237907324401,0.003680952572612,1.238102709958286,-1.859769761436167
52317,56041,2019,77.521690719999995,269.968885872635667,277.701483320934017,0.004674971623685,0.002609903875408,0.982910408852341,101741.584218587551732,0.044270471406935,0.000000005798649,0.000000007329074,0.000000010553171,0.006343799473695,0.025104454158955,21702.225723071260290,77908.417302843037760,0.101206776038302,0.083003620213918,0.063943655343822,0.044609998013620,0.026646306462663,0.000594331066349,0.000002406351215,0.000001114224251,0.000003482435859,0.000000004957612,0.000000145421481,0.007705248347299,0.000003083539327,0.000001918737539,0.000000350187799,0.006775792192172,0.000004976728336,0.000000433545454,0.000000940340099,8.093473486304532,0.000000137145948,0.000000000192437,0.000000000377176,0.000000000695268,0.000000000441827,0.000000000815465,0.000000000338547,0.000000000156747,0.000000005430783,0.000000000078347,0.000000002592420,0.000000000000055,0.000000002141827,0.000000000499286,0.000000007515760,0.000000000993720,0.000000054722414,0.000000001095202,0.000000000158386,0.000000000028237,0.000000002350279,0.000000000277151,0.004073729822701,0.000000001089012,0.000000002105004,3.857929071137391,4.031189331292228,0.674010337938586,0.812282421431286,0.054913623758510,7.883527878947897,0.000000425651345,0.000000923241555,0.000001572610985,0.000000081362575,0.000001710751697,0.000000414401108,0.000008121110600,0.000000183399684,0.000014947257657,0.000000984687319,0.000003413513568,0.000000629800449,59.699260933286254,0.003632564658297,1.208359564739262,0.482583691619953
52318,56043,2019,78.137158389999996,270.298909555129228,277.841302762785233,0.005428984507879,0.003225551370174,0.997679853659910,101603.050488321445300,0.053027950153890,0.000000005931365,0.000000007478361,0.000000010677780,0.005445779211497,0.023839783179045,17437.488688889050536,81987.763448374767904,0.111294570197237,0.090968037761792,0.069772435975008,0.048312734751407,0.028470369960335,0.000640373899967,0.000002574499355,0.000001141800622,0.000003523095669,0.000000004936986,0.000000077919272,0.008125626963654,0.000002981591518,0.000001971769568,0.000000350826178,0.006927663456029,0.000005222864432,0.000000472347647,0.000000787430477,9.511025373385666,0.000000138964912,0.000000000231663,0.000000000449157,0.000000000702725,0.000000000421595,0.000000000684047,0.000000000291158,0.000000000159736,0.000000005512380,0.000000000056100,0.000000002464156,0.000000000000044,0.000000000774820,0.000000000320651,0.000000005035151,0.000000000538975,0.000000049046933,0.000000000972974,0.000000000108903,0.000000000018428,0.000000001521941,0.000000000190813,0.004194294518206,0.000000000867287,0.000000000795473,4.343174071493024,4.581955061795957,0.526594406537813,0.784423119137502,0.018203938774941,9.207770779673563,0.000000502582240,0.000001103004055,0.000002179409299,0.000000096633407,0.000002245828337,0.000000478279425,0.000009649527186,0.000000176687043,0.000014365650955,0.000000878019980,0.000003392326354,0.000000552959722,58.175270067637896,0.004117935850445,0.673376207603366,0.807879855752541


In [6]:
complete_df=df_previous.merge(df_concat, on=['fips','year'])
complete_df

Unnamed: 0,fips,year,MeanLifeExpectency,2m dew point temperature,2m temperature,Black carbon AOD at 550 nm,Dust AOD at 550 nm,Land-sea mask,Mean sea level pressure,Organic matter AOD at 550 nm,PM$_1$,PM$_{2.5}$,PM$_{10}$,Sea salt AOD at 550 nm,Sulphate AOD at 550 nm,Surface geopotential,Surface pressure,Total AOD at 469 nm,Total AOD at 550 nm,Total AOD at 670 nm,Total AOD at 865 nm,Total AOD at 1240 nm,Total column carbon monoxide,Total column ethane,Total column formaldehyde,Total column hydrogen peroxide,Total column hydroxyl radical,Total column isoprene,Total column methane,Total column nitric acid,Total column nitrogen dioxide,Total column nitrogen monoxide,Total column ozone,Total column peroxyacetyl nitrate,Total column propane,Total column sulphur dioxide,Total column water vapour,Carbon monoxide,Dust aerosol (0.03-0.55 µm) mixing ratio,Dust aerosol (0.55-0.9 µm) mixing ratio,Dust aerosol (0.9-20 µm) mixing ratio,Ethane,Formaldehyde,Hydrogen peroxide,Hydrophilic black carbon aerosol mixing ratio,Hydrophilic organic matter aerosol mixing ratio,Hydrophobic black carbon aerosol mixing ratio,Hydrophobic organic matter aerosol mixing ratio,Hydroxyl radical,Isoprene,Nitric acid,Nitrogen dioxide,Nitrogen monoxide,Ozone,Peroxyacetyl nitrate,Propane,Sea salt aerosol (0.03-0.5 µm) mixing ratio,Sea salt aerosol (0.5-5 µm) mixing ratio,Sea salt aerosol (5-20 µm) mixing ratio,Specific humidity,Sulphate aerosol mixing ratio,Sulphur dioxide,Temperature,"Leaf area index, high vegetation","Leaf area index, low vegetation",Snow albedo,Snow depth,Total column water,Vertically integrated mass of dust aerosol (0.03-0.55 µm),Vertically integrated mass of dust aerosol (0.55-9 µm),Vertically integrated mass of dust aerosol (9-20 µm),Vertically integrated mass of hydrophilic black carbon aerosol,Vertically integrated mass of hydrophilic organic matter aerosol,Vertically integrated mass of hydrophobic black carbon aerosol,Vertically integrated mass of hydrophobic organic matter aerosol,Vertically integrated mass of sea salt aerosol (0.03-0.5 µm),Vertically integrated mass of sea salt aerosol (0.5-5 µm),Vertically integrated mass of sea salt aerosol (5-20 µm),Vertically integrated mass of sulphate aerosol,Vertically integrated mass of sulphur dioxide,Relative humidity,Specific humidity.1,10m wind speed,Wet bulb temperature,co above percentile,ethane above percentile,formaldehyde above percentile,hydroxyl above percentile,isoprene above percentile,nitrate above percentile,nitric above percentile,nitrogen dioxide above percentile,Nitrogen above percentile,ozone above percentile,peroxide above percentile,PM10 above percentile,PM10 above threshold,PM1 above percentile,PM2.5 above percentile,propane above percentile,so2 above percentile,Temp above percentile,Temp above threshold,Temp below percentile,Temp below threshold,PM2.5 above threshold
0,1001,2003,74.628765329999993,285.629269414872169,290.863352214209158,0.007571248154860,0.010371692890225,0.986464531564209,101746.460785242583370,0.097522246908886,0.000000015615338,0.000000019601377,0.000000027192044,0.008606192940150,0.112170314070482,1368.670051439976305,100086.817007783130975,0.288798537560071,0.236241944078782,0.180380990078313,0.123618863685572,0.069164993038643,0.000984550381998,0.000003862356441,0.000003599970514,0.000008836077454,0.000000004853489,0.000001984446001,0.009753834615672,0.000007246513254,0.000004130302193,0.000000542480041,0.006426156201957,0.000008713431766,0.000000740202970,0.000004737026637,25.902900677695214,0.000000230284119,0.000000000654884,0.000000001134462,0.000000000403961,0.000000000705993,0.000000003513651,0.000000000741261,0.000000000224510,0.000000010096395,0.000000000254529,0.000000005847693,0.000000000000028,0.000000014604292,0.000000001474772,0.000000010235872,0.000000000867748,0.000000058145540,0.000000003056157,0.000000000328437,0.000000000023239,0.000000001862196,0.000000000212949,0.010010040534155,0.000000004044040,0.000000009229008,17.605204629052423,3.904460199286052,2.618741604547198,0.879750469058084,0.000000093784908,26.035876613510123,0.000001993471045,0.000004062559380,0.000003433003025,0.000000147754324,0.000003227288298,0.000000653503172,0.000015866768387,0.000000220848889,0.000017570806448,0.000000962454028,0.000011681476149,0.000002886328880,71.152047054515833,0.010074853188795,0.209342152896552,14.185547266162992,28.458904109589039,30.958904109589042,52.910958904109584,25.684931506849317,38.732876712328768,47.465753424657535,26.027397260273972,22.876712328767123,8.390410958904109,48.938356164383563,32.705479452054789,40.445205479452056,0.034246575342466,38.356164383561641,40.719178082191782,19.315068493150687,38.150684931506852,62.191780821917810,0.000000000000000,18.801369863013697,1.643835616438356,78.219178082191775
1,1003,2003,76.661419230000007,289.075082204243699,292.904504754344714,0.007386436848696,0.012786048677104,0.422456727373741,101714.385671383512090,0.080536809563287,0.000000010517524,0.000000014405518,0.000000020184934,0.013378239581773,0.098994377620422,170.454787516937728,101507.029992429539561,0.258548195567950,0.213081828687413,0.164859066793002,0.116178440206360,0.068677769359272,0.000938742452444,0.000003715951851,0.000002695483373,0.000009696511809,0.000000004901349,0.000000603500308,0.009897973997385,0.000006857057909,0.000002983662698,0.000000444556191,0.006262634883947,0.000006770110044,0.000000668124521,0.000003395163387,28.976961874702152,0.000000181097315,0.000000000693319,0.000000001175466,0.000000000446316,0.000000000603835,0.000000001813065,0.000000000839633,0.000000000174842,0.000000006299133,0.000000000176390,0.000000003192137,0.000000000000043,0.000000003807398,0.000000001664988,0.000000006039749,0.000000000390859,0.000000063545074,0.000000001913777,0.000000000255961,0.000000000080547,0.000000006659332,0.000000004087758,0.012053964031284,0.000000003376943,0.000000004951391,19.898584096705463,2.493748263318578,1.556460793604144,0.879984891251480,0.000000000000000,29.062915465690445,0.000002466103298,0.000005031922454,0.000004136963112,0.000000125085657,0.000002259343014,0.000000656499633,0.000013692095763,0.000000332156531,0.000026746484393,0.000002819174688,0.000010866288941,0.000001623720824,74.399172844073590,0.011615945151359,0.425069173999470,16.735605496466448,10.034246575342467,17.979452054794521,27.910958904109588,31.232876712328768,14.280821917808220,26.609589041095887,32.568493150684930,5.616438356164384,2.876712328767123,61.301369863013697,50.273972602739725,19.349315068493151,0.000000000000000,14.726027397260275,20.308219178082194,7.910958904109588,7.534246575342466,72.123287671232873,0.000000000000000,7.842465753424658,0.136986301369863,70.000000000000000
2,1005,2003,74.047810630000001,285.841733596869460,291.336096185519921,0.007456527906878,0.011317281497259,0.986926074501393,101751.023443022204447,0.094202817435168,0.000000014401388,0.000000018304776,0.000000025313996,0.009648836533317,0.105918880935702,1263.685437879796154,100218.776953070439049,0.278663173722528,0.228544186494445,0.175283362986200,0.121213904470990,0.068932331694298,0.000971139047240,0.000003810768337,0.000003493466708,0.000008963431171,0.000000004852262,0.000001893919418,0.009766660026615,0.000007104833011,0.000003768096164,0.000000512551830,0.006383518344924,0.000008323164917,0.000000720062676,0.000003584706097,26.556658946025699,0.000000225396546,0.000000000696949,0.000000001213570,0.000000000448433,0.000000000685152,0.000000003130391,0.000000000760942,0.000000000221058,0.000000009478665,0.000000000264652,0.000000005531709,0.000000000000028,0.000000013132761,0.000000001363316,0.000000008074930,0.000000000588739,0.000000055329099,0.000000002668089,0.000000000302966,0.000000000026288,0.000000002109797,0.000000000249691,0.010116411670589,0.000000003173645,0.000000003391913,18.094292469938807,4.174570187840095,2.367511530630567,0.879915804230056,0.000000000000000,26.678455616010211,0.000002183657231,0.000004467258392,0.000003625540118,0.000000146216262,0.000003088513791,0.000000643070878,0.000015246728207,0.000000245350288,0.000019565625472,0.000001077580828,0.000011021879231,0.000001782465496,71.229239958483802,0.010308249362289,0.169872508900564,14.652120108679608,26.472602739726025,28.356164383561644,51.061643835616444,26.130136986301373,37.431506849315063,43.150684931506852,23.184931506849317,12.979452054794521,5.547945205479452,45.479452054794521,34.349315068493155,36.027397260273972,0.136986301369863,32.397260273972599,36.712328767123289,15.582191780821919,4.109589041095890,64.143835616438366,0.068493150684932,16.917808219178081,1.232876712328767,76.369863013698634
3,1007,2003,73.057987400000002,285.117221444791198,290.369989746752196,0.007462419865064,0.009395796339033,0.986427446209294,101744.124161676227232,0.098690472700501,0.000000016317548,0.000000020307433,0.000000028238688,0.007622676703542,0.115020844565763,1465.598787847873382,99965.021140069555258,0.291671898516200,0.238192235999691,0.181316975737139,0.123444127792394,0.068214863605284,0.000998678357235,0.000003920819959,0.000003582715873,0.000008504147379,0.000000004861835,0.000001871172954,0.009741436446894,0.000007423703281,0.000004769077676,0.000000612072826,0.006478358100203,0.000008976325246,0.000000766435440,0.000005696627855,25.145317334780444,0.000000235334185,0.000000000609200,0.000000001063037,0.000000000377192,0.000000000723610,0.000000003373964,0.000000000699918,0.000000000218154,0.000000010380147,0.000000000233245,0.000000005994225,0.000000000000028,0.000000013479486,0.000000001601430,0.000000013131751,0.000000001689682,0.000000056414364,0.000000003056251,0.000000000358899,0.000000000019447,0.000000001556204,0.000000000164578,0.009712725129444,0.000000004729935,0.000000013603814,17.037290093496722,3.638957935933739,2.754823872966046,0.878014749774689,0.000011574283667,25.289901785165156,0.000001793115476,0.000003660555853,0.000003232456484,0.000000146903744,0.000003387209913,0.000000642778453,0.000015980756564,0.000000196703025,0.000015600967542,0.000000822115559,0.000011901631679,0.000003781823121,72.651517962262062,0.009904532979751,0.247677504482458,13.829282111100266,29.520547945205479,33.013698630136986,51.164383561643831,24.315068493150687,37.534246575342465,48.013698630136986,28.493150684931507,33.458904109589042,12.260273972602739,45.582191780821915,30.890410958904109,43.493150684931507,0.205479452054795,42.534246575342465,43.835616438356162,23.698630136986303,55.924657534246577,59.897260273972606,0.000000000000000,21.130136986301370,2.226027397260274,78.493150684931507
4,1009,2003,75.053119350000003,284.499985265741145,289.682648936568341,0.007281513555367,0.008532731314996,0.979014066638673,101752.892546598464833,0.098588406606724,0.000000016634353,0.000000020577427,0.000000028665550,0.006934708585955,0.115658964763364,2029.708085049063129,99291.479349611210637,0.290383245403297,0.236996457376135,0.180145848804424,0.122175736413122,0.066979939673203,0.001001629095295,0.000003940059669,0.000003391886198,0.000008036291574,0.000000004870680,0.000001455825865,0.009672508599792,0.000007562329598,0.000005127851415,0.000000645334377,0.006526528523785,0.000009101576113,0.000000785947342,0.000005913161443,24.002328270059678,0.000000243128079,0.000000000565979,0.000000000997541,0.000000000366852,0.000000000742636,0.000000002956875,0.000000000648027,0.000000000216384,0.000000010611162,0.000000000229768,0.000000006212334,0.000000000000031,0.000000010923979,0.000000001704878,0.000000014273459,0.000000002001280,0.000000055735722,0.000000002906905,0.000000000399657,0.000000000017084,0.000000001367524,0.000000000133217,0.009374448422807,0.000000004897891,0.000000013819481,16.199899496941782,3.286418753252266,2.782696514699140,0.874594005175991,0.000146268439547,24.159198736496386,0.000001615365357,0.000003314589592,0.000003036213617,0.000000146816070,0.000003487478775,0.000000623800605,0.000015736496782,0.000000178258749,0.000014094606683,0.000000714870948,0.000011742485568,0.000004065884815,73.001927053727783,0.009520829871887,0.313663384140064,13.082501247882803,33.150684931506852,35.719178082191782,47.671232876712324,24.006849315068493,33.938356164383556,46.815068493150683,31.541095890410958,36.917808219178085,13.801369863013699,44.178082191780824,29.486301369863018,45.068493150684930,0.102739726027397,44.143835616438352,45.068493150684930,28.630136986301370,56.849315068493155,57.157534246575345,0.000000000000000,23.527397260273972,3.458904109589041,78.801369863013704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52315,56037,2019,78.018891969999999,268.877520643594721,277.901787191187623,0.004447572668915,0.003451011412093,0.992354963979521,101728.279968551723869,0.041300386367054,0.000000005335923,0.000000006986436,0.000000010275853,0.005453571999776,0.023199206535957,21050.679685113027517,78504.848716287262505,0.094917082750167,0.077851784197840,0.060037937518508,0.042029720760866,0.025380803098356,0.000600635606696,0.000002422580521,0.000001110569686,0.000003477352537,0.000000004963488,0.000000111738591,0.007766673497341,0.000003159773773,0.000002211268370,0.000000422028650,0.006791831263302,0.000005037527525,0.000000431923381,0.000001236800654,8.157442605447407,0.000000135233435,0.000000000301368,0.000000000639213,0.000000001674582,0.000000000413873,0.000000000717546,0.000000000341813,0.000000000139838,0.000000004846246,0.000000000050751,0.000000002209570,0.000000000000052,0.000000001232036,0.000000000447900,0.000000008277870,0.000000001935387,0.000000054482406,0.000000001051317,0.000000000111938,0.000000000023342,0.000000001935791,0.000000000215410,0.003747796333939,0.000000001132903,0.000000004655746,3.957965558958222,3.538422076256631,0.536378170765603,0.809210342191357,0.031121132335008,7.932261512636336,0.000000500681546,0.000001122565454,0.000002684661987,0.000000077645757,0.000001693437832,0.000000393823836,0.000007737246361,0.000000168953004,0.000013748361167,0.000000845351148,0.000003309765385,0.000000949466150,59.683907659937070,0.003651965784675,1.605265246419298,0.571281695046519,18.835616438356166,25.684931506849317,3.321917808219178,33.150684931506852,0.890410958904110,1.404109589041096,17.157534246575342,40.376712328767120,41.883561643835613,40.684931506849317,15.410958904109590,12.397260273972604,0.000000000000000,9.383561643835616,10.787671232876713,1.027397260273972,49.794520547945204,18.630136986301370,0.000000000000000,64.349315068493155,42.876712328767127,23.390410958904109
52316,56039,2019,85.638149490000004,267.983749276156061,274.029863958509281,0.004886699057345,0.001802071901486,0.985416593005752,101726.315068890355178,0.047702406526535,0.000000006090787,0.000000007551662,0.000000010723385,0.005998536443269,0.022471940682464,23557.228264786190266,75982.937825419445289,0.101118369095114,0.082861597930377,0.063712265860080,0.044185611486018,0.025970239503095,0.000574941124339,0.000002344778896,0.000000895435136,0.000003057346386,0.000000004882069,0.000000094252776,0.007496490549075,0.000002428910212,0.000001280820888,0.000000253964486,0.006919423790743,0.000004645643966,0.000000426195600,0.000000438447614,7.320249762011832,0.000000126051777,0.000000000144742,0.000000000252531,0.000000000178171,0.000000000457028,0.000000000717248,0.000000000321785,0.000000000202220,0.000000006177013,0.000000000107593,0.000000002822114,0.000000000000041,0.000000001264543,0.000000000328929,0.000000002819322,0.000000000277159,0.000000061733279,0.000000001017388,0.000000000117618,0.000000000029800,0.000000002468639,0.000000000260899,0.003688190466983,0.000000000875683,0.000000000165686,0.560664797886773,2.670711888609032,1.785488597639217,0.792676062113158,0.096776138949686,7.142217782428033,0.000000326443010,0.000000688971312,0.000000772002994,0.000000088683828,0.000001812836175,0.000000428757218,0.000008221473509,0.000000164259354,0.000013321274004,0.000000884868172,0.000002847300463,0.000000213348938,69.544237907324401,0.003680952572612,1.238102709958286,-1.859769761436167,4.726027397260274,23.116438356164384,3.424657534246575,32.773972602739725,1.438356164383562,1.232876712328767,8.595890410958903,3.972602739726028,3.835616438356165,54.006849315068493,14.897260273972604,10.376712328767123,0.445205479452055,10.034246575342467,10.376712328767123,0.856164383561644,0.273972602739726,7.534246575342466,0.000000000000000,76.404109589041099,52.500000000000000,20.753424657534246
52317,56041,2019,77.521690719999995,269.968885872635667,277.701483320934017,0.004674971623685,0.002609903875408,0.982910408852341,101741.584218587551732,0.044270471406935,0.000000005798649,0.000000007329074,0.000000010553171,0.006343799473695,0.025104454158955,21702.225723071260290,77908.417302843037760,0.101206776038302,0.083003620213918,0.063943655343822,0.044609998013620,0.026646306462663,0.000594331066349,0.000002406351215,0.000001114224251,0.000003482435859,0.000000004957612,0.000000145421481,0.007705248347299,0.000003083539327,0.000001918737539,0.000000350187799,0.006775792192172,0.000004976728336,0.000000433545454,0.000000940340099,8.093473486304532,0.000000137145948,0.000000000192437,0.000000000377176,0.000000000695268,0.000000000441827,0.000000000815465,0.000000000338547,0.000000000156747,0.000000005430783,0.000000000078347,0.000000002592420,0.000000000000055,0.000000002141827,0.000000000499286,0.000000007515760,0.000000000993720,0.000000054722414,0.000000001095202,0.000000000158386,0.000000000028237,0.000000002350279,0.000000000277151,0.004073729822701,0.000000001089012,0.000000002105004,3.857929071137391,4.031189331292228,0.674010337938586,0.812282421431286,0.054913623758510,7.883527878947897,0.000000425651345,0.000000923241555,0.000001572610985,0.000000081362575,0.000001710751697,0.000000414401108,0.000008121110600,0.000000183399684,0.000014947257657,0.000000984687319,0.000003413513568,0.000000629800449,59.699260933286254,0.003632564658297,1.208359564739262,0.482583691619953,19.212328767123289,32.808219178082190,4.726027397260274,34.452054794520549,5.753424657534246,2.294520547945206,19.075342465753426,39.657534246575345,34.726027397260275,40.410958904109592,15.034246575342467,12.979452054794521,0.102739726027397,12.294520547945204,12.397260273972604,11.815068493150685,17.568493150684933,15.890410958904111,0.000000000000000,66.883561643835620,42.260273972602739,27.191780821917810
52318,56043,2019,78.137158389999996,270.298909555129228,277.841302762785233,0.005428984507879,0.003225551370174,0.997679853659910,101603.050488321445300,0.053027950153890,0.000000005931365,0.000000007478361,0.000000010677780,0.005445779211497,0.023839783179045,17437.488688889050536,81987.763448374767904,0.111294570197237,0.090968037761792,0.069772435975008,0.048312734751407,0.028470369960335,0.000640373899967,0.000002574499355,0.000001141800622,0.000003523095669,0.000000004936986,0.000000077919272,0.008125626963654,0.000002981591518,0.000001971769568,0.000000350826178,0.006927663456029,0.000005222864432,0.000000472347647,0.000000787430477,9.511025373385666,0.000000138964912,0.000000000231663,0.000000000449157,0.000000000702725,0.000000000421595,0.000000000684047,0.000000000291158,0.000000000159736,0.000000005512380,0.000000000056100,0.000000002464156,0.000000000000044,0.000000000774820,0.000000000320651,0.000000005035151,0.000000000538975,0.000000049046933,0.000000000972974,0.000000000108903,0.000000000018428,0.000000001521941,0.000000000190813,0.004194294518206,0.000000000867287,0.000000000795473,4.343174071493024,4.581955061795957,0.526594406537813,0.784423119137502,0.018203938774941,9.207770779673563,0.000000502582240,0.000001103004055,0.000002179409299,0.000000096633407,0.000002245828337,0.000000478279425,0.000009649527186,0.000000176687043,0.000014365650955,0.000000878019980,0.000003392326354,0.000000552959722,58.175270067637896,0.004117935850445,0.673376207603366,0.807879855752541,20.582191780821919,25.102739726027394,2.876712328767123,31.609589041095891,0.068493150684932,0.205479452054795,10.582191780821917,20.479452054794521,17.910958904109588,30.787671232876711,13.493150684931507,14.417808219178083,0.000000000000000,13.732876712328768,13.835616438356164,0.410958904109589,2.123287671232877,18.767123287671232,0.000000000000000,63.321917808219176,40.650684931506845,27.602739726027398


In [7]:
complete_df.to_pickle('final_dataset_103_features.pkl')