In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [32]:
# Load the hourly climate table; the STATE and month columns used below come from this file.
states_df = pd.read_csv(filepath_or_buffer="climate_with_states.csv")

In [33]:
# List the distinct STATE values to see which entries are not US states.
states_df["STATE"].unique()

array(['American Samoa', 'Yap', 'Chuuk', 'Guam', 'Rālik', 'Montana',
       'Colorado', 'California', 'Nevada', 'Arizona', 'West Virginia',
       'Tennessee', 'North Carolina', 'Georgia', 'Kentucky', 'Alabama',
       'Florida', 'South Carolina', 'Illinois', 'Texas', 'Kansas',
       'Arkansas', 'Louisiana', 'Mississippi', 'Missouri', 'Oklahoma',
       'New York', 'Pennsylvania', 'New Hampshire', 'Ohio', 'Virginia',
       'Maryland', 'Delaware', 'New Jersey', 'Maine', 'Massachusetts',
       'Connecticut', 'Vermont', 'Rhode Island', 'Michigan', 'Indiana',
       'Wisconsin', 'Minnesota', 'North Dakota', 'South Dakota', 'Iowa',
       'Nebraska', 'Hawaii', 'New Mexico', 'Wyoming', 'Utah', 'Idaho',
       'Washington', 'Oregon', 'Alaska'], dtype=object)

In [34]:
# STATE values to keep: the 50 US states, in the order unique() reported them.
# The territories seen in the data (American Samoa, Yap, Chuuk, Guam, Rālik) are left out.
US_STATES = [
    "Montana", "Colorado", "California", "Nevada", "Arizona",
    "West Virginia", "Tennessee", "North Carolina", "Georgia", "Kentucky",
    "Alabama", "Florida", "South Carolina", "Illinois", "Texas",
    "Kansas", "Arkansas", "Louisiana", "Mississippi", "Missouri",
    "Oklahoma", "New York", "Pennsylvania", "New Hampshire", "Ohio",
    "Virginia", "Maryland", "Delaware", "New Jersey", "Maine",
    "Massachusetts", "Connecticut", "Vermont", "Rhode Island", "Michigan",
    "Indiana", "Wisconsin", "Minnesota", "North Dakota", "South Dakota",
    "Iowa", "Nebraska", "Hawaii", "New Mexico", "Wyoming",
    "Utah", "Idaho", "Washington", "Oregon", "Alaska",
]

In [35]:
# All hourly measurement columns analysed below, one name prefix per line.
MEAS_COLUMNS = [
    "HLY-TEMP-NORMAL", "HLY-TEMP-10PCTL", "HLY-TEMP-90PCTL",
    "HLY-DEWP-NORMAL", "HLY-DEWP-10PCTL", "HLY-DEWP-90PCTL",
    "HLY-PRES-NORMAL", "HLY-PRES-10PCTL", "HLY-PRES-90PCTL",
    "HLY-CLDH-NORMAL",
    "HLY-HTDH-NORMAL",
    "HLY-CLOD-PCTCLR", "HLY-CLOD-PCTFEW", "HLY-CLOD-PCTSCT", "HLY-CLOD-PCTBKN", "HLY-CLOD-PCTOVC",
    "HLY-HIDX-NORMAL",
    "HLY-WCHL-NORMAL",
    "HLY-WIND-AVGSPD", "HLY-WIND-PCTCLM", "HLY-WIND-VCTDIR", "HLY-WIND-VCTSPD",
    "HLY-WIND-1STDIR", "HLY-WIND-1STPCT", "HLY-WIND-2NDDIR", "HLY-WIND-2NDPCT",
]


In [36]:
# Keep only the rows whose STATE appears in US_STATES; rows for US territories are dropped.
states_df = states_df.loc[states_df["STATE"].isin(US_STATES)]

In [37]:
# (rows, columns) of the table after restricting it to US states.
states_df.shape

(4038360, 114)

In [38]:
# Sanity check on the row count: days per year * hours per day * 467.
# NOTE(review): 467 is presumably the number of stations expected to report a full
# year of hourly values — confirm where this figure comes from.
365 * 24 * 467

4090920

In [39]:
# -9999 is the placeholder for "no value"; convert it to NaN so pandas treats it as missing.
states_df = states_df.replace(to_replace=-9999, value=np.nan)

In [40]:
# Discard every column whose name contains "years_" or "flag_"; all other columns
# are kept in their existing order.
states_df = states_df[
    [name for name in states_df.columns if "years_" not in name and "flag_" not in name]
]

In [41]:
# Number of missing values per measurement column within each (STATE, month) group.
# NaNs are flagged once for the whole frame and the boolean flags are then summed
# per group with the built-in GroupBy.sum(), instead of calling a Python lambda
# on every group. The result keeps the (STATE, month) index and MEAS_COLUMNS columns.
missing_counts = (
    states_df[MEAS_COLUMNS]
    .isna()
    .groupby([states_df["STATE"], states_df["month"]])
    .sum()
)

In [42]:
# Display the per-(STATE, month) missing-value counts.
missing_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,HLY-TEMP-NORMAL,HLY-TEMP-10PCTL,HLY-TEMP-90PCTL,HLY-DEWP-NORMAL,HLY-DEWP-10PCTL,HLY-DEWP-90PCTL,HLY-PRES-NORMAL,HLY-PRES-10PCTL,HLY-PRES-90PCTL,HLY-CLDH-NORMAL,...,HLY-HIDX-NORMAL,HLY-WCHL-NORMAL,HLY-WIND-AVGSPD,HLY-WIND-PCTCLM,HLY-WIND-VCTDIR,HLY-WIND-VCTSPD,HLY-WIND-1STDIR,HLY-WIND-1STPCT,HLY-WIND-2NDDIR,HLY-WIND-2NDPCT
STATE,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,1,0,0,0,0,0,0,334,334,334,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,2,0,0,0,0,0,0,168,168,168,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,3,0,0,0,0,0,0,214,214,214,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,4,0,0,0,0,0,0,348,348,348,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,5,0,0,0,0,0,0,409,409,409,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,8,0,0,0,63,63,63,1557,1557,1557,0,...,63,58,31,31,31,31,31,31,31,31
Wyoming,9,0,0,0,52,52,52,1542,1542,1542,0,...,52,83,80,80,80,80,80,80,80,80
Wyoming,10,0,0,0,61,61,61,1596,1596,1596,0,...,61,72,66,66,66,66,66,66,66,66
Wyoming,11,0,0,0,45,45,45,1508,1508,1508,0,...,45,47,47,47,47,47,47,47,47,47


In [43]:
# Fraction of missing values per measurement column within each (STATE, month) group.
# The mean of the boolean NaN flags is the missing proportion; using the built-in
# GroupBy.mean() avoids running a Python lambda on every group.
missing_proportions = (
    states_df[MEAS_COLUMNS]
    .isna()
    .groupby([states_df["STATE"], states_df["month"]])
    .mean()
)

In [44]:
# Worst case per column: the largest missing fraction over all (STATE, month) groups.
missing_proportions.max(axis=0)

HLY-TEMP-NORMAL    0.105511
HLY-TEMP-10PCTL    0.105511
HLY-TEMP-90PCTL    0.105511
HLY-DEWP-NORMAL    0.130098
HLY-DEWP-10PCTL    0.130098
HLY-DEWP-90PCTL    0.130098
HLY-PRES-NORMAL    0.371304
HLY-PRES-10PCTL    0.371304
HLY-PRES-90PCTL    0.371304
HLY-CLDH-NORMAL    0.105511
HLY-HTDH-NORMAL    0.105511
HLY-CLOD-PCTCLR    0.838206
HLY-CLOD-PCTFEW    0.838206
HLY-CLOD-PCTSCT    0.838206
HLY-CLOD-PCTBKN    0.838206
HLY-CLOD-PCTOVC    0.838206
HLY-HIDX-NORMAL    0.130515
HLY-WCHL-NORMAL    0.126344
HLY-WIND-AVGSPD    0.049731
HLY-WIND-PCTCLM    0.049731
HLY-WIND-VCTDIR    0.049731
HLY-WIND-VCTSPD    0.049731
HLY-WIND-1STDIR    0.049731
HLY-WIND-1STPCT    0.049731
HLY-WIND-2NDDIR    0.049731
HLY-WIND-2NDPCT    0.049731
dtype: float64

In [45]:
# Typical case per column: the missing fraction averaged over all (STATE, month) groups.
missing_proportions.mean(axis=0)

HLY-TEMP-NORMAL    0.001198
HLY-TEMP-10PCTL    0.001198
HLY-TEMP-90PCTL    0.001198
HLY-DEWP-NORMAL    0.002528
HLY-DEWP-10PCTL    0.002528
HLY-DEWP-90PCTL    0.002528
HLY-PRES-NORMAL    0.140646
HLY-PRES-10PCTL    0.140646
HLY-PRES-90PCTL    0.140646
HLY-CLDH-NORMAL    0.001198
HLY-HTDH-NORMAL    0.001198
HLY-CLOD-PCTCLR    0.233470
HLY-CLOD-PCTFEW    0.233470
HLY-CLOD-PCTSCT    0.233470
HLY-CLOD-PCTBKN    0.233470
HLY-CLOD-PCTOVC    0.233470
HLY-HIDX-NORMAL    0.002534
HLY-WCHL-NORMAL    0.002539
HLY-WIND-AVGSPD    0.001499
HLY-WIND-PCTCLM    0.001499
HLY-WIND-VCTDIR    0.001499
HLY-WIND-VCTSPD    0.001499
HLY-WIND-1STDIR    0.001502
HLY-WIND-1STPCT    0.001502
HLY-WIND-2NDDIR    0.001502
HLY-WIND-2NDPCT    0.001502
dtype: float64

In [12]:
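# Based on the missing proportions above, we drop the five HLY-CLOD columns (HLY-CLOD-PCTCLR, HLY-CLOD-PCTFEW, HLY-CLOD-PCTSCT, HLY-CLOD-PCTBKN, and HLY-CLOD-PCTOVC): up to ~84% of their values are missing in a single state/month, ~23% on average.
# We also drop the three HLY-PRES columns (HLY-PRES-NORMAL, HLY-PRES-10PCTL, HLY-PRES-90PCTL): up to ~37% missing in a single state/month, ~14% on average.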


In [46]:
# Measurement columns retained for the first export: MEAS_COLUMNS minus the
# HLY-PRES-* and HLY-CLOD-* entries.
GOOD_MEAS_COLUMNS = [
    "HLY-TEMP-NORMAL", "HLY-TEMP-10PCTL", "HLY-TEMP-90PCTL",
    "HLY-DEWP-NORMAL", "HLY-DEWP-10PCTL", "HLY-DEWP-90PCTL",
    "HLY-CLDH-NORMAL",
    "HLY-HTDH-NORMAL",
    "HLY-HIDX-NORMAL",
    "HLY-WCHL-NORMAL",
    "HLY-WIND-AVGSPD", "HLY-WIND-PCTCLM", "HLY-WIND-VCTDIR", "HLY-WIND-VCTSPD",
    "HLY-WIND-1STDIR", "HLY-WIND-1STPCT", "HLY-WIND-2NDDIR", "HLY-WIND-2NDPCT",
]


In [47]:
# Copy of states_df without any column whose name contains "HLY-CLOD" or "HLY-PRES".
states_df2 = states_df[
    [name for name in states_df.columns if "HLY-CLOD" not in name and "HLY-PRES" not in name]
]

In [48]:
# Average each retained measurement over all rows of a (STATE, month) group.
# The built-in GroupBy.mean() replaces the per-group Python lambda; like
# DataFrame.mean() it ignores NaN, and it returns the same (STATE, month)-indexed frame.
# NOTE(review): HLY-WIND-VCTDIR / HLY-WIND-1STDIR / HLY-WIND-2NDDIR look like wind
# directions; a plain arithmetic mean of a direction may not be meaningful — confirm
# before relying on those three columns.
states_df2_grouped = states_df2.groupby(["STATE", "month"])[GOOD_MEAS_COLUMNS].mean()

In [49]:
# Display the per-(STATE, month) means of the retained measurement columns.
states_df2_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,HLY-TEMP-NORMAL,HLY-TEMP-10PCTL,HLY-TEMP-90PCTL,HLY-DEWP-NORMAL,HLY-DEWP-10PCTL,HLY-DEWP-90PCTL,HLY-CLDH-NORMAL,HLY-HTDH-NORMAL,HLY-HIDX-NORMAL,HLY-WCHL-NORMAL,HLY-WIND-AVGSPD,HLY-WIND-PCTCLM,HLY-WIND-VCTDIR,HLY-WIND-VCTSPD,HLY-WIND-1STDIR,HLY-WIND-1STPCT,HLY-WIND-2NDDIR,HLY-WIND-2NDPCT
STATE,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Alabama,1,45.896304,31.040575,61.456250,35.785534,16.403545,55.925202,0.269405,19.375302,45.896304,43.232409,6.951142,16.773673,229.971102,1.301361,4.528562,18.356922,4.726815,14.417423
Alabama,2,49.679501,35.924275,63.863039,38.478237,20.950856,57.460361,0.567448,15.889472,49.679501,47.604836,7.252753,15.624721,218.772693,1.286012,4.369048,18.101525,4.664249,14.720722
Alabama,3,56.742473,43.463508,69.175941,43.799345,26.681452,60.043532,1.733031,9.990810,56.742473,55.635181,7.379200,15.894758,186.998152,1.347581,4.581149,17.609039,4.661794,14.868616
Alabama,4,64.101997,53.041510,73.818125,51.627760,37.186302,64.222934,3.908611,4.806979,64.107917,63.816233,6.918559,18.100938,186.184028,1.784514,4.634549,19.241354,5.101910,15.016632
Alabama,5,71.805225,62.842876,79.097900,60.433737,48.984140,68.871001,8.212114,1.405645,72.331989,71.774429,5.879452,22.252050,171.455645,1.624580,4.533938,18.313978,4.634241,14.676445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,8,67.179385,57.783216,75.414432,41.067193,30.525726,50.866208,6.299311,4.120380,67.264323,67.024618,8.347272,11.848269,226.428306,3.643354,5.638406,26.413714,5.106570,17.541395
Wyoming,9,57.212760,44.841545,68.081458,34.458041,23.457078,45.281219,2.638108,10.426840,57.264278,56.040303,8.742606,12.505018,242.592254,3.916708,5.920246,27.191144,5.498239,17.124718
Wyoming,10,44.132124,30.168196,56.792658,26.493940,15.882023,36.357002,0.434425,21.303814,44.178985,40.463197,9.392338,13.022834,259.259429,4.744631,6.418620,28.275025,5.761128,17.422868
Wyoming,11,32.076701,17.010747,46.239306,18.308679,6.479983,28.827944,0.021510,32.946389,32.099283,25.679643,9.870121,14.161561,264.577980,5.931052,6.490285,30.672501,6.040609,17.692596


In [26]:
# Write the grouped means to disk; the (STATE, month) index is written as the first two columns.
states_df2_grouped.to_csv(path_or_buf="climate_by_state.csv")

In [50]:
# Average every measurement column (including HLY-PRES-* and HLY-CLOD-*) over all
# rows of a (STATE, month) group. The built-in GroupBy.mean() replaces the per-group
# Python lambda; like DataFrame.mean() it ignores NaN, and it returns the same
# (STATE, month)-indexed frame.
# NOTE(review): HLY-WIND-VCTDIR / HLY-WIND-1STDIR / HLY-WIND-2NDDIR look like wind
# directions; a plain arithmetic mean of a direction may not be meaningful — confirm
# before relying on those three columns.
states_df_grouped = states_df.groupby(["STATE", "month"])[MEAS_COLUMNS].mean()

In [52]:
# Additive mask with the same labels as missing_proportions:
#   0.0 where at most 50% of a (STATE, month) group's values are missing,
#   NaN where more than 50% are missing.
# Adding it to a frame of statistics leaves well-covered cells unchanged and turns
# poorly-covered cells into NaN.
nan_mask = missing_proportions.where(missing_proportions <= 0.5).mul(0)

In [54]:
# Per column: is the mask NaN exactly where more than half of the values are missing?
(nan_mask.isna() == (missing_proportions > 0.5)).all(axis=0)

HLY-TEMP-NORMAL    True
HLY-TEMP-10PCTL    True
HLY-TEMP-90PCTL    True
HLY-DEWP-NORMAL    True
HLY-DEWP-10PCTL    True
HLY-DEWP-90PCTL    True
HLY-PRES-NORMAL    True
HLY-PRES-10PCTL    True
HLY-PRES-90PCTL    True
HLY-CLDH-NORMAL    True
HLY-HTDH-NORMAL    True
HLY-CLOD-PCTCLR    True
HLY-CLOD-PCTFEW    True
HLY-CLOD-PCTSCT    True
HLY-CLOD-PCTBKN    True
HLY-CLOD-PCTOVC    True
HLY-HIDX-NORMAL    True
HLY-WCHL-NORMAL    True
HLY-WIND-AVGSPD    True
HLY-WIND-PCTCLM    True
HLY-WIND-VCTDIR    True
HLY-WIND-VCTSPD    True
HLY-WIND-1STDIR    True
HLY-WIND-1STPCT    True
HLY-WIND-2NDDIR    True
HLY-WIND-2NDPCT    True
dtype: bool

In [65]:
# Fail loudly if any cell of the mask disagrees with the >50%-missing rule.
assert (nan_mask.isna() == (missing_proportions > 0.5)).all(axis=None)

In [55]:
# The mask must have the same number of rows and columns as the proportions it was built from.
nan_mask.shape == missing_proportions.shape

True

In [56]:
# Display the grouped means before nan_mask is applied.
states_df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,HLY-TEMP-NORMAL,HLY-TEMP-10PCTL,HLY-TEMP-90PCTL,HLY-DEWP-NORMAL,HLY-DEWP-10PCTL,HLY-DEWP-90PCTL,HLY-PRES-NORMAL,HLY-PRES-10PCTL,HLY-PRES-90PCTL,HLY-CLDH-NORMAL,...,HLY-HIDX-NORMAL,HLY-WCHL-NORMAL,HLY-WIND-AVGSPD,HLY-WIND-PCTCLM,HLY-WIND-VCTDIR,HLY-WIND-VCTSPD,HLY-WIND-1STDIR,HLY-WIND-1STPCT,HLY-WIND-2NDDIR,HLY-WIND-2NDPCT
STATE,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,1,45.896304,31.040575,61.456250,35.785534,16.403545,55.925202,1021.493378,1012.674137,1029.811250,0.269405,...,45.896304,43.232409,6.951142,16.773673,229.971102,1.301361,4.528562,18.356922,4.726815,14.417423
Alabama,2,49.679501,35.924275,63.863039,38.478237,20.950856,57.460361,1020.109946,1012.019316,1027.668721,0.567448,...,49.679501,47.604836,7.252753,15.624721,218.772693,1.286012,4.369048,18.101525,4.664249,14.720722
Alabama,3,56.742473,43.463508,69.175941,43.799345,26.681452,60.043532,1018.526386,1010.551708,1026.051516,1.733031,...,56.742473,55.635181,7.379200,15.894758,186.998152,1.347581,4.581149,17.609039,4.661794,14.868616
Alabama,4,64.101997,53.041510,73.818125,51.627760,37.186302,64.222934,1016.634294,1009.963562,1022.851571,3.908611,...,64.107917,63.816233,6.918559,18.100938,186.184028,1.784514,4.634549,19.241354,5.101910,15.016632
Alabama,5,71.805225,62.842876,79.097900,60.433737,48.984140,68.871001,1016.114920,1010.849829,1021.139293,8.212114,...,72.331989,71.774429,5.879452,22.252050,171.455645,1.624580,4.533938,18.313978,4.634241,14.676445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,8,67.179385,57.783216,75.414432,41.067193,30.525726,50.866208,1014.070125,1008.560137,1019.470375,6.299311,...,67.264323,67.024618,8.347272,11.848269,226.428306,3.643354,5.638406,26.413714,5.106570,17.541395
Wyoming,9,57.212760,44.841545,68.081458,34.458041,23.457078,45.281219,1014.784021,1007.040635,1022.124253,2.638108,...,57.264278,56.040303,8.742606,12.505018,242.592254,3.916708,5.920246,27.191144,5.498239,17.124718
Wyoming,10,44.132124,30.168196,56.792658,26.493940,15.882023,36.357002,1016.453489,1006.576056,1026.135927,0.434425,...,44.178985,40.463197,9.392338,13.022834,259.259429,4.744631,6.418620,28.275025,5.761128,17.422868
Wyoming,11,32.076701,17.010747,46.239306,18.308679,6.479983,28.827944,1018.217827,1006.612465,1028.811618,0.021510,...,32.099283,25.679643,9.870121,14.161561,264.577980,5.931052,6.490285,30.672501,6.040609,17.692596


In [57]:
# Apply the mask: adding 0 keeps a mean as it is, adding NaN blanks out the means of
# (STATE, month) groups where more than half of the values were missing.
states_df_grouped = states_df_grouped.add(nan_mask)

In [58]:
# Display the grouped means after nan_mask has been added.
states_df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,HLY-TEMP-NORMAL,HLY-TEMP-10PCTL,HLY-TEMP-90PCTL,HLY-DEWP-NORMAL,HLY-DEWP-10PCTL,HLY-DEWP-90PCTL,HLY-PRES-NORMAL,HLY-PRES-10PCTL,HLY-PRES-90PCTL,HLY-CLDH-NORMAL,...,HLY-HIDX-NORMAL,HLY-WCHL-NORMAL,HLY-WIND-AVGSPD,HLY-WIND-PCTCLM,HLY-WIND-VCTDIR,HLY-WIND-VCTSPD,HLY-WIND-1STDIR,HLY-WIND-1STPCT,HLY-WIND-2NDDIR,HLY-WIND-2NDPCT
STATE,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,1,45.896304,31.040575,61.456250,35.785534,16.403545,55.925202,1021.493378,1012.674137,1029.811250,0.269405,...,45.896304,43.232409,6.951142,16.773673,229.971102,1.301361,4.528562,18.356922,4.726815,14.417423
Alabama,2,49.679501,35.924275,63.863039,38.478237,20.950856,57.460361,1020.109946,1012.019316,1027.668721,0.567448,...,49.679501,47.604836,7.252753,15.624721,218.772693,1.286012,4.369048,18.101525,4.664249,14.720722
Alabama,3,56.742473,43.463508,69.175941,43.799345,26.681452,60.043532,1018.526386,1010.551708,1026.051516,1.733031,...,56.742473,55.635181,7.379200,15.894758,186.998152,1.347581,4.581149,17.609039,4.661794,14.868616
Alabama,4,64.101997,53.041510,73.818125,51.627760,37.186302,64.222934,1016.634294,1009.963562,1022.851571,3.908611,...,64.107917,63.816233,6.918559,18.100938,186.184028,1.784514,4.634549,19.241354,5.101910,15.016632
Alabama,5,71.805225,62.842876,79.097900,60.433737,48.984140,68.871001,1016.114920,1010.849829,1021.139293,8.212114,...,72.331989,71.774429,5.879452,22.252050,171.455645,1.624580,4.533938,18.313978,4.634241,14.676445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,8,67.179385,57.783216,75.414432,41.067193,30.525726,50.866208,1014.070125,1008.560137,1019.470375,6.299311,...,67.264323,67.024618,8.347272,11.848269,226.428306,3.643354,5.638406,26.413714,5.106570,17.541395
Wyoming,9,57.212760,44.841545,68.081458,34.458041,23.457078,45.281219,1014.784021,1007.040635,1022.124253,2.638108,...,57.264278,56.040303,8.742606,12.505018,242.592254,3.916708,5.920246,27.191144,5.498239,17.124718
Wyoming,10,44.132124,30.168196,56.792658,26.493940,15.882023,36.357002,1016.453489,1006.576056,1026.135927,0.434425,...,44.178985,40.463197,9.392338,13.022834,259.259429,4.744631,6.418620,28.275025,5.761128,17.422868
Wyoming,11,32.076701,17.010747,46.239306,18.308679,6.479983,28.827944,1018.217827,1006.612465,1028.811618,0.021510,...,32.099283,25.679643,9.870121,14.161561,264.577980,5.931052,6.490285,30.672501,6.040609,17.692596


In [67]:
# Number of (STATE, month) cells per column that are NaN after masking.
states_df_grouped.isna().sum(axis=0)

HLY-TEMP-NORMAL     0
HLY-TEMP-10PCTL     0
HLY-TEMP-90PCTL     0
HLY-DEWP-NORMAL     0
HLY-DEWP-10PCTL     0
HLY-DEWP-90PCTL     0
HLY-PRES-NORMAL     0
HLY-PRES-10PCTL     0
HLY-PRES-90PCTL     0
HLY-CLDH-NORMAL     0
HLY-HTDH-NORMAL     0
HLY-CLOD-PCTCLR    46
HLY-CLOD-PCTFEW    46
HLY-CLOD-PCTSCT    46
HLY-CLOD-PCTBKN    46
HLY-CLOD-PCTOVC    46
HLY-HIDX-NORMAL     0
HLY-WCHL-NORMAL     0
HLY-WIND-AVGSPD     0
HLY-WIND-PCTCLM     0
HLY-WIND-VCTDIR     0
HLY-WIND-VCTSPD     0
HLY-WIND-1STDIR     0
HLY-WIND-1STPCT     0
HLY-WIND-2NDDIR     0
HLY-WIND-2NDPCT     0
dtype: int64

In [60]:
# Write the masked means for all measurement columns to disk; the (STATE, month)
# index is written as the first two columns.
states_df_grouped.to_csv(path_or_buf="climate_by_state_more_vars.csv")

In [66]:
# Display the frame that was written to climate_by_state_more_vars.csv.
states_df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,HLY-TEMP-NORMAL,HLY-TEMP-10PCTL,HLY-TEMP-90PCTL,HLY-DEWP-NORMAL,HLY-DEWP-10PCTL,HLY-DEWP-90PCTL,HLY-PRES-NORMAL,HLY-PRES-10PCTL,HLY-PRES-90PCTL,HLY-CLDH-NORMAL,...,HLY-HIDX-NORMAL,HLY-WCHL-NORMAL,HLY-WIND-AVGSPD,HLY-WIND-PCTCLM,HLY-WIND-VCTDIR,HLY-WIND-VCTSPD,HLY-WIND-1STDIR,HLY-WIND-1STPCT,HLY-WIND-2NDDIR,HLY-WIND-2NDPCT
STATE,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,1,45.896304,31.040575,61.456250,35.785534,16.403545,55.925202,1021.493378,1012.674137,1029.811250,0.269405,...,45.896304,43.232409,6.951142,16.773673,229.971102,1.301361,4.528562,18.356922,4.726815,14.417423
Alabama,2,49.679501,35.924275,63.863039,38.478237,20.950856,57.460361,1020.109946,1012.019316,1027.668721,0.567448,...,49.679501,47.604836,7.252753,15.624721,218.772693,1.286012,4.369048,18.101525,4.664249,14.720722
Alabama,3,56.742473,43.463508,69.175941,43.799345,26.681452,60.043532,1018.526386,1010.551708,1026.051516,1.733031,...,56.742473,55.635181,7.379200,15.894758,186.998152,1.347581,4.581149,17.609039,4.661794,14.868616
Alabama,4,64.101997,53.041510,73.818125,51.627760,37.186302,64.222934,1016.634294,1009.963562,1022.851571,3.908611,...,64.107917,63.816233,6.918559,18.100938,186.184028,1.784514,4.634549,19.241354,5.101910,15.016632
Alabama,5,71.805225,62.842876,79.097900,60.433737,48.984140,68.871001,1016.114920,1010.849829,1021.139293,8.212114,...,72.331989,71.774429,5.879452,22.252050,171.455645,1.624580,4.533938,18.313978,4.634241,14.676445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,8,67.179385,57.783216,75.414432,41.067193,30.525726,50.866208,1014.070125,1008.560137,1019.470375,6.299311,...,67.264323,67.024618,8.347272,11.848269,226.428306,3.643354,5.638406,26.413714,5.106570,17.541395
Wyoming,9,57.212760,44.841545,68.081458,34.458041,23.457078,45.281219,1014.784021,1007.040635,1022.124253,2.638108,...,57.264278,56.040303,8.742606,12.505018,242.592254,3.916708,5.920246,27.191144,5.498239,17.124718
Wyoming,10,44.132124,30.168196,56.792658,26.493940,15.882023,36.357002,1016.453489,1006.576056,1026.135927,0.434425,...,44.178985,40.463197,9.392338,13.022834,259.259429,4.744631,6.418620,28.275025,5.761128,17.422868
Wyoming,11,32.076701,17.010747,46.239306,18.308679,6.479983,28.827944,1018.217827,1006.612465,1028.811618,0.021510,...,32.099283,25.679643,9.870121,14.161561,264.577980,5.931052,6.490285,30.672501,6.040609,17.692596


In [70]:
# Read the exported file back in. No index_col is given, so STATE and month come back
# as ordinary columns rather than as the MultiIndex carried by states_df_grouped.
states_df_grouped_read = pd.read_csv(filepath_or_buffer="climate_by_state_more_vars.csv")

In [77]:
# Show the (STATE, month) MultiIndex of the in-memory grouped frame.
states_df_grouped.index

MultiIndex([('Alabama',  1),
            ('Alabama',  2),
            ('Alabama',  3),
            ('Alabama',  4),
            ('Alabama',  5),
            ('Alabama',  6),
            ('Alabama',  7),
            ('Alabama',  8),
            ('Alabama',  9),
            ('Alabama', 10),
            ...
            ('Wyoming',  3),
            ('Wyoming',  4),
            ('Wyoming',  5),
            ('Wyoming',  6),
            ('Wyoming',  7),
            ('Wyoming',  8),
            ('Wyoming',  9),
            ('Wyoming', 10),
            ('Wyoming', 11),
            ('Wyoming', 12)],
           names=['STATE', 'month'], length=600)

In [73]:
# Null counts per column of the frame re-read from CSV.
# NOTE(review): the output recorded below has no HLY-WIND-VCTDIR / HLY-WIND-1STDIR /
# HLY-WIND-2NDDIR columns and reports 0 nulls for HLY-CLOD-*, whereas states_df_grouped
# (the frame written to that CSV) has those columns and 46 nulls in each HLY-CLOD-*
# column. Presumably states_df_grouped_read was modified by cells that are no longer in
# the notebook (execution counts 71-72 are absent) — re-run on a fresh kernel to confirm.
states_df_grouped_read.isnull().sum()

STATE              0
month              0
HLY-TEMP-NORMAL    0
HLY-TEMP-10PCTL    0
HLY-TEMP-90PCTL    0
HLY-DEWP-NORMAL    0
HLY-DEWP-10PCTL    0
HLY-DEWP-90PCTL    0
HLY-PRES-NORMAL    0
HLY-PRES-10PCTL    0
HLY-PRES-90PCTL    0
HLY-CLDH-NORMAL    0
HLY-HTDH-NORMAL    0
HLY-CLOD-PCTCLR    0
HLY-CLOD-PCTFEW    0
HLY-CLOD-PCTSCT    0
HLY-CLOD-PCTBKN    0
HLY-CLOD-PCTOVC    0
HLY-HIDX-NORMAL    0
HLY-WCHL-NORMAL    0
HLY-WIND-AVGSPD    0
HLY-WIND-PCTCLM    0
HLY-WIND-VCTSPD    0
HLY-WIND-1STPCT    0
HLY-WIND-2NDPCT    0
dtype: int64