In [1]:
from IPython.display import display
import pandas as pd
# Raise pandas' display limits (rows, columns, total width, per-cell width)
# so the wide tables shown below are rendered with fewer cut-offs.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
    ('display.max_colwidth', 100),
):
    pd.set_option(option_name, option_value)
import re
from utils import plot
import feature_engg

In [2]:
# All three CSVs are read with identical options: every column is parsed as a
# string (dtype=str) and each file is processed in one pass (low_memory=False).
csv_read_options = {'low_memory': False, 'dtype': str}

data = pd.read_csv('./csv_data/data.csv', **csv_read_options)
feature_df = pd.read_csv('./csv_data/excel_doc.csv', **csv_read_options)
fips_all_codes_df = pd.read_csv('./csv_data/fips_all_codes.csv', **csv_read_options)

### Field values for identification on maps

In [3]:
# Preview the first 20 rows of the field-description table.
feature_df.head(20)

Unnamed: 0,field,col_col,year_of_data,variable_name,characteristics,source,date_on
0,f00001,00001-00001,,Blank,,,
1,f00002,00002-00006,,Header - FIPS St & Cty Code,,Derived From GSA,
2,f00003,00007-00011,,Entity of File,Equals 'AHRF ',,
3,f00004,00012-00031,,Secondary Entity Of File,Mod FIPS St & Cty Code,Derived From GSA,
4,f00005,00032-00035,,Date of File,Equals '2017',,07/17
5,f00006,00036-00040,,Date of Creation,Equals '17212',,07/17
6,f00007,00041-00045,,File Length,Equals '31446',,07/17
7,f00008,00046-00064,,State Name,,,
8,f12424,00065-00066,,State Name Abbreviation,,U.S. Post Office,02/96
9,f00010,00067-00091,,County Name,,"DDH,9-33",


state code = f00011 | county code = f00012

### Binary feature Identification

- Filter out every feature that has only 1 distinct value, or 3 or more distinct values, so that only features with exactly 2 distinct values remain.
- visualize them on a map.


In [4]:
# Number of distinct values per column of `data`, laid out as a two-column
# frame: 'column' (the column label) and 'nunique' (its distinct-value count).
nunique_columns_df = (
    data.nunique()
    .rename_axis('column')
    .reset_index(name='nunique')
)

# Counts are whole numbers, so "more than 1 and fewer than 3" means exactly 2.
has_two_values = nunique_columns_df['nunique'].eq(2)
binary_feature_list = nunique_columns_df.loc[has_two_values, 'column'].tolist()

In [5]:
# Show the metadata rows whose field code is one of the binary features.
is_binary_field = feature_df['field'].isin(binary_feature_list)
feature_df.loc[is_binary_field]

Unnamed: 0,field,col_col,year_of_data,variable_name,characteristics,source,date_on
22,f1419515,00223-00230,2015.0,CBSA County Status,Central or Outlying,Census Pop Division,07/17
30,f1248115,00349-00349,2015.0,Farming-Dependent Typology Code,,ERS Dept of Agriculture,07/16
31,f1248215,00350-00350,2015.0,Mining-Dependent Typology Code,,ERS Dept of Agriculture,07/16
32,f1248315,00351-00351,2015.0,Manufacturing-Dep Typology Code,,ERS Dept of Agriculture,07/16
33,f1248415,00352-00352,2015.0,Fed/St Govt-Depdnt Typolgy Code,Federal/State Government,ERS Dept of Agriculture,07/16
34,f1546915,00353-00353,2015.0,Recreation Typolpgy Code,,ERS Dept of Agriculture,07/16
35,f1248615,00354-00354,2015.0,Nonspecializd-Dep Typology Code,,ERS Dept of Agriculture,07/16
36,f1397515,00355-00355,2015.0,Low Education Typology Code,,ERS Dept of Agriculture,07/16
37,f1397615,00356-00356,2015.0,Low Employment Typology Code,,ERS Dept of Agriculture,07/16
38,f1533414,00357-00357,2014.0,High Poverty Typology Code,,ERS Dept of Agriculture,08/14


### US map county level feature details

In [9]:
% matplotlib notebook
feature = 'f1419515'
plt_title = feature_df[feature_df.field == feature]['variable_name'].tolist()[0]
plt_desc = ''
for key, value in feature_df[feature_df.field == feature].to_dict().items():
    if key in ['year_of_data', 'date_on', 'characteristics']:
        plt_desc += key+": "+str(list(value.values())[0])+"\n"
feature_values_dict = feature_engg.extract_feature_data(data=data, feature=feature)
plot.colour_code_usa_country(data=feature_values_dict, title=plt_title, desc=plt_desc, state_borders=True,
                        county_borders=True, state_names=False)

<IPython.core.display.Javascript object>

### US states map county level details

In [8]:
% matplotlib notebook
feature = 'f1419515'
plt_title = feature_df[feature_df.field == feature]['variable_name'].tolist()[0]
plt_desc = ''
for key, value in feature_df[feature_df.field == feature].to_dict().items():
    if key in ['year_of_data', 'date_on', 'characteristics']:
        plt_desc += key+": "+str(list(value.values())[0])+"\n"
feature_values_dict = feature_engg.extract_feature_data(data=data, feature=feature, state='Alabama')

plot.colour_code_usa_state(data=feature_values_dict, state='Alabama', title=plt_title, desc=plt_desc)

<IPython.core.display.Javascript object>