In [45]:
import numpy as np 
import pandas as pd
import geopandas as gpd
import folium
import plotly
import plotly.express as px

In [34]:
# Read the generalized Maryland county boundaries from GeoJSON, then derive a
# 'GeoFIPS' key for every row: the literal prefix '24' followed by the row's
# county_fip value rendered as text and left-padded with zeros to at least
# three characters.
county_geojson_path = '/kaggle/input/personal-income/Maryland_Physical_Boundaries_-_County_Boundaries_(Generalized).geojson'
md_boundaries = gpd.read_file(county_geojson_path)
md_boundaries['GeoFIPS'] = [
    '24' + str(county_code).zfill(3)
    for county_code in md_boundaries.county_fip
]

In [131]:
# Load the Maryland personal-income CSV and discard the metadata columns that
# the rest of the notebook never reads.
md_inc = pd.read_csv(
    '/kaggle/input/personal-income/CAINC1_MD_1969_2022.csv',
    skipfooter=4,     # ignore the last 4 lines of the file (presumably footnotes - verify)
    engine='python',  # default 'c' engine does not support skipfooter
).drop(columns=['TableName', 'IndustryClassification', 'Unit', 'LineCode'])

# Keep only the text before the first ' (' so rows can later be selected by
# their bare label (e.g. 'Personal income').
md_inc['Description'] = md_inc.Description.map(lambda desc: desc.split(' (')[0])

# Keep only the text before the first comma, then rewrite the literal suffix
# '(Independent City)' as 'City'.
# regex=False is required: interpreted as a regular expression (the default
# before pandas 2.0) the parentheses form a capture group, so only the inner
# words would be replaced and the literal brackets would be left behind.
md_inc['GeoName'] = (
    md_inc.GeoName
    .map(lambda name: name.split(',')[0])
    .str.replace('(Independent City)', 'City', regex=False)
)

In [132]:
# Split the combined table into two frames, one per measure, each indexed by
# place name (GeoName) so rows can be looked up by label.
is_income_row = md_inc.Description.eq('Personal income')
is_population_row = md_inc.Description.eq('Population')

md_per_inc = md_inc[is_income_row].set_index('GeoName')
md_pop = md_inc[is_population_row].set_index('GeoName')

In [196]:
# Express each county's yearly value as a share of the statewide value.
# The result is a fraction of the 'Maryland' row (it is not multiplied by 100).
def _county_share_of_state(table, value_name):
    """Return long-format (County, Year, <value_name>) shares of the state row.

    ``table`` must be indexed by place name, contain a 'Maryland' row, and have
    year columns labelled '1969' through '2022'.
    """
    year_span = slice('1969', '2022')
    # NOTE(review): the 'Allegany': slice keeps every row from 'Allegany' to the
    # end of the table, so it depends on row order - confirm that all county
    # rows (and nothing else) follow 'Allegany'.
    county_rows = table.loc['Allegany':, year_span]
    state_row = table.loc['Maryland', year_span]
    # Dividing a frame by a Series lines the Series up with the year columns.
    shares = county_rows.div(state_row)
    long_form = shares.stack().reset_index()
    return long_form.set_axis(['County', 'Year', value_name], axis=1)


md_per_inc_pct = _county_share_of_state(md_per_inc, 'Personal_income')
md_pop_pct = _county_share_of_state(md_pop, 'Population')

In [207]:
# Join the income and population shares on (County, Year), then add:
#   pct_diff - income share divided by population share (a ratio; a value of 1
#              means the two shares are equal)
#   log_diff - natural log of that ratio, so equal shares map to 0
md_diff = (
    md_per_inc_pct
    .merge(md_pop_pct, on=['County', 'Year'])
    .assign(pct_diff=lambda frame: frame.Personal_income / frame.Population)
    .assign(log_diff=lambda frame: frame.pct_diff.map(np.log))
)

In [212]:
# Attach each county's boundary area, scaled by 100, as a 'shape_Area' column.
# NOTE(review): right_index=True matches 'County' against the index of
# md_boundaries, so that index must already hold county names spelled exactly
# like the 'County' values. No visible cell sets such an index - confirm this
# still works after Restart Kernel -> Run All.
scaled_area = md_boundaries.shape_Area.mul(100)
plot_df = md_diff.merge(scaled_area, left_on='County', right_index=True)

In [None]:
# One line per county showing log_diff over time.
# hover_name makes each line identifiable on hover (line_group alone draws the
# lines but never says which county a line belongs to); title and labels let
# the figure stand on its own when the notebook is skimmed.
fig = px.line(
    plot_df,
    x='Year',
    y='log_diff',
    line_group='County',
    hover_name='County',
    title='Log ratio of income share to population share by Maryland county',
    labels={'log_diff': 'log(income share / population share)'},
)
fig.show()

In [214]:
# Animated scatter of population share vs. income share, one frame per year,
# with marker size taken from the 'shape_Area' column.
def _padded_range(values, pad_frac=0.05):
    """Return [min, max] of ``values`` widened by ``pad_frac`` of the span on each side."""
    lowest, highest = values.min(), values.max()
    margin = (highest - lowest) * pad_frac
    return [lowest - margin, highest + margin]


fig = px.scatter(
    plot_df,
    x='Population',
    y='Personal_income',
    size='shape_Area',
    animation_frame='Year',
    hover_name='County',  # identify the county behind each marker
    # Pin both axes to the extent of the whole data set: animated figures do
    # not rescale between frames, so without fixed ranges later years can
    # drift out of view.
    range_x=_padded_range(plot_df.Population),
    range_y=_padded_range(plot_df.Personal_income),
    title='County share of state population vs. share of state personal income',
    labels={
        'Population': 'Share of state population',
        'Personal_income': 'Share of state personal income',
    },
)
fig.show()





In [None]:
# Animated county map coloured by log_diff, one frame per year. The colour
# scale is pinned to the overall min/max of log_diff so that every frame uses
# the same scale.
# NOTE(review): the geometries are passed without a featureidkey, so matching
# 'County' values to shapes presumably relies on md_boundaries being indexed by
# the same county names - no visible cell sets that index; confirm.
log_diff_bounds = [plot_df.log_diff.min(), plot_df.log_diff.max()]

fig = px.choropleth(
    plot_df,
    geojson=md_boundaries.geometry,
    locations='County',
    color='log_diff',
    animation_frame='Year',
    projection='mercator',
    range_color=log_diff_bounds,
    # Optional fixed figure size:
    # width=1000,
    # height=1000,
)
# Zoom to the plotted counties and hide the base map layers.
fig.update_geos(fitbounds='locations', visible=False)
# Remove the outer margins around the map.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
# Uncomment to export the map as a standalone HTML file:
# fig.write_html('MD_personal_income.html')



