In [None]:
import numpy as np 
import pandas as pd
import geopandas as gpd
import folium
import plotly
import plotly.express as px

In [None]:
# Load the generalized Maryland county boundaries (GeoJSON) and index them by
# county name so they can later be matched against the income table's county names.
md_boundaries = gpd.read_file(
    '/kaggle/input/personal-income/Maryland_Physical_Boundaries_-_County_Boundaries_(Generalized).geojson'
).set_index('county')

# Build the county FIPS code: the Maryland prefix '24' followed by the county
# code left-padded with zeros to three digits.
md_boundaries['GeoFIPS'] = '24' + md_boundaries['county_fip'].astype(str).str.zfill(3)

# County area in square kilometres.
# - Areas are measured in an equal-area projection (EPSG:5070, NAD83 / CONUS
#   Albers, units of metres). Web Mercator (EPSG:3857) inflates areas away from
#   the equator and would overstate every county's size.
# - .area is returned in square metres, so divide by 1e6 (not 1e3) to get km^2.
md_boundaries['Area_sq_km'] = md_boundaries.to_crs(epsg=5070).area / 1e6


In [None]:
# Load the Maryland CAINC1 personal-income table (1969-2022) and drop the
# metadata columns that are not needed for the analysis.
md_inc = pd.read_csv(
    '/kaggle/input/personal-income/CAINC1_MD_1969_2022.csv',
    skipfooter=4,     # ignore the 4 trailing lines (presumably footnotes -- verify against the file)
    engine='python',  # default 'c' engine does not support skipfooter
).drop(columns=['TableName', 'IndustryClassification', 'Unit', 'LineCode'])

# Keep only the text before the first ' (' in each description, so that rows
# can be selected by their bare description (e.g. 'Personal income').
md_inc['Description'] = md_inc['Description'].str.partition(' (')[0]

# Keep only the text before the first comma in each place name, then rename the
# '(Independent City)' marker to 'City'.
# regex=False is required: with regex matching (the default before pandas 2.0)
# the parentheses form a capture group and are NOT removed, which would yield
# 'Baltimore (City)' instead of 'Baltimore City'.
md_inc['GeoName'] = (
    md_inc['GeoName']
    .str.partition(',')[0]
    .str.replace('(Independent City)', 'City', regex=False)
)

In [None]:
# Split the combined table into one frame per measure, each indexed by place
# name (the state row 'Maryland' plus one row per county).
md_per_inc = md_inc[md_inc['Description'].eq('Personal income')].set_index('GeoName')
md_pop = md_inc[md_inc['Description'].eq('Population')].set_index('GeoName')

In [None]:
def _share_of_state(table, value_name):
    """Return each county's share of the Maryland total, in long format.

    Rows from 'Allegany' onward are divided, year by year (1969-2022), by the
    'Maryland' row of the same table. The result has one row per
    (county, year) with columns ['County', 'Year', value_name]; the values are
    fractions of the state total, not percentages scaled by 100.
    """
    years = slice('1969', '2022')
    county_rows = table.loc['Allegany':, years]
    state_row = table.loc['Maryland', years]
    shares = county_rows.div(state_row).stack().reset_index()
    shares.columns = ['County', 'Year', value_name]
    return shares


# County values divided by the state values, for income and for population.
md_per_inc_pct = _share_of_state(md_per_inc, 'Personal_income')
md_pop_pct = _share_of_state(md_pop, 'Population')

In [None]:
# Combine the income and population shares on (County, Year), then derive:
#   pct_diff -- income share divided by population share, so a value above 1
#               means the county holds a larger share of income than of people;
#   log_diff -- natural log of that ratio, which puts "equal shares" at 0.
md_diff = (
    md_per_inc_pct
    .merge(md_pop_pct, on=['County', 'Year'])
    .assign(pct_diff=lambda df: df['Personal_income'] / df['Population'])
    .assign(log_diff=lambda df: df['pct_diff'].map(np.log))
)

In [None]:
# Attach each county's area by looking up the 'County' column in the boundary
# table's index. This is an inner join, so counties whose names are missing
# from the boundary table are dropped.
plot_df = md_diff.join(md_boundaries['Area_sq_km'], on='County', how='inner')

In [None]:
# Line chart of the log income/population ratio over time for a hand-picked
# subset of counties; the figure is written to a standalone HTML file.
counties_of_interest = [
    'Anne Arundel',
    'Baltimore',
    'Frederick',
    'Howard',
    'Montgomery',
    "Prince George's",
    'Baltimore City',
]
counties_subset = plot_df[plot_df['County'].isin(counties_of_interest)]

fig = px.line(counties_subset, x='Year', y='log_diff', color='County')
fig.update_layout(yaxis_title_text='log(% Personal Income/ % Population)')
fig.write_html('log_diff_by_year.html')

In [None]:
# Animated scatter (one frame per year) of each county's share of state
# personal income against its share of state population, with markers sized by
# county area. Axis ranges are fixed across frames: 0 up to the overall maximum
# plus a small 0.05 margin.
population_axis_max = plot_df['Population'].max() + 0.05
income_axis_max = plot_df['Personal_income'].max() + 0.05

axis_labels = {
    'Personal_income': 'Proportion of MD Personal Income',
    'Population': 'Proportion of MD Population',
    'Area_sq_km': 'Area(sq km)',
}

fig = px.scatter(
    plot_df,
    x='Population',
    y='Personal_income',
    size='Area_sq_km',
    color='County',
    animation_frame='Year',
    hover_data=['County', 'Personal_income', 'Population', 'Area_sq_km'],
    labels=axis_labels,
    range_x=[0, population_axis_max],
    range_y=[0, income_axis_max],
)

fig.write_html('per_inc_vs_pop.html')

In [None]:
# Animated choropleth (one frame per year) of the log income/population ratio
# by county. The colour range is pinned to the overall min/max of log_diff so
# colours are comparable from frame to frame.
# NOTE(review): locations are county names and are expected to match the index
# of md_boundaries (set to 'county' when the boundaries were loaded) -- confirm
# that every county is drawn.
fig = px.choropleth(
    plot_df,
    geojson=md_boundaries.geometry,
    locations=plot_df['County'],
    color='log_diff',
    animation_frame='Year',
    projection="mercator",
    fitbounds='geojson',
    basemap_visible=False,
    range_color=[plot_df['log_diff'].min(), plot_df['log_diff'].max()],
    title='Log Difference Between Personal Income Percentage and Population Percentage in MD (1969 - 2022)',
)

# Shorten the colour bar and drop its title (the figure title already explains it).
fig.update_coloraxes(colorbar={'len': 0.75, 'title': {'text': ''}})

# Trim the side and bottom margins, but keep space at the top: a zero top
# margin leaves no room for the figure title to be displayed.
fig.update_layout(margin={"r": 0, "t": 50, "l": 0, "b": 0})
fig.write_html('MD_personal_income_map.html')