In [5]:
!pip install kaleido

from kaleido.scopes.plotly import PlotlyScope

scope = PlotlyScope(
    plotlyjs="https://cdn.plot.ly/plotly-latest.min.js",
    # plotlyjs="/path/to/local/plotly.js",
)

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.9/79.9 MB 18.3 MB/s eta 0:00:00
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1


In [7]:
import numpy as np 
import pandas as pd
import geopandas as gpd
import folium
import plotly
import plotly.express as px
import PIL
import io

In [8]:
# Load the county boundary polygons, indexed by county name.
md_boundaries = gpd.read_file(
    '/kaggle/input/personal-income/Maryland_Physical_Boundaries_-_County_Boundaries_(Generalized).geojson'
).set_index('county')

# Build the 5-character FIPS code: Maryland's state prefix '24' followed by
# the county code left-padded with zeros to three characters.
md_boundaries['GeoFIPS'] = md_boundaries.county_fip.apply(lambda x: '24' + str(x).zfill(3))

# County area in square kilometres.
# `.area` is expressed in the square of the CRS unit, so the geometries are
# first projected to an equal-area CRS in metres (EPSG:5070, NAD83 / Conus
# Albers) and the result in m^2 is divided by 1e6. Web Mercator (EPSG:3857)
# is avoided because it inflates areas away from the equator.
SQ_M_PER_SQ_KM = 1_000_000
md_boundaries['Area_sq_km'] = md_boundaries.to_crs('EPSG:5070').area / SQ_M_PER_SQ_KM


In [9]:
# Load the Maryland personal-income table and drop the columns that are not
# used anywhere below.
md_inc = pd.read_csv(
    '/kaggle/input/personal-income/CAINC1_MD_1969_2022.csv',
    skipfooter=4,     # the last four lines of the file are not data rows
    engine='python',  # the default 'c' engine does not support skipfooter
).drop(columns=['TableName', 'IndustryClassification', 'Unit', 'LineCode'])

# Keep only the text before the first ' (' in each description.
# `str.partition` always treats the separator literally.
md_inc['Description'] = md_inc['Description'].str.partition(' (')[0]

# Keep only the text before the first comma, then shorten the
# "(Independent City)" suffix to "City".
# regex=False is explicit because the default of `str.replace` differs
# between pandas versions; in regex mode the parentheses would act as a group
# and stay in the result as "(City)".
md_inc['GeoName'] = (
    md_inc['GeoName']
    .str.partition(',')[0]
    .str.replace('(Independent City)', 'City', regex=False)
)

In [10]:
# Split the combined table into one frame per measure, each indexed by GeoName.
md_per_inc = md_inc.query("Description == 'Personal income'").set_index('GeoName')
md_pop = md_inc.query("Description == 'Population'").set_index('GeoName')

In [11]:
def _share_of_state(measure_df, value_name):
    """Return each county's yearly value as a fraction of the Maryland value.

    Parameters
    ----------
    measure_df : DataFrame indexed by area name, with year columns labelled
        '1969' through '2022'. Rows from 'Allegany' to the end of the index
        are taken as the counties; the 'Maryland' row is the divisor.
    value_name : name given to the value column of the result.

    Returns
    -------
    Long-form DataFrame with columns ['County', 'Year', value_name].
    """
    county_rows = measure_df.loc['Allegany':, '1969':'2022']
    state_row = measure_df.loc['Maryland', '1969':'2022']
    long_form = county_rows.div(state_row).stack().reset_index()
    return long_form.set_axis(['County', 'Year', value_name], axis=1)


# County values divided by the state value, one row per county and year.
md_per_inc_pct = _share_of_state(md_per_inc, 'Personal_income')
md_pop_pct = _share_of_state(md_pop, 'Population')

In [12]:
# Join the income and population shares on county and year, then derive:
#   pct_diff - income share divided by population share
#   log_diff - natural log of pct_diff (0 means the two shares are equal)
md_diff = (
    md_per_inc_pct
    .merge(md_pop_pct, on=['County', 'Year'])
    .assign(pct_diff=lambda df: df['Personal_income'] / df['Population'])
    .assign(log_diff=lambda df: df['pct_diff'].map(np.log))
)

In [13]:
# Counties whose names are drawn as text labels in the plots.
counties_of_interest = [
    'Anne Arundel', 'Baltimore', 'Frederick', 'Howard',
    'Montgomery', "Prince George's", 'Baltimore City', 'Harford',
]

# Attach each county's rounded area (looked up by county name in the
# boundary frame's index) to the income/population table.
county_area = md_boundaries['Area_sq_km'].apply(round)
plot_df = md_diff.merge(county_area, left_on='County', right_index=True)

# Label column: the county name for highlighted counties, empty otherwise.
plot_df['Plot_County'] = plot_df['County'].where(
    plot_df['County'].isin(counties_of_interest), ""
)

In [52]:
# Counties shown in the faceted line plot, listed in facet order.
# A separate name is used so the `counties_of_interest` list defined earlier
# is not silently replaced, and the same list drives both the row filter and
# the facet ordering.
line_plot_counties = ['Anne Arundel', 'Frederick', 'Baltimore', "Prince George's"]

# One facet per county showing log_diff by year.
fig = px.line(
    plot_df.loc[plot_df.County.isin(line_plot_counties)],
    x='Year',
    y='log_diff',
    color='County',
    facet_col='County',
    facet_col_wrap=2,
    category_orders={'County': line_plot_counties},
    # Setting the label here titles the y-axis of every facet row (and the
    # hover text), instead of patching only the first y-axis afterwards.
    labels={'log_diff': 'log(% Personal Income/ % Population)'},
)

# Reference line at 0.
fig.add_hline(y=0)

# Let every facet use its own y-range; because the scales then differ, tick
# labels are switched on for every facet rather than only the first column.
fig.update_yaxes(matches=None, showticklabels=True)

fig.write_html('log_diff_by_year.html')

In [53]:
# Animated bubble chart: each county's share of state population (x) against
# its share of state personal income (y), one animation frame per year,
# bubble size given by county area.

# The plotted values are proportions between 0 and 1, so the axis and hover
# labels say "Proportion" rather than "%". Passing them through `labels`
# keeps axis titles and hover text consistent.
scatter_labels = {
    'Personal_income': 'Proportion of MD Personal Income',
    'Population': 'Proportion of MD Population',
    'Area_sq_km': 'Area (sq km)',
}

fig = px.scatter(
    plot_df.sort_values(by=['County', 'Year'], ascending=[False, True]),
    x='Population',
    y='Personal_income',
    size='Area_sq_km',
    color='County',
    animation_frame='Year',
    text='Plot_County',
    hover_data=['County', 'Personal_income', 'Population', 'Area_sq_km'],
    labels=scatter_labels,
    range_x=[-0.01, 0.26],
    range_y=[-0.01, 0.26],
    height=1000,
)

# Reference line y = x: counties above it hold a larger share of income than
# of population.
fig.add_scatter(
    x=[-1, 1],
    y=[-1, 1],
    showlegend=False,
    line=dict(color='gray', width=1),
)
fig.update_traces(showlegend=False, textfont={'size': 9})

fig.write_html('per_inc_vs_pop.html')

In [19]:
# Render every animation frame of `fig` to an image and combine the images
# into an animated GIF.
#
# Each frame is drawn on its own temporary figure built from the frame's
# traces plus the reference line. Pushing frame data into `fig` itself would
# also overwrite the reference-line trace (frame traces are cycled over all
# existing traces), and re-adding the line on every pass makes `fig` grow by
# one trace per frame, which slows each render and stacks duplicate markers.
reference_line = plotly.graph_objects.Scatter(
    x=[-1, 1],
    y=[-1, 1],
    showlegend=False,
    line=dict(color='gray', width=1),
)

frames = []
for slider_pos, frame in enumerate(fig.frames):
    frame_fig = plotly.graph_objects.Figure(
        data=list(frame.data) + [reference_line],
        layout=fig.layout,
    )
    # Move the slider handle to the frame being rendered.
    frame_fig.layout.sliders[0].update(active=slider_pos)
    frame_fig.update_traces(showlegend=False, textfont={'size': 9})

    image_bytes = scope.transform(frame_fig, format='jpeg', scale=1)
    frames.append(PIL.Image.open(io.BytesIO(image_bytes)))

# Create the gif file: 300 ms per frame, looping forever.
frames[0].save("md_per_inc_vs_pop.gif",
               save_all=True,
               append_images=frames[1:],
               optimize=True,
               duration=300,
               loop=0)

KeyboardInterrupt: 

In [36]:
# Animated choropleth of log_diff per county, one frame per year.
# The colour scale is fixed to the overall min/max so that colours are
# comparable between frames.
log_diff_range = [plot_df.log_diff.min(), plot_df.log_diff.max()]
map_title = 'Log Difference Between Personal Income Percentage <br>and Population Percentage in MD (1969 - 2022)'

fig = px.choropleth(
    plot_df,
    geojson=md_boundaries.geometry,
    locations=plot_df.County,
    color='log_diff',
    animation_frame='Year',
    fitbounds='locations',
    basemap_visible=False,
    range_color=log_diff_range,
    title=map_title,
)

# Add top padding to the year slider and to the play/pause buttons.
slider_padding = {'pad': {'t': 10}}
button_padding = {'pad': {'t': 15}}
fig.update_layout(sliders=[slider_padding], updatemenus=[button_padding])

fig.write_html('MD_personal_income_map.html')





In [38]:
# Step through the animation frames of `fig`, render each one to PNG and
# collect the decoded images for the GIF.
frames = []
for slider_pos, frame in enumerate(fig.frames):
    # Show this frame's data and move the slider handle to match.
    fig.update(data=frame.data)
    fig.layout.sliders[0].update(active=slider_pos)

    png_bytes = scope.transform(fig, format='png')
    frames.append(PIL.Image.open(io.BytesIO(png_bytes)))

# Write the animated GIF: the first image is the base, the rest are appended;
# 300 ms per frame, looping forever.
gif_options = dict(
    save_all=True,
    append_images=frames[1:],
    optimize=True,
    duration=300,
    loop=0,
)
frames[0].save("md_personal_income_map.gif", **gif_options)

KeyboardInterrupt: 