In [34]:
import pandas as pd

# Source data: a semicolon-separated CSV with one row per state and year.
DATA_URL = 'https://raw.githubusercontent.com/mpetrosyaaan/thcode/main/data/datathesis.csv'

df = pd.read_csv(DATA_URL, sep=';')
df
     

Unnamed: 0,states,states_code,id,year,population
0,Yerevan,AM-ER,1,2011,1060138
1,Yerevan,AM-ER,1,2012,1061044
2,Yerevan,AM-ER,1,2013,1066264
3,Yerevan,AM-ER,1,2014,1068306
4,Yerevan,AM-ER,1,2015,1071511
...,...,...,...,...,...
138,Tavush,AM-TV,11,2019,122242
139,Tavush,AM-TV,11,2020,121475
140,Tavush,AM-TV,11,2021,120534
141,Tavush,AM-TV,11,2022,119759


In [35]:
# Keep only the rows that belong to the year under analysis
selected_year = 2023
is_selected_year = df['year'] == selected_year
df_selected_year = df.loc[is_selected_year]
df_selected_year

Unnamed: 0,states,states_code,id,year,population
12,Yerevan,AM-ER,1,2023,1098866
25,Aragatsotn,AM-AG,2,2023,125687
38,Ararat,AM-AR,3,2023,259277
51,Armavir,AM-AV,4,2023,266656
64,Gegharkunik,AM-GR,5,2023,228711
77,Lori,AM-LO,6,2023,211582
90,Kotayk,AM-KT,7,2023,253857
103,Shirak,AM-SH,8,2023,230476
116,Syunik,AM-SU,9,2023,134555
129,VayotsDzor,AM-VD,10,2023,47661


In [36]:
# Order the selected year's rows by population, largest first
df_selected_year_sorted = df_selected_year.sort_values("population", ascending=False)
df_selected_year_sorted


Unnamed: 0,states,states_code,id,year,population
12,Yerevan,AM-ER,1,2023,1098866
51,Armavir,AM-AV,4,2023,266656
38,Ararat,AM-AR,3,2023,259277
90,Kotayk,AM-KT,7,2023,253857
103,Shirak,AM-SH,8,2023,230476
64,Gegharkunik,AM-GR,5,2023,228711
77,Lori,AM-LO,6,2023,211582
116,Syunik,AM-SU,9,2023,134555
25,Aragatsotn,AM-AG,2,2023,125687
142,Tavush,AM-TV,11,2023,119802


In [37]:
# Calculate population difference between selected and previous year
def calculate_population_difference(input_df, input_year):
  """Return each state's population change from the previous year to ``input_year``.

  The two years are matched on the ``id`` column, so the result does not depend
  on the row order of ``input_df`` or on both years listing the same states.

  Parameters
  ----------
  input_df : pandas.DataFrame
      Must contain the columns ``states``, ``id``, ``year`` and ``population``.
      The previous year must hold at most one row per ``id``.
  input_year : int
      Year to report; it is compared against ``input_year - 1``.

  Returns
  -------
  pandas.DataFrame
      Columns ``states``, ``id``, ``population``, ``population_difference`` and
      ``population_difference_absolute``, one row per row of ``input_year``,
      indexed by position within that year and sorted by
      ``population_difference`` in descending order. A state without a row in
      the previous year is compared against 0, so its difference equals its
      population.
  """
  selected_year_data = input_df[input_df['year'] == input_year].reset_index(drop=True)
  previous_population = input_df[input_df['year'] == input_year - 1].set_index('id')['population']

  # Look up last year's population by state id rather than by row position;
  # ids that are absent last year come back as NaN and are treated as 0 below.
  previous_aligned = pd.Series(
    previous_population.reindex(selected_year_data['id']).to_numpy(),
    index=selected_year_data.index,
  )

  selected_year_data['population_difference'] = selected_year_data['population'].sub(previous_aligned, fill_value=0)
  selected_year_data['population_difference_absolute'] = selected_year_data['population_difference'].abs()

  result_columns = ['states', 'id', 'population', 'population_difference', 'population_difference_absolute']
  return selected_year_data[result_columns].sort_values(by="population_difference", ascending=False)

# Year-over-year change for the selected year, largest increase first
df_population_difference_sorted = calculate_population_difference(
    input_df=df,
    input_year=selected_year,
)
df_population_difference_sorted

Unnamed: 0,states,id,population,population_difference,population_difference_absolute
0,Yerevan,1,1098866,6088,6088
6,Kotayk,7,253857,2781,2781
2,Ararat,3,259277,2662,2662
3,Armavir,4,266656,2273,2273
1,Aragatsotn,2,125687,1041,1041
4,Gegharkunik,5,228711,933,933
7,Shirak,8,230476,136,136
9,VayotsDzor,10,47661,77,77
10,Tavush,11,119802,43,43
5,Lori,6,211582,-95,95


In [38]:
# Keep the states whose population changed by more than 1000 in either direction
exceeds_1000 = df_population_difference_sorted['population_difference_absolute'] > 1000
df_greater_1000 = df_population_difference_sorted.loc[exceeds_1000]
df_greater_1000

Unnamed: 0,states,id,population,population_difference,population_difference_absolute
0,Yerevan,1,1098866,6088,6088
6,Kotayk,7,253857,2781,2781
2,Ararat,3,259277,2662,2662
3,Armavir,4,266656,2273,2273
1,Aragatsotn,2,125687,1041,1041


In [39]:
# Share of states (whole percent, truncated) whose population changed by more than 1000
changed_state_count = len(df_greater_1000)
total_state_count = df_population_difference_sorted['states'].nunique()
int(changed_state_count / total_state_count * 100)

45

Plots
Heatmap

In [40]:
import altair as alt

alt.themes.enable("dark")

# Title styling shared by both axes of the heatmap.
axis_title_style = dict(titleFontSize=16, titlePadding=15, titleFontWeight=900)

year_axis = alt.Y('year:O', axis=alt.Axis(title="Year", labelAngle=0, **axis_title_style))
state_axis = alt.X('states:O', axis=alt.Axis(title="States", **axis_title_style))

# One cell per (state, year), coloured by the largest population value in that cell.
population_color = alt.Color(
    'max(population):Q',
    legend=alt.Legend(title=" "),
    scale=alt.Scale(scheme="blueorange"),
)

heatmap = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=state_axis,
        y=year_axis,
        color=population_color,
        stroke=alt.value('black'),
        strokeWidth=alt.value(0.25),
    )
    .properties(width=900)
    .configure_axis(labelFontSize=12, titleFontSize=12)
)

heatmap

Choropleth

In [41]:
# Choropleth via Altair
# Draws the shapes of a topology and colours each one by the 'population' value
# looked up from df_selected_year through the shared 'id' field.
# NOTE(review): the geometry below is the US states topology (us_10m) drawn with
# the albersUsa projection, while df_selected_year holds Armenian provinces
# (Yerevan, Aragatsotn, ... with ids 1-11 in the table output above). The lookup
# on 'id' therefore appears to attach Armenian populations to unrelated US
# shapes. This cell presumably needs an Armenia boundary file and a matching
# projection instead - TODO confirm and replace.
import altair as alt
from vega_datasets import data

alt.themes.enable("dark")

# Geometry source: the 'states' feature of the vega_datasets us_10m topology.
states = alt.topo_feature(data.us_10m.url, 'states')

alt.Chart(states).mark_geoshape().encode(
    color=alt.Color('population:Q', scale=alt.Scale(scheme='blues')),   # scale=color_scale
    stroke=alt.value('#154360')
).transform_lookup(
    # Join every df_selected_year column onto the shape whose 'id' matches.
    lookup='id',
    from_=alt.LookupData(df_selected_year, 'id', list(df_selected_year.columns))
).properties(
    width=500,
    height=300
).project(
    type='albersUsa'
)

In [42]:
# Choropleth via Plotly
# Colours regions by 'population', using 'states_code' as the location key and a
# colour range that runs from 0 to the largest population in the selected year.
# NOTE(review): locationmode="USA-states" and scope="usa" are used here, but the
# 'states_code' values in the data are of the form AM-ER, AM-AG, ... (see the
# table output above), which do not look like US state abbreviations, so
# presumably no region gets coloured. Plotting these provinces likely requires
# a GeoJSON of the Armenian provinces passed via `geojson=` / `featureidkey=` -
# TODO confirm and replace.
import plotly.express as px

choropleth = px.choropleth(df_selected_year, locations='states_code', color='population', locationmode="USA-states",
                               color_continuous_scale='blues',
                               range_color=(0, max(df_selected_year.population)),
                               scope="usa",
                               labels={'population':'Population'}
                              )
# Dark template with transparent backgrounds and no margins around the map.
choropleth.update_layout(
        template='plotly_dark',
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        margin=dict(l=0, r=0, t=0, b=0),
        height=350
    )

choropleth
     
