### Activity 1

In [None]:
import pandas as pd
import os
from pathlib import Path
import hvplot.pandas

### Prepare the DataFrame

In [None]:
# Load population_counts.csv into a DataFrame, then discard exact duplicate rows.
population_df = pd.read_csv(Path("Resources/population_counts.csv"))
population_df = population_df.drop_duplicates()

# Work on a copy so the de-duplicated frame loaded above is left as read from disk.
df = population_df.copy()
df

### Review

In [None]:
# Positional selection: every second row from position 9999 up to (but not
# including) 10005, restricted to the columns at positions 3 and 5.
subset = df.iloc[9999:10005:2].iloc[:, [3, 5]]
subset

In [None]:
# Alabama rows whose 'PopulationCount' exceeds 10,000, keeping only the
# 'Year', 'CityName' and 'PopulationCount' columns.
is_alabama = df['StateDesc'] == 'Alabama'
is_over_10k = df['PopulationCount'] > 10000
al_df = df.loc[is_alabama & is_over_10k, ['Year', 'CityName', 'PopulationCount']]
al_df

In [None]:
# New York rows, keeping the city name, population count and coordinates.
ny_columns = ["CityName", "PopulationCount", "Latitude", "Longitude"]
ny_df = df.loc[df['StateDesc'] == 'New York', ny_columns]
ny_df

In [None]:
# Sum of 'PopulationCount' per New York city name.
ny_df.groupby('CityName')['PopulationCount'].sum()

[pandas cheatsheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)

In [None]:
# Geographic point plot of the New York rows on OSM tiles: marker size is
# driven by 'PopulationCount' and marker colour by 'CityName'.
ny_map_options = dict(
    geo=True,
    size='PopulationCount',
    scale=.02,
    color='CityName',
    alpha=.5,
    tiles='OSM',
    frame_width=700,
    frame_height=500,
)
map_plot_ny = ny_df.hvplot.points('Longitude', 'Latitude', **ny_map_options)
map_plot_ny
    

### Plot data: the story of population change in CA

In [None]:
# Show the 2015 rows and the 2016 rows of df. This cell only displays them;
# the random change to the 2016 population counts is applied in a later cell.
display(df[df['Year'] == 2015], df[df['Year'] == 2016])

In [None]:
import numpy as np

def increase_population_randomly(population, rng=None, max_change=0.10):
    """Scale a population count by a random factor and return it as an int.

    The factor is ``1 + u`` where ``u`` is drawn uniformly from
    ``[-max_change, max_change)``, so with the default the result lies within
    roughly +/-10% of ``population``.

    Parameters
    ----------
    population : int or float
        The population count to perturb.
    rng : optional
        Any object exposing ``uniform(low, high)`` (for example a
        ``numpy.random.Generator``). When omitted, NumPy's global random
        state is used, which is what the original implementation did.
    max_change : float, default 0.10
        Largest relative change, as a fraction of ``population``.

    Returns
    -------
    int
        The perturbed count; ``int()`` truncates the fractional part toward zero.
    """
    # Fall back to the module-level generator so existing one-argument calls
    # behave exactly as before.
    generator = np.random if rng is None else rng
    increase_factor = 1 + generator.uniform(-max_change, max_change)
    return int(population * increase_factor)


# Seed NumPy's global random state so the random 2016 adjustment produces the
# same values on every run of the notebook.
np.random.seed(42)

# Read the base counts from population_df (the untouched frame df was copied
# from) rather than from df itself: re-running this cell then recreates the
# same adjusted values instead of stacking another random change on top.
is_2016 = df['Year'] == 2016
df.loc[is_2016, 'PopulationCount'] = (
    population_df.loc[is_2016, 'PopulationCount'].apply(increase_population_randomly)
)
display(df[df['Year'] == 2015])
display(df[df['Year'] == 2016])


### Plot data

In [None]:
# Step 1: split df into one frame per year, keeping the same six columns in each.
year_columns = ['Year', 'StateDesc', 'CityName', 'PopulationCount', 'Latitude', 'Longitude']
df_2015 = df.loc[df['Year'] == 2015, year_columns]
df_2016 = df.loc[df['Year'] == 2016, year_columns]

display(df_2015, df_2016)

In [None]:
# Step 2: narrow each yearly frame down to its California rows.
ca_df_2015 = df_2015[df_2015['StateDesc'] == 'California']
ca_df_2016 = df_2016[df_2016['StateDesc'] == 'California']
display(ca_df_2015, ca_df_2016)

In [None]:
# Options shared by both yearly California maps; only the marker colour differs.
yearly_map_options = dict(
    geo=True,
    size='PopulationCount',
    scale=.02,
    alpha=.5,
    tiles='OSM',
    frame_width=700,
    frame_height=500,
)

# 2015 in blue, 2016 in red.
plot = ca_df_2015.hvplot.points('Longitude', 'Latitude', color='blue', **yearly_map_options)
plot1 = ca_df_2016.hvplot.points('Longitude', 'Latitude', color='red', **yearly_map_options)

# NOTE(review): `+` composes the two maps into a layout of separate plots;
# `plot * plot1` would draw both years on a single shared map. The variable is
# called `overlay`, so confirm which of the two is intended.
overlay = plot + plot1
overlay

In [None]:
# Step 3: compute the population difference between 2015 and 2016.
# Rows from the two California frames are paired when their Longitude and
# Latitude are identical; the remaining columns get a _2015 / _2016 suffix.
merged_population = ca_df_2015.merge(
    ca_df_2016,
    on=['Longitude', 'Latitude'],
    suffixes=('_2015', '_2016'),
)
merged_population['PopulationChange'] = (
    merged_population['PopulationCount_2016'] - merged_population['PopulationCount_2015']
)

# Keep the 2016 labels plus the change and coordinates, then strip the
# "_2016" suffix from the label columns.
population_change_df = merged_population[
    ['Year_2016', 'StateDesc_2016', 'CityName_2016', 'PopulationChange', 'Latitude', 'Longitude']
].rename(
    columns={
        'Year_2016': 'Year',
        'StateDesc_2016': 'StateDesc',
        'CityName_2016': 'CityName',
    }
)
population_change_df

In [None]:
# Options common to all three California maps built in this cell.
base_map_options = dict(
    geo=True,
    alpha=.5,
    tiles='OSM',
    frame_width=700,
    frame_height=500,
)

# Yearly population maps: marker size follows 'PopulationCount';
# 2015 is drawn in blue and 2016 in red.
ca_2015_plot = ca_df_2015.hvplot.points(
    'Longitude',
    'Latitude',
    size='PopulationCount',
    hover_cols=['CityName', 'PopulationCount'],
    scale=.02,
    color='blue',
    **base_map_options,
)
ca_2016_plot = ca_df_2016.hvplot.points(
    'Longitude',
    'Latitude',
    size='PopulationCount',
    hover_cols=['CityName', 'PopulationCount'],
    scale=.02,
    color='red',
    **base_map_options,
)

# Change map: the absolute value of 'PopulationChange' is passed as the size,
# so decreases and increases of the same magnitude are sized the same.
population_change_plot = population_change_df.hvplot.points(
    'Longitude',
    'Latitude',
    size=population_change_df['PopulationChange'].abs(),
    hover_cols=['CityName', 'PopulationChange'],
    scale=.1,
    color='black',
    **base_map_options,
)
    

In [None]:
# Combine the 2015 map, the 2016 map and the change map into one layout
# limited to a single column.
final_layout = ca_2015_plot + ca_2016_plot + population_change_plot
final_layout = final_layout.cols(1)
final_layout

In [None]:
def num_color(number):
    """Return 'black' for a strictly positive number and 'red' otherwise (zero included)."""
    return 'black' if number > 0 else 'red'
    
# Tag every row with a marker colour derived from its 'PopulationChange'
# via num_color.
population_change_df['Color'] = population_change_df['PopulationChange'].apply(num_color)

# Options common to all three California maps rebuilt in this cell.
final_map_options = dict(
    geo=True,
    alpha=.5,
    tiles='OSM',
    frame_width=700,
    frame_height=500,
)

# Change map: sized by the absolute change, coloured by the 'Color' column.
population_change_plot = population_change_df.hvplot.points(
    'Longitude',
    'Latitude',
    size=population_change_df['PopulationChange'].abs(),
    hover_cols=['CityName', 'PopulationChange'],
    scale=.1,
    color=population_change_df['Color'],
    **final_map_options,
)

# Yearly population maps: 2015 in blue, 2016 in red.
ca_2015_plot = ca_df_2015.hvplot.points(
    'Longitude',
    'Latitude',
    size='PopulationCount',
    hover_cols=['CityName', 'PopulationCount'],
    scale=.02,
    color='blue',
    **final_map_options,
)
ca_2016_plot = ca_df_2016.hvplot.points(
    'Longitude',
    'Latitude',
    size='PopulationCount',
    hover_cols=['CityName', 'PopulationCount'],
    scale=.02,
    color='red',
    **final_map_options,
)

# Combine the three maps into one layout limited to a single column.
final_layout = (ca_2015_plot + ca_2016_plot + population_change_plot).cols(1)
final_layout