In [None]:
import pandas as pd
import altair as alt

In [None]:
# Load the prepared dataset; get_change_df reads it through the notebook-level name `pickle_df`.
# NOTE(review): unpickling can execute arbitrary code embedded in the file, so only
# load "df.pickle" when it comes from a trusted source.
pickle_df = pd.read_pickle("df.pickle")
# Preview the first rows as a quick sanity check of the load.
pickle_df.head()

In [None]:
# List every column name of the loaded frame.
print(pickle_df.columns.tolist())

In [None]:
def get_change_df(year1, year2, month1=11, month2=9, data=None):
    """Build a table of relative changes between two yearly snapshots.

    One snapshot is selected per year (``year1``/``month1`` and
    ``year2``/``month2``). Rows with a missing value in any selected column are
    dropped, and the two snapshots are inner-joined on ``['Zillow', 'filename']``.

    NOTE(review): by default the snapshots come from different months (11 for
    ``year1``, 9 for ``year2``). The defaults are kept for backward
    compatibility -- presumably they reflect data availability; confirm. Pass
    ``month1``/``month2`` explicitly to compare the same month in both years.

    Parameters
    ----------
    year1, year2
        Baseline and comparison values matched against the ``Year`` column.
        They must differ, because each one is used as the column suffix of its
        snapshot.
    month1, month2 : optional
        Values matched against the ``Month`` column for the baseline and the
        comparison snapshot (defaults: 11 and 9).
    data : pandas.DataFrame, optional
        Frame to read from. It must provide the columns ``Year``, ``Month``,
        ``Zillow``, ``filename``, ``Price``, ``Income``,
        ``Total housing units`` and ``Population``. When omitted, the
        notebook-level ``pickle_df`` is used.

    Returns
    -------
    pandas.DataFrame
        The merged snapshots (metric columns suffixed ``_{year1}`` /
        ``_{year2}``) plus, for every metric, ``'<metric> Change'`` computed as
        ``value_year2 / value_year1 - 1``; also ``'Housing over Pop_<year>'``
        (housing units divided by population) for both years and
        ``'Housing over Pop Change'``.

    Raises
    ------
    ValueError
        If ``year1 == year2``.
    """
    if year1 == year2:
        # Identical suffixes would make the merged column names collide.
        raise ValueError("year1 and year2 must differ; the year is used as the column suffix")

    source = pickle_df if data is None else data
    key_columns = ['Zillow', 'filename']
    metric_columns = ['Price', 'Income', 'Total housing units', 'Population']
    columns = key_columns + metric_columns

    def _snapshot(year, month):
        # Rows for the requested year/month, keeping only complete cases.
        selected = source.loc[(source.Year == year) & (source.Month == month), columns]
        return selected.dropna(subset=columns, axis='rows').copy()

    df = pd.merge(
        _snapshot(year1, month1),
        _snapshot(year2, month2),
        on=key_columns,
        suffixes=(f'_{year1}', f'_{year2}'),
    )

    # Relative change of every metric between the two snapshots.
    for column in metric_columns:
        df[f'{column} Change'] = df[f'{column}_{year2}'] / df[f'{column}_{year1}'] - 1

    # Housing units per person in each year, then the relative change of that ratio.
    for year in (year1, year2):
        df[f'Housing over Pop_{year}'] = df[f'Total housing units_{year}'] / df[f'Population_{year}']
    df['Housing over Pop Change'] = df[f'Housing over Pop_{year2}'] / df[f'Housing over Pop_{year1}'] - 1

    return df

def get_tier(x):
    """Map a filename to a tier label based on the tier marker it contains.

    Returns 'Mid' when ``x`` contains '0.33_0.67', 'Low' when it contains
    '0.0_0.33', and 'Hi' for anything else.
    """
    # Order matters: the first marker found wins, so 'Mid' takes precedence over 'Low'.
    for marker, label in (('0.33_0.67', 'Mid'), ('0.0_0.33', 'Low')):
        if marker in x:
            return label
    return 'Hi'

def get_type(x):
    """Map a filename to a home-type label.

    Returns ``'<count> Bedroom(s)'`` when ``x`` contains 'bdrmcnt',
    ``'Condo'`` when it contains '_condo_tier', ``'Single Family'`` when it
    contains '_sfr_tier', and ``'All'`` otherwise. The checks run in that order.

    The bedroom count is the underscore-separated token that directly follows
    the 'bdrmcnt' token, so extra underscore-separated prefixes in the filename
    do not shift it. When 'bdrmcnt' is not a standalone token with a successor,
    the fourth token is used instead.
    """
    if 'bdrmcnt' in x:
        tokens = x.split("_")
        if 'bdrmcnt' in tokens[:-1]:
            # Standalone marker with a following token: take that token.
            count = tokens[tokens.index('bdrmcnt') + 1]
        else:
            # Marker is embedded in another token (or is last): positional lookup.
            count = tokens[3]
        return f"{count} Bedroom(s)"
    if '_condo_tier' in x:
        return "Condo"
    if "_sfr_tier" in x:
        return "Single Family"
    return "All"

def add_file_data(df):
    """Return a copy of ``df`` with labels derived from its ``filename`` column.

    Adds three columns:

    * ``tier``     -- ``get_tier(filename)``
    * ``type``     -- ``get_type(filename)``
    * ``category`` -- the text ``"<tier> Tier, <type>"``

    The input frame is left untouched; the new columns exist only on the
    returned frame.
    """
    tiers = df.filename.apply(get_tier)
    kinds = df.filename.apply(get_type)
    # A plain comprehension instead of a row-wise DataFrame.apply: it is cheaper and,
    # unlike DataFrame.apply, still produces one value per row when the frame is empty.
    categories = [f"{tier} Tier, {kind}" for tier, kind in zip(tiers, kinds)]

    # assign() builds a new frame, so the caller's frame is not modified.
    return df.assign(tier=tiers, type=kinds, category=categories)

# Changes between the 2019 and 2021 snapshots, labelled with tier/type/category.
df_change = get_change_df(2019, 2021).pipe(add_file_data)
df_change.head()

In [None]:
# Distinct category labels present in the change table.
df_change['category'].unique()

In [None]:
# Housing-unit change (y) against population change (x) for the 'Mid Tier, All'
# category, with points colored by price change.
# NOTE(review): 'York, PA' is excluded here without a recorded reason -- presumably
# an outlier; confirm.
alt.Chart(
    df_change.loc[(df_change.category == 'Mid Tier, All') & (df_change.Zillow != 'York, PA')]
).mark_point(filled=True, size=60, opacity=0.8).encode(
    x=alt.X('Population Change', axis=alt.Axis(format="%")),
    y=alt.Y('Total housing units Change', axis=alt.Axis(format="%")),
    color=alt.Color(
        'Price Change',
        sort='descending',
        scale=alt.Scale(scheme='magma'),
        legend=alt.Legend(format="%"),
    ),
    tooltip='Zillow',
)

In [None]:
# Income change (y) against the change in housing units per person (x) for the
# 'Mid Tier, All' category, with points colored by price change.
# NOTE(review): 'York, PA' is excluded here without a recorded reason -- presumably
# an outlier; confirm.
alt.Chart(
    df_change.loc[(df_change.category == 'Mid Tier, All') & (df_change.Zillow != 'York, PA')]
).mark_point(filled=True, size=60, opacity=0.6).encode(
    x=alt.X('Housing over Pop Change', axis=alt.Axis(format="%")),
    y=alt.Y('Income Change', axis=alt.Axis(format="%")),
    color=alt.Color(
        'Price Change',
        scale=alt.Scale(scheme="magma"),
        legend=alt.Legend(format="%"),
        sort='descending',
    ),
    tooltip='Zillow',
)