# Notes
- Last updated 3/28 11pm

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
zillow_median_price_file = "raw data/Zillow_County_MedianSalePrice.csv"
zprice_df = pd.read_csv(zillow_median_price_file, encoding="latin")

In [3]:
columns = zprice_df.columns

# Getting just 2014-2018 (5 complete years)
filtered_columns = [col for col in columns if (col.startswith('RegionN')) or 
                    (col.startswith('State')) or (col.startswith('2014')) or 
                    (col.startswith('2015')) or (col.startswith('2016')) or 
                    (col.startswith('2017')) or (col.startswith('2018'))]
df_2014_2018 = zprice_df[filtered_columns]
df_2014_2018.head()

Unnamed: 0,RegionName,StateName,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,...,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12
0,Los Angeles County,California,421900.0,423700.0,424700.0,429600.0,430900.0,430400.0,432600.0,432800.0,...,568000.0,568600.0,573400.0,577300.0,579000.0,578300.0,,,,
1,Cook County,Illinois,201800.0,208600.0,207200.0,206900.0,202300.0,205000.0,207600.0,211300.0,...,242100.0,243900.0,239800.0,232500.0,232000.0,230100.0,228000.0,228700.0,236900.0,229500.0
2,Maricopa County,Arizona,158300.0,159900.0,159800.0,159600.0,159400.0,158100.0,159600.0,160300.0,...,240300.0,235700.0,233000.0,234700.0,239500.0,243500.0,244100.0,244100.0,243500.0,242700.0
3,San Diego County,California,418700.0,419400.0,421600.0,426000.0,429600.0,429700.0,432500.0,435200.0,...,546400.0,554600.0,555700.0,559000.0,558100.0,561300.0,564900.0,562800.0,556800.0,553200.0
4,Orange County,California,542500.0,549800.0,549500.0,548700.0,549500.0,549000.0,550500.0,550000.0,...,710800.0,709800.0,704300.0,697600.0,704400.0,707000.0,,,,


In [4]:
# Changing format to add columns: DATES and VALUES
long_skinny_df = pd.melt(df_2014_2018, id_vars=["RegionName", "StateName"], var_name = "Month", value_name = "Median Sale Price").dropna(how='any')
long_skinny_df = long_skinny_df.sort_values('Median Sale Price', ascending=False)

In [5]:
grouped_by_date = long_skinny_df.groupby(['Month'])

In [6]:
# Find the x=15 most expensive Counties for each month
top_df = grouped_by_date.head(15)

# Show the Counties that show up on the monthly list of x priciest and how often
top_counties = top_df['RegionName'].value_counts()[0:20]
top_counties

Marin County            60
Eagle County            60
Alameda County          60
New York County         60
San Mateo County        60
San Francisco County    60
Santa Clara County      60
Santa Cruz County       60
Napa County             58
Orange County           56
Dukes County            46
Ventura County          40
Westchester County      39
Arlington County        37
Sonoma County           32
Honolulu County         30
District of Columbia    24
Santa Barbara County    21
Monterey County         11
Alexandria City          7
Name: RegionName, dtype: int64

In [7]:
# Find the x cheapest Counties for each month
bottom_df = grouped_by_date.tail(15)

# Show the Counties that show up on the monthly list of x cheapest and how often
bottom_counties = bottom_df['RegionName'].value_counts()[0:20]
bottom_counties

Carroll County       60
Richmond County      57
Knox County          53
Allegany County      47
Weakley County       41
Bay County           38
Trumbull County      38
Clayton County       33
Putnam County        31
Henry County         30
Lawrence County      29
Chautauqua County    29
Macoupin County      29
Hardin County        26
Jefferson County     24
Montgomery County    24
Coles County         19
Stephenson County    18
Gibson County        17
Bibb County          17
Name: RegionName, dtype: int64

In [8]:
# export list to csv
top_counties_df = pd.DataFrame(top_counties)
top_counties_df=top_counties_df.rename(columns={"RegionName":"Times on the monthly top 15 list"})
top_counties_df.to_csv("Zillow CSV results/top_20_counties_Median_Sale_Price.csv", header=True)

In [9]:
# export list to csv
bottom_counties_df = pd.DataFrame(bottom_counties)
bottom_counties_df=bottom_counties_df.rename(columns={"RegionName":"Times on the monthly bottom 15 list"})
bottom_counties_df.to_csv("Zillow CSV results/bottom_20_counties_Median_Sale_Price.csv", header=True)