# Notes
- Last updated 4/3 9pm

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Price per sq ft

In [None]:
# Load the "top counties" price-per-square-foot CSV and build a
# "<county>, <state>" display label for every row.
top_ppsf_file = "top_counties_ppsf.csv"
top_ppsf_df = pd.read_csv(top_ppsf_file, encoding="latin")

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
top_ppsf_df['RegionName'] = top_ppsf_df['RegionName'].str.replace(" County", "", regex=False)
top_ppsf_df['County'] = top_ppsf_df['RegionName'] + ", " + top_ppsf_df['State']

# Keep only the label and the metric, then reverse the row order and renumber
# the index from 0 (presumably so the horizontal bar chart built later lists
# the counties in the intended order -- confirm against the plot).
top_ppsf_clean = top_ppsf_df[['County', 'Price per ft2']]
top_ppsf_clean = top_ppsf_clean.iloc[::-1].reset_index(drop=True)

In [None]:
# Load the "bottom counties" price-per-square-foot CSV and build a
# "<county>, <state>" display label for every row.
bottom_ppsf_file = "bottom_counties_ppsf.csv"
bottom_ppsf_df = pd.read_csv(bottom_ppsf_file, encoding="latin")

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
bottom_ppsf_df['RegionName'] = bottom_ppsf_df['RegionName'].str.replace(" County", "", regex=False)
bottom_ppsf_df['County'] = bottom_ppsf_df['RegionName'] + ", " + bottom_ppsf_df['State']

# Only the label and the metric are needed downstream; row order is kept as
# it appears in the file.
bottom_ppsf_clean = bottom_ppsf_df[['County', 'Price per ft2']]

In [None]:
# Write display-ready rankings of both price-per-sq-ft tables to CSV.
# Each ranking is a new frame sorted high-to-low with the price rendered as a
# "$0.00" string; the numeric *_clean frames are not modified.
top_ppsf_county_list = (
    top_ppsf_clean
    .sort_values(by='Price per ft2', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Price per ft2': lambda ranked: ranked['Price per ft2'].map("${:.2f}".format)})
)
top_ppsf_county_list.to_csv("simple_top_ppsf.csv", header=True)

bottom_ppsf_county_list = (
    bottom_ppsf_clean
    .sort_values(by='Price per ft2', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Price per ft2': lambda ranked: ranked['Price per ft2'].map("${:.2f}".format)})
)
bottom_ppsf_county_list.to_csv("simple_bottom_ppsf.csv", header=True)

In [None]:
# Horizontal bar chart of price per sq. foot for both county groups, saved as
# a PNG under ../Plots.
plt.figure(figsize=(48, 30))
plt.rcParams['font.size'] = 30  # applied before the axes are created below
ppsf_ax = plt.gca()

# Bottom group is drawn first, then the top group, on the same axes.
bottom_ppsf_plot = ppsf_ax.barh(bottom_ppsf_clean['County'], bottom_ppsf_clean['Price per ft2'])
top_ppsf_plot = ppsf_ax.barh(top_ppsf_clean['County'], top_ppsf_clean['Price per ft2'])

ppsf_ax.set_title('Counties Sorted by Price per Sq. Foot')
ppsf_ax.set_xlabel('Price per Sq. Foot')
ppsf_ax.set_ylabel('County')

ppsf_ax.legend(
    (top_ppsf_plot, bottom_ppsf_plot),
    ("Top 20", "Bottom 20"),
    loc='lower right',
)

# Fixed vertical window; assumes 40 distinct county labels (20 + 20) --
# adjust if the input files change.
ppsf_ax.set_ylim(-0.75, 39.75)
plt.savefig("../Plots/counties by ppsf.png")

## Sale to List Ratio

In [None]:
# Load the "top counties" sale-to-list-ratio CSV and build a
# "<county>, <state>" display label for every row.
top_slr_file = "top_counties_Sale_List_Ratio.csv"
top_slr_df = pd.read_csv(top_slr_file, encoding="latin")

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
top_slr_df['RegionName'] = top_slr_df['RegionName'].str.replace(" County", "", regex=False)
top_slr_df['County'] = top_slr_df['RegionName'] + ", " + top_slr_df['StateName']

# Keep only the label and the metric, then reverse the row order and renumber
# the index from 0 (presumably so the horizontal bar chart built later lists
# the counties in the intended order -- confirm against the plot).
top_slr_clean = top_slr_df[['County', 'Sale to List Ratio']]
top_slr_clean = top_slr_clean.iloc[::-1].reset_index(drop=True)

In [None]:
# Load the "bottom counties" sale-to-list-ratio CSV and build a
# "<county>, <state>" display label for every row.
bottom_slr_file = "bottom_counties_Sale_List_Ratio.csv"
bottom_slr_df = pd.read_csv(bottom_slr_file, encoding="latin")

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
bottom_slr_df['RegionName'] = bottom_slr_df['RegionName'].str.replace(" County", "", regex=False)
bottom_slr_df['County'] = bottom_slr_df['RegionName'] + ", " + bottom_slr_df['StateName']

# Only the label and the metric are needed downstream; row order is kept as
# it appears in the file.
bottom_slr_clean = bottom_slr_df[['County', 'Sale to List Ratio']]

In [None]:
# Write display-ready rankings of both sale-to-list-ratio tables to CSV.
# Each ranking is a new frame sorted high-to-low with the ratio rendered as a
# three-decimal string; the numeric *_clean frames are not modified.
top_slr_county_list = (
    top_slr_clean
    .sort_values(by='Sale to List Ratio', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Sale to List Ratio': lambda ranked: ranked['Sale to List Ratio'].map("{:.3f}".format)})
)
top_slr_county_list.to_csv("simple_top_slr.csv", header=True)

bottom_slr_county_list = (
    bottom_slr_clean
    .sort_values(by='Sale to List Ratio', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Sale to List Ratio': lambda ranked: ranked['Sale to List Ratio'].map("{:.3f}".format)})
)
bottom_slr_county_list.to_csv("simple_bottom_slr.csv", header=True)

In [None]:
# Horizontal bar chart of sale-to-list ratio for both county groups, saved as
# a PNG under ../Plots.
plt.figure(figsize=(40, 25))
plt.rcParams['font.size'] = 30  # applied before the axes are created below
slr_ax = plt.gca()

# Bottom group is drawn first, then the top group, on the same axes.
bottom_slr_plot = slr_ax.barh(bottom_slr_clean['County'], bottom_slr_clean['Sale to List Ratio'])
top_slr_plot = slr_ax.barh(top_slr_clean['County'], top_slr_clean['Sale to List Ratio'])

slr_ax.set_title('Counties Sorted by Sale to List Ratio')
slr_ax.set_xlabel('Sale to List Ratio')
slr_ax.set_ylabel('County')

slr_ax.legend(
    (top_slr_plot, bottom_slr_plot),
    ("Top 20", "Bottom 20"),
    loc='lower right',
)

# Fixed vertical window; assumes 40 distinct county labels (20 + 20) --
# adjust if the input files change.
slr_ax.set_ylim(-0.75, 39.75)
# Fixed horizontal window from 0.75 to 1.18.
slr_ax.set_xlim(0.75, 1.18)
# Grey reference line at a ratio of exactly 1, drawn past both y-limits.
slr_ax.vlines(1, -10, 40, color='grey', alpha=1)
plt.savefig("../Plots/counties by sale to list ratio.png")

## Median Sale Price

In [None]:
# Load the "top counties" median-sale-price CSV and build a
# "<county>, <state abbreviation>" display label for every row.
top_sale_file = "top_counties_Median_Sale_Price.csv"
top_sale_df = pd.read_csv(top_sale_file, encoding="latin")

# Full state name -> two-letter postal abbreviation. Defined inside this cell
# so the cell can run on its own. District of Columbia is included so such a
# row does not keep its full name in the label.
US_STATE_ABBREVIATIONS = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC', 'Florida': 'FL',
    'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
    'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
    'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
    'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Series.replace swaps exact matches only; values that are not keys of the
# mapping (for example ones that are already abbreviations) pass through.
top_sale_df["StateName"] = top_sale_df["StateName"].replace(US_STATE_ABBREVIATIONS)

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
top_sale_df['RegionName'] = top_sale_df['RegionName'].str.replace(" County", "", regex=False)
top_sale_df['County'] = top_sale_df['RegionName'] + ", " + top_sale_df['StateName']

# Keep only the label and the metric, then reverse the row order and renumber
# the index from 0 (presumably so the horizontal bar chart built later lists
# the counties in the intended order -- confirm against the plot).
top_sale_clean = top_sale_df[['County', 'Median Sale Price']]
top_sale_clean = top_sale_clean.iloc[::-1].reset_index(drop=True)

In [None]:
# Load the "bottom counties" median-sale-price CSV and build a
# "<county>, <state abbreviation>" display label for every row.
bottom_sale_file = "bottom_counties_Median_Sale_Price.csv"
bottom_sale_df = pd.read_csv(bottom_sale_file, encoding="latin")

# Full state name -> two-letter postal abbreviation. Defined inside this cell
# so the cell can run on its own. District of Columbia is included so such a
# row does not keep its full name in the label.
US_STATE_ABBREVIATIONS = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC', 'Florida': 'FL',
    'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
    'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
    'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
    'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Series.replace swaps exact matches only; values that are not keys of the
# mapping (for example ones that are already abbreviations) pass through.
bottom_sale_df["StateName"] = bottom_sale_df["StateName"].replace(US_STATE_ABBREVIATIONS)

# " County" is plain text, not a pattern. regex=False pins a literal match so
# the result does not depend on the pandas release's default for `regex`.
bottom_sale_df['RegionName'] = bottom_sale_df['RegionName'].str.replace(" County", "", regex=False)
bottom_sale_df['County'] = bottom_sale_df['RegionName'] + ", " + bottom_sale_df['StateName']

# Only the label and the metric are needed downstream; row order is kept as
# it appears in the file.
bottom_sale_clean = bottom_sale_df[['County', 'Median Sale Price']]

In [None]:
# Disabled alternative: one combined ranking built from both groups.
# total_sale_county_list = pd.concat([top_sale_clean, bottom_sale_clean])
# total_sale_county_list = total_sale_county_list.sort_values(['Median Sale Price'], ascending = False).reset_index(drop=True)
# total_sale_county_list

# Write display-ready rankings of both median-sale-price tables to CSV.
# Each ranking is a new frame sorted high-to-low with the price rendered as a
# whole-dollar string with thousands separators; the numeric *_clean frames
# are not modified.
top_sale_county_list = (
    top_sale_clean
    .sort_values(by='Median Sale Price', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Median Sale Price': lambda ranked: ranked['Median Sale Price'].map("${:,.0f}".format)})
)
top_sale_county_list.to_csv("simple_top_median_sale.csv", header=True)

bottom_sale_county_list = (
    bottom_sale_clean
    .sort_values(by='Median Sale Price', ascending=False)
    .reset_index(drop=True)
    .assign(**{'Median Sale Price': lambda ranked: ranked['Median Sale Price'].map("${:,.0f}".format)})
)
bottom_sale_county_list.to_csv("simple_bottom_median_sale.csv", header=True)

In [None]:
# Horizontal bar chart of median sale price for both county groups, saved as
# a PNG under ../Plots.
plt.figure(figsize=(40, 25))
plt.rcParams['font.size'] = 30  # applied before the axes are created below
sale_ax = plt.gca()

# Bottom group is drawn first, then the top group, on the same axes.
bottom_sale_plot = sale_ax.barh(bottom_sale_clean['County'], bottom_sale_clean['Median Sale Price'])
top_sale_plot = sale_ax.barh(top_sale_clean['County'], top_sale_clean['Median Sale Price'])

sale_ax.set_title('Counties Sorted by Median Sale Price')
sale_ax.set_xlabel('Median Sale Price')
sale_ax.set_ylabel('County')

sale_ax.legend(
    (top_sale_plot, bottom_sale_plot),
    ("Top 20", "Bottom 20"),
    loc='lower right',
)

# Fixed vertical window; assumes 40 distinct county labels (20 + 20) --
# adjust if the input files change.
sale_ax.set_ylim(-0.75, 39.75)
plt.savefig("../Plots/counties by median sale price.png")
