In [1]:
#Import dependencies
import pandas as pd
from datetime import datetime
from sqlalchemy import create_engine
import numpy as np

In [2]:
# Locations of the Redfin "Months of Supply" CSV extracts.
# SA = Seasonally Adjusted, NSA = Non-Seasonally Adjusted.
resources_dir = '../Resources/'

# Housing inventory level
housing_inventory_sa = resources_dir + 'Redfin-Months-Supply-SA.csv'
housing_inventory_nsa = resources_dir + 'Redfin-Months-Supply-NSA.csv'

# Housing inventory, Month-Over-Month change
housing_inventory_mom_sa = resources_dir + 'Redfin-Months-Supply-mom-SA.csv'
housing_inventory_mom_nsa = resources_dir + 'Redfin-Months-Supply-mom-NSA.csv'

# Housing inventory, Year-Over-Year change
housing_inventory_yoy_sa = resources_dir + 'Redfin-Months-Supply-yoy-SA.csv'
housing_inventory_yoy_nsa = resources_dir + 'Redfin-Months-Supply-yoy-NSA.csv'

In [3]:
# Load every housing inventory CSV into its own DataFrame, in the same order
# as the path variables: level, month-over-month, year-over-year (SA then NSA).
(
    housing_sa_df,
    housing_nsa_df,
    housing_mom_sa_df,
    housing_mom_nsa_df,
    housing_yoy_sa_df,
    housing_yoy_nsa_df,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        housing_inventory_sa,
        housing_inventory_nsa,
        housing_inventory_mom_sa,
        housing_inventory_mom_nsa,
        housing_inventory_yoy_sa,
        housing_inventory_yoy_nsa,
    )
]

In [4]:
# Real Estate Housing Inventory - Seasonally Adjusted
# Preview the frame exactly as read from the CSV (before any cleanup): a
# 'Region' column holding the date as text and a national value column.
housing_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",5.4
1,"March 1, 2012",5.6
2,"April 1, 2012",5.5
3,"May 1, 2012",5.3
4,"June 1, 2012",5.2
...,...,...
101,"July 1, 2020",1.9
102,"August 1, 2020",1.8
103,"September 1, 2020",1.6
104,"October 1, 2020",1.5


In [5]:
# Inspect the column labels of the Non-Seasonally Adjusted frame.
# The value column is read in as ' National' (with a leading space), so any
# rename of it has to use that exact label.
housing_nsa_df.columns

Index(['Region', ' National'], dtype='object')

In [6]:
# housing_sa_df holds the monthly supply of US housing, Seasonally Adjusted.
# 'Region' carries the date as text (e.g. "February 1, 2012"): split it on
# spaces into month name / day / year and keep only the month and the year.

# Lookup from month name to 2-digit month code ('January' -> '01', ...).
# The other housing frames are remapped with this same dictionary further down.
month_names = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
month_code_dict = {name: f'{number:02d}' for number, name in enumerate(month_names, start=1)}

sa_date_parts = housing_sa_df['Region'].str.split(' ', expand=True)
sa_date_parts.columns = ['Month', 'Day', 'Year']

housing_sa_df = (
    housing_sa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_SA'})
    .assign(
        # Month names missing from the lookup come back as NaN
        Month=sa_date_parts['Month'].map(month_code_dict),
        Year=sa_date_parts['Year'],
    )
    # Re-order columns to put Year and Month first
    .loc[:, ['Year', 'Month', 'Housing_SA']]
)

housing_sa_df

Unnamed: 0,Year,Month,Housing_SA
0,2012,02,5.4
1,2012,03,5.6
2,2012,04,5.5
3,2012,05,5.3
4,2012,06,5.2
...,...,...,...
101,2020,07,1.9
102,2020,08,1.8
103,2020,09,1.6
104,2020,10,1.5


In [8]:
# Real Estate Housing Inventory - Non-Seasonally Adjusted
# Preview the frame as read from the CSV, before 'Region' is split into Month/Year.
housing_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",6.8
1,"March 1, 2012",5.4
2,"April 1, 2012",5.3
3,"May 1, 2012",4.7
4,"June 1, 2012",4.4
...,...,...
101,"July 1, 2020",1.7
102,"August 1, 2020",1.7
103,"September 1, 2020",1.7
104,"October 1, 2020",1.6


In [9]:
# housing_nsa_df holds the monthly supply of US housing, Non-Seasonally Adjusted.
# Split the 'Region' date text (e.g. "February 1, 2012") on spaces into
# month name / day / year; keep the month and year, discard 'Region' and the day.
nsa_date_parts = housing_nsa_df['Region'].str.split(' ', expand=True)
nsa_date_parts.columns = ['Month', 'Day', 'Year']

housing_nsa_df = (
    housing_nsa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_NSA'})
    .assign(
        # Swap the month name for its 2-digit code via month_code_dict
        Month=nsa_date_parts['Month'].map(month_code_dict),
        Year=nsa_date_parts['Year'],
    )
)

housing_nsa_df

Unnamed: 0,Housing_NSA,Month,Year
0,6.8,02,2012
1,5.4,03,2012
2,5.3,04,2012
3,4.7,05,2012
4,4.4,06,2012
...,...,...,...
101,1.7,07,2020
102,1.7,08,2020
103,1.7,09,2020
104,1.6,10,2020


In [10]:
# Real Estate Housing Inventory - Month-Over-Month - Seasonally Adjusted
# Preview the frame as read from the CSV, before 'Region' is split into Month/Year.
housing_mom_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-0.4
1,"March 1, 2012",0.2
2,"April 1, 2012",-0.1
3,"May 1, 2012",-0.2
4,"June 1, 2012",-0.2
...,...,...
101,"July 1, 2020",-0.6
102,"August 1, 2020",-0.1
103,"September 1, 2020",-0.1
104,"October 1, 2020",-0.1


In [11]:
# housing_mom_sa_df holds the Month-Over-Month change in US housing supply,
# Seasonally Adjusted.
# Split the 'Region' date text (e.g. "February 1, 2012") on spaces into
# month name / day / year; keep the month and year, discard 'Region' and the day.
mom_sa_date_parts = housing_mom_sa_df['Region'].str.split(' ', expand=True)
mom_sa_date_parts.columns = ['Month', 'Day', 'Year']

housing_mom_sa_df = (
    housing_mom_sa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_MOM_SA'})
    .assign(
        # Swap the month name for its 2-digit code via month_code_dict
        Month=mom_sa_date_parts['Month'].map(month_code_dict),
        Year=mom_sa_date_parts['Year'],
    )
)

housing_mom_sa_df

Unnamed: 0,Housing_MOM_SA,Month,Year
0,-0.4,02,2012
1,0.2,03,2012
2,-0.1,04,2012
3,-0.2,05,2012
4,-0.2,06,2012
...,...,...,...
101,-0.6,07,2020
102,-0.1,08,2020
103,-0.1,09,2020
104,-0.1,10,2020


In [12]:
# Real Estate Housing Inventory - Month-Over-Month - Non-Seasonally Adjusted
# Preview the frame as read from the CSV, before 'Region' is split into Month/Year.
housing_mom_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-0.6
1,"March 1, 2012",-1.4
2,"April 1, 2012",-0.1
3,"May 1, 2012",-0.7
4,"June 1, 2012",-0.3
...,...,...
101,"July 1, 2020",-0.4
102,"August 1, 2020",0.0
103,"September 1, 2020",0.0
104,"October 1, 2020",-0.1


In [13]:
# housing_mom_nsa_df holds the Month-Over-Month change in US housing supply,
# Non-Seasonally Adjusted.
# Split the 'Region' date text (e.g. "February 1, 2012") on spaces into
# month name / day / year; keep the month and year, discard 'Region' and the day.
mom_nsa_date_parts = housing_mom_nsa_df['Region'].str.split(' ', expand=True)
mom_nsa_date_parts.columns = ['Month', 'Day', 'Year']

housing_mom_nsa_df = (
    housing_mom_nsa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_MOM_NSA'})
    .assign(
        # Swap the month name for its 2-digit code via month_code_dict
        Month=mom_nsa_date_parts['Month'].map(month_code_dict),
        Year=mom_nsa_date_parts['Year'],
    )
)

housing_mom_nsa_df

Unnamed: 0,Housing_MOM_NSA,Month,Year
0,-0.6,02,2012
1,-1.4,03,2012
2,-0.1,04,2012
3,-0.7,05,2012
4,-0.3,06,2012
...,...,...,...
101,-0.4,07,2020
102,0.0,08,2020
103,0.0,09,2020
104,-0.1,10,2020


In [14]:
# Real Estate Housing Inventory - Year-Over-Year - Seasonally Adjusted
# Preview the frame as read from the CSV, before 'Region' is split into Month/Year.
# The earliest months shown have no year-over-year value (they display as NaN).
housing_yoy_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",
1,"March 1, 2012",
2,"April 1, 2012",
3,"May 1, 2012",
4,"June 1, 2012",
...,...,...
101,"July 1, 2020",-1.0
102,"August 1, 2020",-1.1
103,"September 1, 2020",-1.3
104,"October 1, 2020",-1.4


In [15]:
# housing_yoy_sa_df holds the Year-Over-Year change in US housing supply,
# Seasonally Adjusted.
# Split the 'Region' date text (e.g. "February 1, 2012") on spaces into
# month name / day / year; keep the month and year, discard 'Region' and the day.
yoy_sa_date_parts = housing_yoy_sa_df['Region'].str.split(' ', expand=True)
yoy_sa_date_parts.columns = ['Month', 'Day', 'Year']

housing_yoy_sa_df = (
    housing_yoy_sa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_YOY_SA'})
    .assign(
        # Swap the month name for its 2-digit code via month_code_dict
        Month=yoy_sa_date_parts['Month'].map(month_code_dict),
        Year=yoy_sa_date_parts['Year'],
    )
)

housing_yoy_sa_df

Unnamed: 0,Housing_YOY_SA,Month,Year
0,,02,2012
1,,03,2012
2,,04,2012
3,,05,2012
4,,06,2012
...,...,...,...
101,-1.0,07,2020
102,-1.1,08,2020
103,-1.3,09,2020
104,-1.4,10,2020


In [16]:
# Real Estate Housing Inventory - Year-Over-Year - Non-Seasonally Adjusted
# Preview the frame as read from the CSV, before 'Region' is split into Month/Year.
housing_yoy_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-2.6
1,"March 1, 2012",-1.5
2,"April 1, 2012",-1.6
3,"May 1, 2012",-1.9
4,"June 1, 2012",-1.4
...,...,...
101,"July 1, 2020",-0.9
102,"August 1, 2020",-0.9
103,"September 1, 2020",-1.4
104,"October 1, 2020",-1.4


In [17]:
# housing_yoy_nsa_df holds the Year-Over-Year change in US housing supply,
# Non-Seasonally Adjusted.
# Split the 'Region' date text (e.g. "February 1, 2012") on spaces into
# month name / day / year; keep the month and year, discard 'Region' and the day.
yoy_nsa_date_parts = housing_yoy_nsa_df['Region'].str.split(' ', expand=True)
yoy_nsa_date_parts.columns = ['Month', 'Day', 'Year']

housing_yoy_nsa_df = (
    housing_yoy_nsa_df
    .drop(columns=['Region'])
    .rename(columns={' National': 'Housing_YOY_NSA'})
    .assign(
        # Swap the month name for its 2-digit code via month_code_dict
        Month=yoy_nsa_date_parts['Month'].map(month_code_dict),
        Year=yoy_nsa_date_parts['Year'],
    )
)

housing_yoy_nsa_df

Unnamed: 0,Housing_YOY_NSA,Month,Year
0,-2.6,02,2012
1,-1.5,03,2012
2,-1.6,04,2012
3,-1.9,05,2012
4,-1.4,06,2012
...,...,...,...
101,-0.9,07,2020
102,-0.9,08,2020
103,-1.4,09,2020
104,-1.4,10,2020


In [18]:
# Build housing_supply_df by left-joining every other housing series onto the
# Seasonally Adjusted frame, matching rows on Year and Month.
# validate='one_to_one' makes pandas raise a MergeError if either side repeats a
# (Year, Month) pair, instead of silently duplicating rows in the result.
housing_supply_df = housing_sa_df
for other_df in (
    housing_nsa_df,
    housing_mom_sa_df,
    housing_mom_nsa_df,
    housing_yoy_sa_df,
    housing_yoy_nsa_df,
):
    # pd.merge returns a new frame each time, so housing_sa_df itself is never modified
    housing_supply_df = pd.merge(
        housing_supply_df,
        other_df,
        on=['Year', 'Month'],
        how='left',
        validate='one_to_one',
    )

housing_supply_df

Unnamed: 0,Year,Month,Housing_SA,Housing_NSA,Housing_MOM_SA,Housing_MOM_NSA,Housing_YOY_SA,Housing_YOY_NSA
0,2012,02,5.4,6.8,-0.4,-0.6,,-2.6
1,2012,03,5.6,5.4,0.2,-1.4,,-1.5
2,2012,04,5.5,5.3,-0.1,-0.1,,-1.6
3,2012,05,5.3,4.7,-0.2,-0.7,,-1.9
4,2012,06,5.2,4.4,-0.2,-0.3,,-1.4
...,...,...,...,...,...,...,...,...
101,2020,07,1.9,1.7,-0.6,-0.4,-1.0,-0.9
102,2020,08,1.8,1.7,-0.1,0.0,-1.1,-0.9
103,2020,09,1.6,1.7,-0.1,0.0,-1.3,-1.4
104,2020,10,1.5,1.6,-0.1,-0.1,-1.4,-1.4


In [19]:
# Classify each month's housing market from the months of inventory on hand,
# separately for the Seasonally Adjusted (_SA) and Non-Seasonally Adjusted (_NSA) series:
#   Seller's Market : inventory <= 3 months
#   Buyer's Market  : inventory >= 5 months
#   Balanced Market : inventory > 3 and < 5 months
SELLERS_MARKET_MAX_MONTHS = 3
BUYERS_MARKET_MIN_MONTHS = 5

for suffix in ('SA', 'NSA'):
    inventory = housing_supply_df['Housing_' + suffix]
    labels = np.select(
        [inventory <= SELLERS_MARKET_MAX_MONTHS, inventory >= BUYERS_MARKET_MIN_MONTHS],
        ["Seller's Market", "Buyer's Market"],
        default='Balanced Market',
    )
    # A missing (NaN) inventory value fails both comparisons and would otherwise
    # fall through to 'Balanced Market'; leave those rows unclassified instead.
    housing_supply_df['Housing_Classification_' + suffix] = (
        pd.Series(labels, index=housing_supply_df.index).where(inventory.notna())
    )

housing_supply_df

Unnamed: 0,Year,Month,Housing_SA,Housing_NSA,Housing_MOM_SA,Housing_MOM_NSA,Housing_YOY_SA,Housing_YOY_NSA,Housing_Classification_SA,Housing_Classification_NSA
0,2012,02,5.4,6.8,-0.4,-0.6,,-2.6,Buyer's Market,Buyer's Market
1,2012,03,5.6,5.4,0.2,-1.4,,-1.5,Buyer's Market,Buyer's Market
2,2012,04,5.5,5.3,-0.1,-0.1,,-1.6,Buyer's Market,Buyer's Market
3,2012,05,5.3,4.7,-0.2,-0.7,,-1.9,Buyer's Market,Balanced Market
4,2012,06,5.2,4.4,-0.2,-0.3,,-1.4,Buyer's Market,Balanced Market
...,...,...,...,...,...,...,...,...,...,...
101,2020,07,1.9,1.7,-0.6,-0.4,-1.0,-0.9,Seller's Market,Seller's Market
102,2020,08,1.8,1.7,-0.1,0.0,-1.1,-0.9,Seller's Market,Seller's Market
103,2020,09,1.6,1.7,-0.1,0.0,-1.3,-1.4,Seller's Market,Seller's Market
104,2020,10,1.5,1.6,-0.1,-0.1,-1.4,-1.4,Seller's Market,Seller's Market


In [20]:
# Add Year_Month, a "YYYY-MM" text key built from the Year and 2-digit Month
# columns (e.g. "2012-02"), for ease of sorting during plotting and charting
year_text = housing_supply_df['Year']
month_text = housing_supply_df['Month']
housing_supply_df['Year_Month'] = year_text.str.cat(month_text, sep='-')
housing_supply_df

Unnamed: 0,Year,Month,Housing_SA,Housing_NSA,Housing_MOM_SA,Housing_MOM_NSA,Housing_YOY_SA,Housing_YOY_NSA,Housing_Classification_SA,Housing_Classification_NSA,Year_Month
0,2012,02,5.4,6.8,-0.4,-0.6,,-2.6,Buyer's Market,Buyer's Market,2012-02
1,2012,03,5.6,5.4,0.2,-1.4,,-1.5,Buyer's Market,Buyer's Market,2012-03
2,2012,04,5.5,5.3,-0.1,-0.1,,-1.6,Buyer's Market,Buyer's Market,2012-04
3,2012,05,5.3,4.7,-0.2,-0.7,,-1.9,Buyer's Market,Balanced Market,2012-05
4,2012,06,5.2,4.4,-0.2,-0.3,,-1.4,Buyer's Market,Balanced Market,2012-06
...,...,...,...,...,...,...,...,...,...,...,...
101,2020,07,1.9,1.7,-0.6,-0.4,-1.0,-0.9,Seller's Market,Seller's Market,2020-07
102,2020,08,1.8,1.7,-0.1,0.0,-1.1,-0.9,Seller's Market,Seller's Market,2020-08
103,2020,09,1.6,1.7,-0.1,0.0,-1.3,-1.4,Seller's Market,Seller's Market,2020-09
104,2020,10,1.5,1.6,-0.1,-0.1,-1.4,-1.4,Seller's Market,Seller's Market,2020-10


In [21]:
# Export the combined housing supply table to a CSV file for plotting in Tableau.
# With pandas' default settings the DataFrame index is written as the first,
# unnamed column of the file.
tableau_csv_path = '../data/housingsupplyrecords.csv'
housing_supply_df.to_csv(tableau_csv_path)