In [1]:
#Import dependencies
import pandas as pd
from datetime import datetime
from sqlalchemy import create_engine
import numpy as np

In [16]:
# Load files

# Real Estate Housing Inventory - Seasonally Adjusted
housing_inventory_sa = '../Resources/Redfin-Months-Supply-SA.csv'
# Real Estate Housing Inventory - Non-Seasonally Adjusted
housing_inventory_nsa = '../Resources/Redfin-Months-Supply-NSA.csv'
# Real Estate Housing Inventory - Month-Over-Month - Seasonally Adjusted
housing_inventory_mom_sa = '../Resources/Redfin-Months-Supply-mom-SA.csv'
# Real Estate Housing Inventory - Month-Over-Month - Non-Seasonally Adjusted
housing_inventory_mom_nsa = '../Resources/Redfin-Months-Supply-mom-NSA.csv'
# Real Estate Housing Inventory - Month-Over-Month - Seasonally Adjusted
housing_inventory_yoy_sa = '../Resources/Redfin-Months-Supply-yoy-SA.csv'
# Real Estate Housing Inventory - Month-Over-Month - Non-Seasonally Adjusted
housing_inventory_yoy_nsa = '../Resources/Redfin-Months-Supply-yoy-NSA.csv'

In [34]:
# The correct encoding must be used to read the CSV in pandas
housing_sa_df = pd.read_csv(housing_inventory_sa)
housing_nsa_df = pd.read_csv(housing_inventory_nsa)
housing_mom_sa_df = pd.read_csv(housing_inventory_mom_sa)
housing_mom_nsa_df = pd.read_csv(housing_inventory_mom_nsa)
housing_yoy_sa_df = pd.read_csv(housing_inventory_yoy_sa)
housing_yoy_nsa_df = pd.read_csv(housing_inventory_yoy_nsa)

In [35]:
# Real Estate Housing Inventory - Seasonally Adjusted
housing_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",5.4
1,"March 1, 2012",5.6
2,"April 1, 2012",5.5
3,"May 1, 2012",5.3
4,"June 1, 2012",5.2
...,...,...
101,"July 1, 2020",1.9
102,"August 1, 2020",1.8
103,"September 1, 2020",1.6
104,"October 1, 2020",1.5


In [41]:
# Check the column names; note ' National' has a leading space
housing_nsa_df.columns

Index(['Housing_Supply_NSA', 'Month', 'Year'], dtype='object')

In [36]:
# housing_sa_df contains the monthly Supply of US Housing Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_sa_df[['Month','Day','Year']] = housing_sa_df.Region.str.split(" ",expand=True)
housing_sa_df= housing_sa_df.drop(['Region'], axis = 1) 
housing_sa_df= housing_sa_df.drop(['Day'], axis = 1)
housing_sa_df= housing_sa_df.rename(columns={' National': 'Housing_Supply_SA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_sa_df['Month'] = housing_sa_df['Month'].map(month_code_dict)

housing_sa_df

Unnamed: 0,Housing_Supply_SA,Month,Year
0,5.4,February,2012
1,5.6,March,2012
2,5.5,April,2012
3,5.3,May,2012
4,5.2,June,2012
...,...,...,...
101,1.9,July,2020
102,1.8,August,2020
103,1.6,September,2020
104,1.5,October,2020


In [37]:
# Real Estate Housing Inventory - Non-Seasonally Adjusted
housing_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",6.8
1,"March 1, 2012",5.4
2,"April 1, 2012",5.3
3,"May 1, 2012",4.7
4,"June 1, 2012",4.4
...,...,...
101,"July 1, 2020",1.7
102,"August 1, 2020",1.7
103,"September 1, 2020",1.7
104,"October 1, 2020",1.6


In [40]:
# housing_nsa_df contains the monthly Supply of US Housing Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_nsa_df[['Month','Day','Year']] = housing_nsa_df.Region.str.split(" ",expand=True)
housing_nsa_df= housing_nsa_df.drop(['Region'], axis = 1) 
housing_nsa_df= housing_nsa_df.drop(['Day'], axis = 1)
housing_nsa_df= housing_nsa_df.rename(columns={' National': 'Housing_Supply_NSA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_nsa_df['Month'] = housing_nsa_df['Month'].map(month_code_dict)

housing_nsa_df

Unnamed: 0,Housing_Supply_NSA,Month,Year
0,6.8,February,2012
1,5.4,March,2012
2,5.3,April,2012
3,4.7,May,2012
4,4.4,June,2012
...,...,...,...
101,1.7,July,2020
102,1.7,August,2020
103,1.7,September,2020
104,1.6,October,2020


In [9]:
# Real Estate Housing Inventory - Month-Over-Month - Seasonally Adjusted
housing_mom_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-0.4
1,"March 1, 2012",0.2
2,"April 1, 2012",-0.1
3,"May 1, 2012",-0.2
4,"June 1, 2012",-0.2
...,...,...
101,"July 1, 2020",-0.6
102,"August 1, 2020",-0.1
103,"September 1, 2020",-0.1
104,"October 1, 2020",-0.1


In [43]:
# housing_mom_sa_df contains the monthly Supply of US Housing Month-Over-Month Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_mom_sa_df[['Month','Day','Year']] = housing_mom_sa_df.Region.str.split(" ",expand=True)
housing_mom_sa_df= housing_mom_sa_df.drop(['Region'], axis = 1) 
housing_mom_sa_df= housing_mom_sa_df.drop(['Day'], axis = 1)
housing_mom_sa_df= housing_mom_sa_df.rename(columns={' National': 'Housing_MOM_SA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_mom_sa_df['Month'] = housing_mom_sa_df['Month'].map(month_code_dict)

housing_mom_sa_df

Unnamed: 0,Housing_MOM_SA,Month,Year
0,-0.4,February,2012
1,0.2,March,2012
2,-0.1,April,2012
3,-0.2,May,2012
4,-0.2,June,2012
...,...,...,...
101,-0.6,July,2020
102,-0.1,August,2020
103,-0.1,September,2020
104,-0.1,October,2020


In [10]:
# Real Estate Housing Inventory - Month-Over-Month - Non-Seasonally Adjusted
housing_mom_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-0.6
1,"March 1, 2012",-1.4
2,"April 1, 2012",-0.1
3,"May 1, 2012",-0.7
4,"June 1, 2012",-0.3
...,...,...
101,"July 1, 2020",-0.4
102,"August 1, 2020",0.0
103,"September 1, 2020",0.0
104,"October 1, 2020",-0.1


In [44]:
# housing_mom_nsa_df contains the monthly Supply of US Housing Month-Over-Month Non-Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_mom_nsa_df[['Month','Day','Year']] = housing_mom_nsa_df.Region.str.split(" ",expand=True)
housing_mom_nsa_df= housing_mom_nsa_df.drop(['Region'], axis = 1) 
housing_mom_nsa_df= housing_mom_nsa_df.drop(['Day'], axis = 1)
housing_mom_nsa_df= housing_mom_nsa_df.rename(columns={' National': 'Housing_MOM_NSA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_mom_nsa_df['Month'] = housing_mom_nsa_df['Month'].map(month_code_dict)

housing_mom_nsa_df

AttributeError: 'DataFrame' object has no attribute 'Region'

In [11]:
# Real Estate Housing Inventory - Year-Over-Year - Seasonally Adjusted
housing_yoy_sa_df

Unnamed: 0,Region,National
0,"February 1, 2012",
1,"March 1, 2012",
2,"April 1, 2012",
3,"May 1, 2012",
4,"June 1, 2012",
...,...,...
101,"July 1, 2020",-1.0
102,"August 1, 2020",-1.1
103,"September 1, 2020",-1.3
104,"October 1, 2020",-1.4


In [None]:
# housing_yoy_sa_df contains the monthly Supply of US Housing Year-Over-Year Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_yoy_sa_df[['Month','Day','Year']] = housing_yoy_sa_df.Region.str.split(" ",expand=True)
housing_yoy_sa_df= housing_yoy_sa_df.drop(['Region'], axis = 1) 
housing_yoy_sa_df= housing_yoy_sa_df.drop(['Day'], axis = 1)
housing_yoy_sa_df= housing_yoy_sa_df.rename(columns={' National': 'Housing_YOY_SA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_yoy_sa_df['Month'] = housing_yoy_sa_df['Month'].map(month_code_dict)

housing_yoy_sa_df

In [12]:
# Real Estate Housing Inventory - Year-Over-Year - Non-Seasonally Adjusted
housing_yoy_nsa_df

Unnamed: 0,Region,National
0,"February 1, 2012",-2.6
1,"March 1, 2012",-1.5
2,"April 1, 2012",-1.6
3,"May 1, 2012",-1.9
4,"June 1, 2012",-1.4
...,...,...
101,"July 1, 2020",-0.9
102,"August 1, 2020",-0.9
103,"September 1, 2020",-1.4
104,"October 1, 2020",-1.4


In [None]:
# housing_yoy_nsa_df contains the monthly Supply of US Housing Year-Over-Year Non-Seasonally Adjusted
# Split the year into month and year; drop the 'Region' and 'Day' columns
housing_yoy_nsa_df[['Month','Day','Year']] = housing_yoy_nsa_df.Region.str.split(" ",expand=True)
housing_yoy_nsa_df= housing_yoy_nsa_df.drop(['Region'], axis = 1) 
housing_yoy_nsa_df= housing_yoy_nsa_df.drop(['Day'], axis = 1)
housing_yoy_nsa_df= housing_yoy_nsa_df.rename(columns={' National': 'Housing_YOY_NSA'})

# Change the month name to a 2-digit month code
# Create a dictionary of month_code keys & values
month_code_dict = {'January': '01',
                   'February': '02',
                   'March': '03',
                   'April': '04',
                   'May': '05',
                   'June': '06',
                   'July': '07',
                   'August': '08',
                   'September': '09',
                   'October': '10',
                   'November': '11',
                   'December,': '12'
                  }

# Remap the values of the new month_code field using the dictionary
housing_yoy_nsa_df['Month'] = housing_yoy_nsa_df['Month'].map(month_code_dict)

housing_yoy_nsa_df

NameError: name 'MNSA' is not defined

In [12]:
MNSA['SA'] = MSA['SA']

In [13]:
MNSA['MOM_NSA']= MOMNSA['MOM_NSA']

In [14]:
MNSA['MOM_SA'] = MOMSA['MOM_SA']

In [15]:
MNSA['YOY_NSA'] = YOYNSA['YOY_NSA']

In [16]:
MNSA['YOY_SA'] = YOYSA['YOY_SA']

In [17]:
MNSA

Unnamed: 0,NSA,Year,Months,SA,MOM_NSA,MOM_SA,YOY_NSA,YOY_SA
0,6.8,2012,February,5.4,-0.6,-0.4,-2.6,
1,5.4,2012,March,5.6,-1.4,0.2,-1.5,
2,5.3,2012,April,5.5,-0.1,-0.1,-1.6,
3,4.7,2012,May,5.3,-0.7,-0.2,-1.9,
4,4.4,2012,June,5.2,-0.3,-0.2,-1.4,
...,...,...,...,...,...,...,...,...
101,1.7,2020,July,1.9,-0.4,-0.6,-0.9,-1.0
102,1.7,2020,August,1.8,0.0,-0.1,-0.9,-1.1
103,1.7,2020,September,1.6,0.0,-0.1,-1.4,-1.3
104,1.6,2020,October,1.5,-0.1,-0.1,-1.4,-1.4


In [18]:
MNSA = MNSA[['Year', 'Months', 'NSA', 'SA', 'MOM_NSA', 'YOY_NSA','YOY_SA']]
MNSA

Unnamed: 0,Year,Months,NSA,SA,MOM_NSA,YOY_NSA,YOY_SA
0,2012,February,6.8,5.4,-0.6,-2.6,
1,2012,March,5.4,5.6,-1.4,-1.5,
2,2012,April,5.3,5.5,-0.1,-1.6,
3,2012,May,4.7,5.3,-0.7,-1.9,
4,2012,June,4.4,5.2,-0.3,-1.4,
...,...,...,...,...,...,...,...
101,2020,July,1.7,1.9,-0.4,-0.9,-1.0
102,2020,August,1.7,1.8,0.0,-0.9,-1.1
103,2020,September,1.7,1.6,0.0,-1.4,-1.3
104,2020,October,1.6,1.5,-0.1,-1.4,-1.4


In [19]:
MNSA.dtypes

Year        object
Months      object
NSA        float64
SA         float64
MOM_NSA    float64
YOY_NSA    float64
YOY_SA     float64
dtype: object

In [20]:
MNSA['Year'] = MNSA['Year'].apply(pd.to_numeric)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MNSA['Year'] = MNSA['Year'].apply(pd.to_numeric)


In [21]:
MNSA

Unnamed: 0,Year,Months,NSA,SA,MOM_NSA,YOY_NSA,YOY_SA
0,2012,February,6.8,5.4,-0.6,-2.6,
1,2012,March,5.4,5.6,-1.4,-1.5,
2,2012,April,5.3,5.5,-0.1,-1.6,
3,2012,May,4.7,5.3,-0.7,-1.9,
4,2012,June,4.4,5.2,-0.3,-1.4,
...,...,...,...,...,...,...,...
101,2020,July,1.7,1.9,-0.4,-0.9,-1.0
102,2020,August,1.7,1.8,0.0,-0.9,-1.1
103,2020,September,1.7,1.6,0.0,-1.4,-1.3
104,2020,October,1.6,1.5,-0.1,-1.4,-1.4


In [22]:
balanced_market = MNSA.loc[(MNSA["NSA"] > 3 ) & (MNSA["NSA"]<5 )]
balanced_market

Unnamed: 0,Year,Months,NSA,SA,MOM_NSA,YOY_NSA,YOY_SA
3,2012,May,4.7,5.3,-0.7,-1.9,
4,2012,June,4.4,5.2,-0.3,-1.4,
5,2012,July,4.7,5.1,0.3,-1.8,
6,2012,August,4.2,4.9,-0.5,-1.6,
8,2012,October,4.6,4.6,-0.5,-2.2,
...,...,...,...,...,...,...,...
84,2019,February,3.8,3.1,-0.4,0.3,0.3
91,2019,September,3.1,3.0,0.5,-0.3,-0.1
95,2020,January,3.3,2.7,0.9,-0.9,-0.6
96,2020,February,3.1,2.4,-0.2,-0.7,-0.8


In [23]:
balanced_market['State'] = 'Balanced_Market'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  balanced_market['State'] = 'Balanced_Market'


In [24]:
balanced_market

Unnamed: 0,Year,Months,NSA,SA,MOM_NSA,YOY_NSA,YOY_SA,State
3,2012,May,4.7,5.3,-0.7,-1.9,,Balanced_Market
4,2012,June,4.4,5.2,-0.3,-1.4,,Balanced_Market
5,2012,July,4.7,5.1,0.3,-1.8,,Balanced_Market
6,2012,August,4.2,4.9,-0.5,-1.6,,Balanced_Market
8,2012,October,4.6,4.6,-0.5,-2.2,,Balanced_Market
...,...,...,...,...,...,...,...,...
84,2019,February,3.8,3.1,-0.4,0.3,0.3,Balanced_Market
91,2019,September,3.1,3.0,0.5,-0.3,-0.1,Balanced_Market
95,2020,January,3.3,2.7,0.9,-0.9,-0.6,Balanced_Market
96,2020,February,3.1,2.4,-0.2,-0.7,-0.8,Balanced_Market


In [25]:
Seller_Market = MNSA.loc[MNSA["NSA"] <=  3]

In [26]:
Seller_Market['State'] = 'Seller_Market'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Seller_Market['State'] = 'Seller_Market'


In [27]:
Buyer_Market = MNSA.loc[MNSA["NSA"] >= 5]

In [28]:
Buyer_Market['State'] = 'Buyer_Market'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Buyer_Market['State'] = 'Buyer_Market'


In [30]:
frames = [Buyer_Market, Seller_Market, balanced_market]
Market = pd.concat(frames)

Market.reset_index(inplace = True,drop = True)
Market

Unnamed: 0,Year,Months,NSA,SA,MOM_NSA,YOY_NSA,YOY_SA,State
0,2012,February,6.8,5.4,-0.6,-2.6,,Buyer_Market
1,2012,March,5.4,5.6,-1.4,-1.5,,Buyer_Market
2,2012,April,5.3,5.5,-0.1,-1.6,,Buyer_Market
3,2012,September,5.1,4.7,0.9,-1.3,,Buyer_Market
4,2013,January,5.4,4.3,1.1,-2.1,-1.5,Buyer_Market
...,...,...,...,...,...,...,...,...
101,2019,February,3.8,3.1,-0.4,0.3,0.3,Balanced_Market
102,2019,September,3.1,3.0,0.5,-0.3,-0.1,Balanced_Market
103,2020,January,3.3,2.7,0.9,-0.9,-0.6,Balanced_Market
104,2020,February,3.1,2.4,-0.2,-0.7,-0.8,Balanced_Market
