In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Read the cleaned housing CSV (the path is relative to the notebook's working
# directory) and preview the first rows.
clean_housing_csv = Path('Resources/clean_housing.csv')
housing_df = pd.read_csv(clean_housing_csv)
housing_df.head()

Unnamed: 0,period_begin,period_end,state,property_type,median_sale_price,homes_sold
0,2020-11-01,2020-11-30,Illinois,All Residential,240700,13675
1,2019-07-01,2019-07-31,Connecticut,Single Family Residential,310900,3920
2,2016-11-01,2016-11-30,Illinois,Condo/Co-op,196800,1970
3,2013-02-01,2013-02-28,Kansas,Single Family Residential,147600,1150
4,2020-02-01,2020-02-29,Virginia,Multi-Family (2-4 Unit),217900,67


In [3]:
# Remove the period_end column. drop() returns a new frame, so housing_df
# itself keeps all of its columns.
filtered_housing_df = housing_df.drop(columns=["period_end"])
filtered_housing_df.head()

Unnamed: 0,period_begin,state,property_type,median_sale_price,homes_sold
0,2020-11-01,Illinois,All Residential,240700,13675
1,2019-07-01,Connecticut,Single Family Residential,310900,3920
2,2016-11-01,Illinois,Condo/Co-op,196800,1970
3,2013-02-01,Kansas,Single Family Residential,147600,1150
4,2020-02-01,Virginia,Multi-Family (2-4 Unit),217900,67


In [4]:
# Put `state` first, followed by the date, the property type and the two metrics.
column_order = [
    'state',
    'period_begin',
    'property_type',
    'median_sale_price',
    'homes_sold',
]
filtered_housing_df = filtered_housing_df[column_order]
filtered_housing_df

Unnamed: 0,state,period_begin,property_type,median_sale_price,homes_sold
0,Illinois,2020-11-01,All Residential,240700,13675
1,Connecticut,2019-07-01,Single Family Residential,310900,3920
2,Illinois,2016-11-01,Condo/Co-op,196800,1970
3,Kansas,2013-02-01,Single Family Residential,147600,1150
4,Virginia,2020-02-01,Multi-Family (2-4 Unit),217900,67
...,...,...,...,...,...
28970,Hawaii,2021-10-01,Townhouse,1591000,21
28971,Georgia,2015-08-01,Condo/Co-op,158200,662
28972,Maine,2013-11-01,Single Family Residential,147400,370
28973,Oregon,2016-12-01,Single Family Residential,312600,4511


In [5]:
# Inspect the dtype of each column; period_begin is still a plain object
# (string) column at this point.
filtered_housing_df.dtypes

state                object
period_begin         object
property_type        object
median_sale_price     int64
homes_sold            int64
dtype: object

In [6]:
# Convert period_begin from strings to datetime64 values. errors="raise" makes
# an unparseable date stop the notebook instead of silently becoming NaT.
# assign() builds a new frame rather than writing into the column subset
# produced by the reorder step, which can trigger SettingWithCopyWarning.
# NOTE(review): the earlier comment on this cell said the date is reduced to
# the year only, but no year is extracted here -- the values stay full dates.
# Confirm with downstream consumers before adding `.dt.year`, since that would
# change the data written to the output CSV.
filtered_housing_df = filtered_housing_df.assign(
    period_begin=pd.to_datetime(filtered_housing_df['period_begin'], errors="raise")
)
filtered_housing_df

Unnamed: 0,state,period_begin,property_type,median_sale_price,homes_sold
0,Illinois,2020-11-01,All Residential,240700,13675
1,Connecticut,2019-07-01,Single Family Residential,310900,3920
2,Illinois,2016-11-01,Condo/Co-op,196800,1970
3,Kansas,2013-02-01,Single Family Residential,147600,1150
4,Virginia,2020-02-01,Multi-Family (2-4 Unit),217900,67
...,...,...,...,...,...
28970,Hawaii,2021-10-01,Townhouse,1591000,21
28971,Georgia,2015-08-01,Condo/Co-op,158200,662
28972,Maine,2013-11-01,Single Family Residential,147400,370
28973,Oregon,2016-12-01,Single Family Residential,312600,4511


In [7]:
# Check the data types again; after the conversion, period_begin should now
# be datetime64[ns].
filtered_housing_df.dtypes

state                        object
period_begin         datetime64[ns]
property_type                object
median_sale_price             int64
homes_sold                    int64
dtype: object

In [8]:
# Rename the 'period_begin' column to 'year'.
# NOTE(review): only the label changes; the values are still full dates.
filtered_housing_df = filtered_housing_df.rename(columns={'period_begin': 'year'})
filtered_housing_df.head()

Unnamed: 0,state,year,property_type,median_sale_price,homes_sold
0,Illinois,2020-11-01,All Residential,240700,13675
1,Connecticut,2019-07-01,Single Family Residential,310900,3920
2,Illinois,2016-11-01,Condo/Co-op,196800,1970
3,Kansas,2013-02-01,Single Family Residential,147600,1150
4,Virginia,2020-02-01,Multi-Family (2-4 Unit),217900,67


In [9]:
# Count the distinct values in the state column.
# dropna=False keeps a missing value in the count, matching len(unique()).
filtered_housing_df['state'].nunique(dropna=False)

48

In [10]:
# Collect the distinct state names, then sort them alphabetically in place.
unique_Hstate = pd.unique(filtered_housing_df['state'])
unique_Hstate.sort()

In [11]:
# Print every distinct state name on its own line.
for state_name in unique_Hstate:
    print(state_name)

Alabama
Alaska
Arizona
Arkansas
California
Colorado
Columbia
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginia
Wisconsin


### Missing three states: Montana, North Dakota, and Wyoming (the 48 values above are 47 states plus "Columbia", presumably the District of Columbia)

In [12]:
# Display the frame one more time before it is written to disk.
filtered_housing_df

Unnamed: 0,state,year,property_type,median_sale_price,homes_sold
0,Illinois,2020-11-01,All Residential,240700,13675
1,Connecticut,2019-07-01,Single Family Residential,310900,3920
2,Illinois,2016-11-01,Condo/Co-op,196800,1970
3,Kansas,2013-02-01,Single Family Residential,147600,1150
4,Virginia,2020-02-01,Multi-Family (2-4 Unit),217900,67
...,...,...,...,...,...
28970,Hawaii,2021-10-01,Townhouse,1591000,21
28971,Georgia,2015-08-01,Condo/Co-op,158200,662
28972,Maine,2013-11-01,Single Family Residential,147400,370
28973,Oregon,2016-12-01,Single Family Residential,312600,4511


In [13]:
# Write the filtered frame to CSV; index=False leaves the row index out of the file.
filtered_csv_path = Path('Resources/USML_filtered_housing.csv')
filtered_housing_df.to_csv(filtered_csv_path, index=False)