# Tannis McCartney
## June 17, 2022

##  Data Preparation - NS Housing Market Indicators Table

## Table of Contents
### 01 Import Libraries
### 02 Import Dataset
### 03 Data Wrangling
### 04 Consistency Checks
### 05 Export cleaned dataframe

# 01 Import Libraries

In [1]:
# Import libraries for analysis
import pandas as pd
import numpy as np
import os

# 02 Import Dataset

In [2]:
# Project root folder as a raw string (the r prefix keeps the backslashes in
# the Windows path from being read as escape sequences). Later cells join
# sub-folders onto it with os.path.join.
path = r'C:\Users\tmmcc\Google Drive\Data Analytics Bootcamp\6 Advanced Analytics and Dashboard Design\Nova Scotia Housing and Income'

In [3]:
# Read the housing market indicators CSV from the project's original-data
# folder, then preview the first rows
housing_csv = os.path.join(
    path,
    '02 Data',
    'Original Data',
    'housing-market-indicators-nova-scotia-1990-2016.csv',
)
NSHousing = pd.read_csv(housing_csv, index_col=False)
NSHousing.head()

Unnamed: 0,indicator,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,"Starts, total",5560.0,5173.0,4673.0,4282.0,4748.0,4168.0,4059.0,3813.0,3137.0,...,4750.0,3982.0,3438.0,4309.0,4644.0,4522.0,3919.0,3056.0,3825.0,3767.0
1,"Starts, Single-detached",3639.0,2604.0,3232.0,3126.0,3358.0,3040.0,3278.0,2939.0,2257.0,...,2887.0,2636.0,2193.0,2392.0,2045.0,2258.0,1639.0,1355.0,1350.0,1656.0
2,"Starts, Multiple",1921.0,2569.0,1441.0,1156.0,1390.0,1128.0,781.0,874.0,880.0,...,1863.0,1346.0,1245.0,1917.0,2599.0,2264.0,2280.0,1701.0,2475.0,2111.0
3,"Starts, Semi-detached",310.0,301.0,373.0,496.0,518.0,417.0,447.0,303.0,290.0,...,333.0,328.0,274.0,373.0,418.0,420.0,332.0,220.0,259.0,318.0
4,"Starts, Row",100.0,46.0,93.0,81.0,99.0,72.0,59.0,58.0,89.0,...,221.0,219.0,187.0,200.0,241.0,218.0,259.0,179.0,129.0,144.0


In [4]:
# Check shape of table as (rows, columns)
NSHousing.shape

(35, 28)

# 03 Data Wrangling

In [5]:
# Swap rows and columns so that each year becomes a row and each indicator
# becomes a column, then preview the result
df_T = NSHousing.T
df_T.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
indicator,"Starts, total","Starts, Single-detached","Starts, Multiple","Starts, Semi-detached","Starts, Row","Starts, Apartment","Starts by intended market, 1 total","Starts, Homeownership - freehold","Starts, Rental","Starts, Homeownership - condominium",...,Average rent ($) Bachelor,Average rent ($) One-bedroom,Average rent ($) Two-bedroom,Average rent ($) 3+ bedroom,Population on July 1 (thousands)4,Labour force participation rate (%)4,Employment (% change)5,Unemployment rate (%)4,Real disposable income (% change)5,Net migration5
1990,5560.0,3639.0,1921.0,310.0,100.0,1511.0,3650.0,2199.0,1107.0,12.0,...,391.0,460.0,539.0,658.0,910.5,61.9,1.0,10.7,,
1991,5173.0,2604.0,2569.0,301.0,46.0,2222.0,3681.0,1590.0,1691.0,0.0,...,408.0,481.0,565.0,690.0,915.0,61.5,-1.2,12.1,-1.5,
1992,4673.0,3232.0,1441.0,373.0,93.0,975.0,3222.0,2190.0,764.0,0.0,...,409.0,475.0,562.0,687.0,919.5,60.0,-3.1,13.1,2.3,1217.0
1993,4282.0,3126.0,1156.0,496.0,81.0,579.0,2988.0,2306.0,642.0,0.0,...,412.0,482.0,571.0,719.0,923.9,60.1,-0.6,14.3,2.1,1676.0


In [6]:
# Preview the transposed dataframe with its row labels moved into an 'index'
# column. The result is not assigned, so df_T itself is unchanged and the
# cells below still work with the original row labels.
df_T.reset_index()

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,25,26,27,28,29,30,31,32,33,34
0,indicator,"Starts, total","Starts, Single-detached","Starts, Multiple","Starts, Semi-detached","Starts, Row","Starts, Apartment","Starts by intended market, 1 total","Starts, Homeownership - freehold","Starts, Rental",...,Average rent ($) Bachelor,Average rent ($) One-bedroom,Average rent ($) Two-bedroom,Average rent ($) 3+ bedroom,Population on July 1 (thousands)4,Labour force participation rate (%)4,Employment (% change)5,Unemployment rate (%)4,Real disposable income (% change)5,Net migration5
1,1990,5560.0,3639.0,1921.0,310.0,100.0,1511.0,3650.0,2199.0,1107.0,...,391.0,460.0,539.0,658.0,910.5,61.9,1.0,10.7,,
2,1991,5173.0,2604.0,2569.0,301.0,46.0,2222.0,3681.0,1590.0,1691.0,...,408.0,481.0,565.0,690.0,915.0,61.5,-1.2,12.1,-1.5,
3,1992,4673.0,3232.0,1441.0,373.0,93.0,975.0,3222.0,2190.0,764.0,...,409.0,475.0,562.0,687.0,919.5,60.0,-3.1,13.1,2.3,1217.0
4,1993,4282.0,3126.0,1156.0,496.0,81.0,579.0,2988.0,2306.0,642.0,...,412.0,482.0,571.0,719.0,923.9,60.1,-0.6,14.3,2.1,1676.0
5,1994,4748.0,3358.0,1390.0,518.0,99.0,773.0,3304.0,2493.0,671.0,...,421.0,485.0,579.0,711.0,926.9,60.1,1.7,13.5,-0.8,409.0
6,1995,4168.0,3040.0,1128.0,417.0,72.0,639.0,2695.0,2098.0,570.0,...,422.0,488.0,584.0,760.0,928.1,59.4,0.9,12.2,0.3,-432.0
7,1996,4059.0,3278.0,781.0,447.0,59.0,275.0,2806.0,2527.0,279.0,...,421.0,488.0,588.0,732.0,931.3,59.4,0.2,12.4,-2.5,1475.0
8,1997,3813.0,2939.0,874.0,303.0,58.0,513.0,2732.0,2225.0,479.0,...,424.0,490.0,589.0,722.0,932.4,59.7,1.5,12.1,1.4,695.0
9,1998,3137.0,2257.0,880.0,290.0,89.0,501.0,2171.0,1664.0,418.0,...,433.0,496.0,603.0,763.0,931.8,60.5,3.6,10.5,3.8,-547.0


In [7]:
# Take the first row of the transposed dataframe (the indicator names) to use
# as the new column header
new_header = df_T.iloc[0]
new_header

0                                         Starts, total
1                               Starts, Single-detached
2                                     Starts,  Multiple
3                              Starts,    Semi-detached
4                                       Starts,     Row
5                                Starts,      Apartment
6                    Starts by intended market, 1 total
7                     Starts,  Homeownership - freehold
8                                      Starts,   Rental
9                 Starts,   Homeownership - condominium
10                  Starts,   Other (co-op and unknown)
11                                   Completions, total
12                        Residential Building Permits4
13         Residential Building Permits($) (thousands)4
14         Supply, Newly completed and unabsorbed homes
15                   Supply,  Single- and semi-detached
16                            Supply, Row and apartment
17                             Rental vacancy ra

In [8]:
# Keep every row after the first one (the first row holds the indicator
# names, already saved in new_header).
# iloc makes the positional slice explicit, and .copy() gives df its own data
# so the column changes made in later cells cannot reach back into df_T or
# trigger pandas' SettingWithCopyWarning.
df = df_T.iloc[1:].copy()
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
1990,5560.0,3639.0,1921.0,310.0,100.0,1511.0,3650.0,2199.0,1107.0,12.0,...,391.0,460.0,539.0,658.0,910.5,61.9,1.0,10.7,,
1991,5173.0,2604.0,2569.0,301.0,46.0,2222.0,3681.0,1590.0,1691.0,0.0,...,408.0,481.0,565.0,690.0,915.0,61.5,-1.2,12.1,-1.5,
1992,4673.0,3232.0,1441.0,373.0,93.0,975.0,3222.0,2190.0,764.0,0.0,...,409.0,475.0,562.0,687.0,919.5,60.0,-3.1,13.1,2.3,1217.0
1993,4282.0,3126.0,1156.0,496.0,81.0,579.0,2988.0,2306.0,642.0,0.0,...,412.0,482.0,571.0,719.0,923.9,60.1,-0.6,14.3,2.1,1676.0
1994,4748.0,3358.0,1390.0,518.0,99.0,773.0,3304.0,2493.0,671.0,0.0,...,421.0,485.0,579.0,711.0,926.9,60.1,1.7,13.5,-0.8,409.0
1995,4168.0,3040.0,1128.0,417.0,72.0,639.0,2695.0,2098.0,570.0,24.0,...,422.0,488.0,584.0,760.0,928.1,59.4,0.9,12.2,0.3,-432.0
1996,4059.0,3278.0,781.0,447.0,59.0,275.0,2806.0,2527.0,279.0,0.0,...,421.0,488.0,588.0,732.0,931.3,59.4,0.2,12.4,-2.5,1475.0
1997,3813.0,2939.0,874.0,303.0,58.0,513.0,2732.0,2225.0,479.0,0.0,...,424.0,490.0,589.0,722.0,932.4,59.7,1.5,12.1,1.4,695.0
1998,3137.0,2257.0,880.0,290.0,89.0,501.0,2171.0,1664.0,418.0,89.0,...,433.0,496.0,603.0,763.0,931.8,60.5,3.6,10.5,3.8,-547.0
1999,4250.0,3345.0,905.0,218.0,29.0,658.0,2834.0,2263.0,571.0,0.0,...,448.0,507.0,609.0,761.0,933.8,60.8,2.0,9.6,4.4,1991.0


In [9]:
# Replace the positional column labels (0-34) with the indicator names saved
# in new_header
df.columns = new_header
df

indicator,"Starts, total","Starts, Single-detached","Starts, Multiple","Starts, Semi-detached","Starts, Row","Starts, Apartment","Starts by intended market, 1 total","Starts, Homeownership - freehold","Starts, Rental","Starts, Homeownership - condominium",...,Average rent ($) Bachelor,Average rent ($) One-bedroom,Average rent ($) Two-bedroom,Average rent ($) 3+ bedroom,Population on July 1 (thousands)4,Labour force participation rate (%)4,Employment (% change)5,Unemployment rate (%)4,Real disposable income (% change)5,Net migration5
1990,5560.0,3639.0,1921.0,310.0,100.0,1511.0,3650.0,2199.0,1107.0,12.0,...,391.0,460.0,539.0,658.0,910.5,61.9,1.0,10.7,,
1991,5173.0,2604.0,2569.0,301.0,46.0,2222.0,3681.0,1590.0,1691.0,0.0,...,408.0,481.0,565.0,690.0,915.0,61.5,-1.2,12.1,-1.5,
1992,4673.0,3232.0,1441.0,373.0,93.0,975.0,3222.0,2190.0,764.0,0.0,...,409.0,475.0,562.0,687.0,919.5,60.0,-3.1,13.1,2.3,1217.0
1993,4282.0,3126.0,1156.0,496.0,81.0,579.0,2988.0,2306.0,642.0,0.0,...,412.0,482.0,571.0,719.0,923.9,60.1,-0.6,14.3,2.1,1676.0
1994,4748.0,3358.0,1390.0,518.0,99.0,773.0,3304.0,2493.0,671.0,0.0,...,421.0,485.0,579.0,711.0,926.9,60.1,1.7,13.5,-0.8,409.0
1995,4168.0,3040.0,1128.0,417.0,72.0,639.0,2695.0,2098.0,570.0,24.0,...,422.0,488.0,584.0,760.0,928.1,59.4,0.9,12.2,0.3,-432.0
1996,4059.0,3278.0,781.0,447.0,59.0,275.0,2806.0,2527.0,279.0,0.0,...,421.0,488.0,588.0,732.0,931.3,59.4,0.2,12.4,-2.5,1475.0
1997,3813.0,2939.0,874.0,303.0,58.0,513.0,2732.0,2225.0,479.0,0.0,...,424.0,490.0,589.0,722.0,932.4,59.7,1.5,12.1,1.4,695.0
1998,3137.0,2257.0,880.0,290.0,89.0,501.0,2171.0,1664.0,418.0,89.0,...,433.0,496.0,603.0,763.0,931.8,60.5,3.6,10.5,3.8,-547.0
1999,4250.0,3345.0,905.0,218.0,29.0,658.0,2834.0,2263.0,571.0,0.0,...,448.0,507.0,609.0,761.0,933.8,60.8,2.0,9.6,4.4,1991.0


In [10]:
# Look at info for table (index range, column names, non-null counts, dtypes)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 1990 to 2016
Data columns (total 35 columns):
 #   Column                                               Non-Null Count  Dtype 
---  ------                                               --------------  ----- 
 0   Starts, total                                        27 non-null     object
 1   Starts, Single-detached                              27 non-null     object
 2    Starts,  Multiple                                   27 non-null     object
 3   Starts,    Semi-detached                             27 non-null     object
 4   Starts,     Row                                      27 non-null     object
 5   Starts,      Apartment                               27 non-null     object
 6   Starts by intended market, 1 total                   27 non-null     object
 7   Starts,  Homeownership - freehold                    27 non-null     object
 8   Starts,   Rental                                     27 non-null     object
 9   S

In [11]:
# Rename columns for clarity.
# The keys must match the source headers exactly, including their irregular
# leading/internal spaces and trailing footnote digits. errors='raise' makes a
# mismatched key fail loudly; by default rename() silently skips keys it
# cannot find and leaves the old column name in place.
df = df.rename(
    columns={
        'Starts, total': 'total_starts',
        'Starts, Single-detached': 'single_detached_starts',
        ' Starts,  Multiple': 'multiple_starts',
        'Starts,    Semi-detached': 'semi_detached_starts',
        'Starts,     Row': 'row_starts',
        'Starts,      Apartment': 'apartment_starts',
        'Starts by intended market, 1 total': 'starts_by_market',
        'Starts,  Homeownership - freehold': 'freehold_homeownership_starts',
        'Starts,   Rental': 'rental_starts',
        'Starts,   Homeownership - condominium': 'condominium_ownership_starts',
        'Starts,   Other (co-op and unknown)': 'co_op_and_unknown_starts',
        'Completions, total': 'total_completions',
        'Residential Building Permits4': 'residential_building_permits',
        'Residential Building Permits($) (thousands)4': 'res_bldg_permit_value_thousands',
        'Supply, Newly completed and unabsorbed homes': 'new_completed_and_unabsorbed_home_supply',
        'Supply,  Single- and semi-detached': 'single_and_semi_detached_supply',
        'Supply, Row and apartment': 'row_and_apartment_supply',
        'Rental vacancy rate (%)3': 'rental_vacancy_percent',
        # NOTE(review): 'availabilty' is misspelled; kept as-is because the
        # exported column name may already be used elsewhere - confirm
        'Rental availability rate (%)3': 'rental_availabilty_rate_percent',
        'Vacancy Rate (Standard Spaces) in Seniors\' Rental 6': 'standard_space_seniors_vacancy_rate',
        'New Housing Price Index (% change)5': 'pct_change_new_housing_price_index',
        'Consumer Price Index (% change)5': 'pct_change_consumer_price_index',
        'Construction Wage Rate Index (% change)5': 'pct_change_construction_wage_rate_index',
        'Owned accommodation costs (% change)5': 'pct_change_owned_accommodation_costs',
        'Rental accommodation costs (% change)5': 'pct_change_rental_accommodation_costs',
        'Average rent ($)  Bachelor': 'bachelor_average_rent_dollars',
        # NOTE(review): lacks the '_dollars' suffix used by the other
        # average-rent columns; kept as-is for the same reason
        'Average rent ($)  One-bedroom': 'one_bedroom_average_rent',
        'Average rent ($)  Two-bedroom': 'two_bed_average_rent_dollars',
        'Average rent ($)  3+ bedroom': 'three_plus_bed_average_rent_dollars',
        'Population on July 1 (thousands)4': 'july_1_population_thousands',
        'Labour force participation rate (%)4': 'labour_force_participation_rate_percent',
        'Employment (% change)5': 'pct_change_employment',
        # NOTE(review): the source header is a rate (%), not a % change, so
        # the 'pct_change_' prefix looks misleading - confirm before renaming
        'Unemployment rate (%)4': 'pct_change_unemployment',
        'Real disposable income (% change)5': 'pct_change_real_disposable_income',
        'Net migration5': 'net_migration',
    },
    errors='raise',
)

In [12]:
# Check info of dataframe to confirm the renamed columns
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 1990 to 2016
Data columns (total 35 columns):
 #   Column                                    Non-Null Count  Dtype 
---  ------                                    --------------  ----- 
 0   total_starts                              27 non-null     object
 1   single_detached_starts                    27 non-null     object
 2   multiple_starts                           27 non-null     object
 3   semi_detached_starts                      27 non-null     object
 4   row_starts                                27 non-null     object
 5   apartment_starts                          27 non-null     object
 6   starts_by_market                          27 non-null     object
 7   freehold_homeownership_starts             27 non-null     object
 8   rental_starts                             27 non-null     object
 9   condominium_ownership_starts              27 non-null     object
 10  co_op_and_unknown_starts                  27 non-nul

In [13]:
# Change columns with object type to numeric.
# One loop over all columns replaces a hand-written line per column, using
# the same conversion for each:
#   - errors='coerce' turns any value that cannot be parsed into NaN
#   - downcast='float' stores the result in the smallest float dtype that
#     fits (float32 at minimum), so values are held at float32 precision
for col in df.columns:
    df[col] = pd.to_numeric(df[col], downcast='float', errors='coerce')

In [14]:
# Check info to confirm the columns are now numeric
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 1990 to 2016
Data columns (total 35 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   total_starts                              27 non-null     float32
 1   single_detached_starts                    27 non-null     float32
 2   multiple_starts                           27 non-null     float32
 3   semi_detached_starts                      27 non-null     float32
 4   row_starts                                27 non-null     float32
 5   apartment_starts                          27 non-null     float32
 6   starts_by_market                          27 non-null     float32
 7   freehold_homeownership_starts             27 non-null     float32
 8   rental_starts                             27 non-null     float32
 9   condominium_ownership_starts              27 non-null     float32
 10  co_op_and_unknown_starts                

# 04 Consistency Checks

In [15]:
# Check for mixed data types in Housing table: print the name of every column
# whose values are not all of the same Python type.
# Series.map(type) is used instead of DataFrame.applymap(type), which is
# deprecated in pandas 2.1+; counting the distinct types also copes with an
# empty dataframe, where looking up the first row would fail.
for col in df.columns:
    if df[col].map(type).nunique() > 1:
        print(col)

#### There are no mixed-type data columns in the dataframe.

In [16]:
# Count the missing values in each column of the dataframe
df.isna().sum()

indicator
total_starts                                 0
single_detached_starts                       0
multiple_starts                              0
semi_detached_starts                         0
row_starts                                   0
apartment_starts                             0
starts_by_market                             0
freehold_homeownership_starts                0
rental_starts                                0
condominium_ownership_starts                 0
co_op_and_unknown_starts                     0
total_completions                            0
residential_building_permits                 0
res_bldg_permit_value_thousands              0
new_completed_and_unabsorbed_home_supply     2
single_and_semi_detached_supply              2
row_and_apartment_supply                     2
rental_vacancy_percent                       0
rental_availabilty_rate_percent             15
standard_space_seniors_vacancy_rate         19
pct_change_new_housing_price_index           0
pct

#### Some columns are missing data. Having examined the table in Excel, this is because those variables were not measured every year, so no action will be taken.

In [17]:
# Collect any rows that are exact repeats of an earlier row
duplicate_rows = df.duplicated()
df_dups2 = df.loc[duplicate_rows]
df_dups2

indicator,total_starts,single_detached_starts,multiple_starts,semi_detached_starts,row_starts,apartment_starts,starts_by_market,freehold_homeownership_starts,rental_starts,condominium_ownership_starts,...,bachelor_average_rent_dollars,one_bedroom_average_rent,two_bed_average_rent_dollars,three_plus_bed_average_rent_dollars,july_1_population_thousands,labour_force_participation_rate_percent,pct_change_employment,pct_change_unemployment,pct_change_real_disposable_income,net_migration


#### There are no duplicates in the dataframe.

In [18]:
# Compute descriptive statistics (count, mean, std, min, quartiles, max) for
# each column
df.describe()

indicator,total_starts,single_detached_starts,multiple_starts,semi_detached_starts,row_starts,apartment_starts,starts_by_market,freehold_homeownership_starts,rental_starts,condominium_ownership_starts,...,bachelor_average_rent_dollars,one_bedroom_average_rent,two_bed_average_rent_dollars,three_plus_bed_average_rent_dollars,july_1_population_thousands,labour_force_participation_rate_percent,pct_change_employment,pct_change_unemployment,pct_change_real_disposable_income,net_migration
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,...,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,25.0,25.0
mean,4335.296387,2674.592529,1660.703735,331.59259,144.703705,1184.407349,3207.222168,2047.851807,926.148132,180.074081,...,527.518494,600.962952,723.740723,925.037048,934.377808,62.103703,0.6,10.022223,1.584,711.039978
std,608.86731,642.001038,541.452515,79.938087,82.190117,510.973236,437.948364,361.254089,421.04657,172.502808,...,106.787483,117.944412,149.530441,199.616837,9.247217,1.645152,1.396149,1.860176,1.817434,1978.330811
min,3056.0,1350.0,781.0,218.0,17.0,275.0,2171.0,1293.0,279.0,0.0,...,391.0,460.0,539.0,658.0,910.5,59.400002,-3.1,7.6,-2.5,-2754.0
25%,3950.5,2257.5,1288.0,270.0,76.5,778.5,2820.0,1816.0,642.5,6.0,...,423.0,489.0,588.5,746.0,931.549988,60.65,-0.3,8.9,0.3,-432.0
50%,4309.0,2856.0,1607.0,318.0,144.0,1210.0,3304.0,2133.0,810.0,166.0,...,514.0,575.0,684.0,912.0,935.200012,62.400002,0.9,9.2,2.1,652.0
75%,4749.0,3179.0,2119.5,373.0,218.5,1522.0,3493.0,2284.5,1113.0,323.0,...,614.5,702.5,844.5,1086.0,940.849976,63.65,1.6,11.4,2.8,1475.0
max,5560.0,3639.0,2599.0,518.0,277.0,2222.0,4075.0,2527.0,1793.0,530.0,...,724.0,819.0,1008.0,1262.0,949.5,64.400002,3.6,14.3,4.4,6494.0


In [19]:
# Copy the descriptive statistics table to the system clipboard
df.describe().to_clipboard()

In [20]:
# Check final shape as (rows, columns) before exporting
df.shape

(27, 35)

# 05 Export Cleaned Dataframe

In [21]:
# Save the cleaned dataframe as a pickle file in the prepared-data folder
cleaned_pkl = os.path.join(path, '02 Data', 'Prepared Data', 'NSHousing_cleaned.pkl')
df.to_pickle(cleaned_pkl)