## Exploring housing markers in Canada and British Columbia


Average value of new mortgage loan 

Canada, provinces and CMAs 2012 Q3 to 2020 Q3

In [5]:
%run ./scr/main.py
import plotly.express as px

In [6]:
data = "./data/"

In [7]:
# a function for data cleaning
def data_prep(data_file):
    df = pd.read_excel(data_file)
    temp = df.copy()
    
    # Resetting column
    new_header = temp.iloc[3]
    temp = temp[4:]
    temp.columns = new_header
    
    #Removing empty rows
    temp.dropna(thresh=2, inplace=True)
    #Ensuring the indices go from 0 without skipping any number
    temp.reset_index(inplace=True)
    temp.drop(['index'],inplace=True,axis=1)
    #Selecting all rows until and including the row for "Victoria"
    row_select = temp[temp['Geography']=='Victoria'].index
    temp = temp.iloc[:row_select[0]+1,1:]
    
    #Categorize Geography as Country, Province, and City
    temp['Region/City'] = ""
    temp['Region/City'][temp.index ==0] = 'Country'
    temp['Region/City'][(temp.index < 11) & (temp.index >0)] = 'Province'
    temp['Region/City'][temp.index >= 11] = 'City'

    #Rearrange columns -- bring column 'Region/City' to the 1st column position
    # Access columns
    cols = temp.columns.tolist()
    # Bring last col to front
    cols = cols[-1:] + cols[:-1]
    # Set new column order
    temp = temp[cols] 
    
    #Converting data type to the right format
    #Isolating yearly and quarterly columns and convert to numeric.
    int_cols = temp.columns.drop(['Region/City', 'Geography'])
    temp[int_cols] = temp[int_cols].apply(pd.to_numeric, errors='coerce')

    return temp

In [8]:
# a function for box or violin graphs
def graph_region(df, level: str, graph_type: str):
    """
    df: data frame object with mortage data
    level: "Province" or "City"
    graph_type: "box", "violin"
    """
    
    plot_dict = {'box': px.box,'violin': px.violin}
    
    def slice_data(df, level):
        """
        Extract a subset of df based on level
        Return a dataframe
        """
        temp = df[df['Region/City']==level]
        temp = pd.melt(temp, id_vars='Geography', value_vars=temp.columns[2:])
        temp.rename(columns = {3:'Time'}, inplace = True)
        return temp
    
    try:
        region_df = slice_data(df, level)
        fig = plot_dict[graph_type](region_df, x="Geography", y="value", color = "Geography", points='all')
        fig.update_xaxes(tickangle=-45)
        fig.show()
    
    except KeyError: print("Key not found. Make sure that 'level' is in ['Province','City']", 
                           "and 'graph_type' is in ['box','violin']")


In [13]:
data_file = data + "average-value-new-mortgage-loans-ca-prov-cmas-2012-q3-2020-q3-en.xlsx"
df = data_prep(data_file)

In [None]:
graph_region(df, level='Province', graph_type='box')

In [None]:
# bar plot
df_province = df[df['Region/City']=="Province"]
df_province.iloc[:,2:].plot(kind='bar')

In [19]:
#line plot

# need to access the function slice_data within function data_prep

temp = data_prep.slice_data(df,level='Province')
fig = px.line(df, x='Time', y='value', color = 'Geography')
fig.update_xaxes(tickangle=-45)
fig.show()

AttributeError: 'function' object has no attribute 'slice_data'

# Delinquency

In [11]:
data_delinquency = data + "mortgage-delinquency-rate-ca-prov-cmas-2012-q3-2020-q4-en.xlsx"
df_delin = data_prep(data_delinquency)

In [30]:
graph_region(df_delin, level='Province', graph_type='box')

In [15]:
df.columns

Index(['Region/City', 'Geography', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
       '2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4', '2015Q1',
       '2015Q2', '2015Q3', '2015Q4', '2016Q1', '2016Q2', '2016Q3', '2016Q4',
       '2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2', '2018Q3',
       '2018Q4', '2019Q1', '2019Q2', '2019Q3', '2019Q4', '2020Q1', '2020Q2',
       '2020Q3'],
      dtype='object', name=3)

In [16]:
df_delin.columns

Index(['Region/City', 'Geography', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
       '2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4', '2015Q1',
       '2015Q2', '2015Q3', '2015Q4', '2016Q1', '2016Q2', '2016Q3', '2016Q4',
       '2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2', '2018Q3',
       '2018Q4', '2019Q1', '2019Q2', '2019Q3', '2019Q4', '2020Q1', '2020Q2',
       '2020Q3', '2020Q4'],
      dtype='object', name=3)

In [17]:
test = pd.concat(df, df_delin)

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

# Average Income

In [10]:
data_income = data + "real-average-household-income-after-taxes-tenure-2006-2018-en.xlsx"
df_income = data_prep(data_income)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(~key, value, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(~key, value, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(~key, value, inplace=True)


ValueError: Columns must be same length as key

## Exercise

1. Identify different ways of visualizing data. 
2. What insights do you gain for the study period? 
3. How are these insights different for each province/city?