# Housing affordability indicator

Here we combine data from `10` and `11` to create a housing affordability indicator: the ratio of mean house prices to median gross annual salaries in each NUTS2 area.





## Preamble

In [None]:
%run ../notebook_preamble.ipy

In [None]:
def make_dirs(name,dirs = ('raw','processed')):
    '''
    Utility that creates directories to save the data

    For every entry ``d`` in ``dirs`` it makes sure the folder
    ``../../data/<d>/<name>`` exists (paths are relative to the current
    working directory).

    Arguments:
        name (str) is the name of the sub-directory to create
        dirs (iterable of str) are the folders inside ``../../data`` where the
            sub-directory is created. Defaults to 'raw' and 'processed'

    '''

    for d in dirs:
        # exist_ok=True makes the call idempotent, and makedirs also creates a
        # missing ../../data/<d> parent instead of failing while listing it.
        os.makedirs(f'../../data/{d}/{name}', exist_ok=True)
            
def flat_freq(a_list):
    '''
    Count how often each item appears across all the sub-lists of a nested list

    Returns the pandas value_counts Series of the flattened items.

    '''
    items = []
    for sub_list in a_list:
        items.extend(sub_list)

    counts = pd.Series(items).value_counts()
    return counts

        

def flatten_list(a_list):
    '''
    Flatten a nested list by one level, keeping the original order

    '''
    flat = []
    for sub_list in a_list:
        flat.extend(sub_list)
    return flat

        

In [None]:
def save_data(df,name,path,today=today_str):
    '''
    Utility to save processed data quicker

    The file is written to ``<path>/<today>_<name>.csv``.

    Arguments:
        df (df) is the dataframe we want to save
        name (str) is the name of the file
        path (str) is the path where we want to save the file
        today (str) is the day when the data is saved. It is used as the
            file name prefix and defaults to today_str

    '''

    # Use the `today` argument rather than the global today_str, so that a
    # date passed by the caller actually ends up in the file name.
    df.to_csv(f'{path}/{today}_{name}.csv')
    

In [None]:

def make_indicator(table,target_path,var_lookup,year_var,nuts_var='nuts_code',nuts_spec=2018,decimals=3):
    '''
    We use this function to create and save indicators using our standardised format.

    The output has the columns year, nuts_id, nuts_year_spec and the indicator
    itself, and is saved as ../../data/processed/<target_path>/<var_code>.csv
    (without the index). The first rows are printed as a quick check.
    The input table is not modified.

    Args:
        table (df) is a df with relevant information
        target_path (str) is the name of the directory inside ../../data/processed where we want to save the data
        var_lookup (dict) is a lookup to rename the variable into our standardised name.
            Only its first key (current name) / value (standardised name) pair is used
        year_var (str) is the name of the year variable
        nuts_var (str) is the name of the NUTS code variable. We assume it is nuts_code
        nuts_spec (y) is the value of the NUTS specification. We assume we are working with 2018 NUTS
        decimals (int) is the number of decimals to round the indicator to.
            If it is not positive the indicator is converted to integer instead

    '''
    #This is the variable name and code (first entry of the lookup)
    var_name, var_code = list(var_lookup.items())[0]

    #Move the index into columns and focus on the variables we need.
    #The explicit copy means the edits below act on our own frame and not on
    #a slice of the input (which triggered SettingWithCopyWarning).
    t = table.reset_index(drop=False)[[year_var,nuts_var,var_name]].copy()

    #Add the nuts specification
    t['nuts_year_spec'] = nuts_spec

    #Rename variables
    t = t.rename(columns={var_name:var_code,year_var:'year',nuts_var:'nuts_id'})

    #Round variables (vectorised)
    if decimals>0:
        t[var_code] = t[var_code].round(decimals)
    else:
        t[var_code] = t[var_code].astype(int)

    #Reorder variables
    t = t[['year','nuts_id','nuts_year_spec',var_code]]

    print(t.head())

    #Save in the processed folder
    t.to_csv(f'../../data/processed/{target_path}/{var_code}.csv',index=False)

In [None]:
#dirs

# Make sure the raw and processed folders for this indicator exist.
# exist_ok=True makes the cell safe to re-run and, unlike listing the parent
# folder first, also works when ../../data/raw or ../../data/processed is missing.
for data_dir in ['../../data/raw/house_afford', '../../data/processed/house_afford']:
    os.makedirs(data_dir, exist_ok=True)

## Load data

In [None]:
# House prices by NUTS area (interim output)
housing = pd.read_csv(
    filepath_or_buffer='../../data/interim/housing/2020_02_11_nuts_house_prices.csv'
)

# Salaries by NUTS2 area from the ASHE sci-tech interim file
median_salaries = pd.read_csv(
    filepath_or_buffer='../../data/interim/ashe_place/2020_02_11_ashe_nuts_2_sci_tech.csv'
)

In [None]:
# Quick look at the first five rows of the house price table
housing.head(n=5)

### Merge data

In [None]:
# Left join: keep every house price row and attach the salary data for the
# same NUTS2 code and year where it is available
housing_salaries = housing.merge(
    median_salaries,
    how='left',
    left_on=['NUTS218CD','year'],
    right_on=['nuts_2_codes','year'],
)

In [None]:
# Affordability = mean house price divided by median gross annual salary
housing_salaries['housing_affordability_sci_tech'] = (
    housing_salaries['mean_price_housing']
    .div(housing_salaries['gross_annual_salary_median'])
)

In [None]:
# Keep only the rows where the affordability ratio could be computed
housing_salaries_final = housing_salaries[
    housing_salaries['housing_affordability_sci_tech'].notna()
]

In [None]:
# Five highest ratios in 2017
(
    housing_salaries_final[housing_salaries_final['year']==2017]
    .sort_values(by='housing_affordability_sci_tech',ascending=False)
    .head()
)

In [None]:
# Save the ratio as the standardised 'ratio_house_price_salary' indicator
make_indicator(
    table=housing_salaries_final,
    target_path='house_afford',
    var_lookup={'housing_affordability_sci_tech':'ratio_house_price_salary'},
    year_var='year',
    nuts_var='NUTS218CD',
)