# Intro
The goal of this notebook is to produce an automated valuation tool for Singapore. We make use of the data provided by the Urban Redevelopment Authority (URA).

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

def summarise_fit(results):
    """Flatten one fitted regression result into a single labelled Series.

    Parameters
    ----------
    results : object exposing ``params`` and ``pvalues`` (pandas Series) and
        the scalars ``rsquared``, ``rsquared_adj`` and ``nobs``.

    Returns
    -------
    pandas.Series
        The coefficients under their own labels, then the p-values under
        ``pvalue_<label>``, then ``rsquared``, ``rsquared_adj`` and ``nobs``.
    """
    # Series.append was removed in pandas 2.0; one concat builds the same
    # Series. add_prefix returns a relabelled copy, so the p-value Series
    # handed in by ``results`` keeps its original index.
    return pd.concat([
        results.params,
        results.pvalues.add_prefix('pvalue_'),
        pd.Series(
            [results.rsquared, results.rsquared_adj, results.nobs],
            index=['rsquared', 'rsquared_adj', 'nobs'],
        ),
    ])


def chain_window_indices(index_dict):
    """Splice per-window index levels into one continuous list.

    Parameters
    ----------
    index_dict : dict
        Maps a window's first period label to that window's list of index
        levels. Windows are processed in sorted key order.

    Returns
    -------
    list
        The earliest window's levels in full, followed by one value per
        later window: the previous chained level scaled by that window's
        ratio between its last two levels.
    """
    chained = []
    for period in sorted(index_dict):
        levels = index_dict[period]
        if not chained:
            # Copy, so extending the chain does not also extend the list
            # stored in ``index_dict``.
            chained = list(levels)
        else:
            chained.append(levels[-1] * chained[-1] / levels[-2])
    return chained


if __name__ == '__main__':
    dataset = pd.read_csv('datasets/singapore_ura.csv', index_col=0)
    y = dataset['log_price_psf']

    # Quarter labels 2017Q1 .. 2020Q4, with the final two quarters dropped.
    period_list = ['%sQ%s' % (year, qtr) for year in range(2017, 2021) for qtr in range(1, 5)]
    period_list = period_list[:-2]

    # Every run of five consecutive quarters is one regression window.
    window_size = 5
    windows = [
        period_list[start:start + window_size]
        for start in range(len(period_list) - window_size + 1)
    ]

    # Regressors shared by every window: everything except the target and
    # the period dummies (the dummies are chosen per window below).
    base_columns = [
        c for c in dataset.columns
        if not c.startswith('Period_') and c != 'log_price_psf'
    ]

    results_series_list = []
    for window in windows:
        window_dummies = ['Period_%s' % period for period in window]

        # Keep only rows flagged as belonging to one of the window's quarters.
        period_filter = (dataset[window_dummies] == 1).any(axis=1)

        # The window's first quarter has no dummy in the design matrix, so
        # the remaining period coefficients are measured relative to it.
        X_target = dataset.loc[period_filter, base_columns + window_dummies[1:]]
        X_target = sm.add_constant(X_target)

        results = sm.OLS(y[period_filter], X_target).fit()

        results_series_list.append(summarise_fit(results))
        results.save("rolling_%s.pkl" % window[-1])

    # One column per window, labelled by the window's first quarter.
    results_df = pd.concat(results_series_list, axis=1)
    results_df.columns = [window[0] for window in windows]
    results_df.to_csv('rolling.csv')
    print(">>>saved to rolling.csv")

    # Per-window index: 100 in the base quarter, then 100 * exp(coefficient)
    # for each of the other four quarters (the target is a log price).
    index_dict = {}
    for window in windows:
        coefficients = results_df[window[0]]
        index_dict[window[0]] = [100.0] + [
            100.0 * np.exp(coefficients['Period_%s' % period])
            for period in window[1:]
        ]

    final_index_list = chain_window_indices(index_dict)

    final_index_df = pd.DataFrame(final_index_list, index=period_list, columns=['Index'])
    final_index_df.to_csv('rolling_index.csv')
    print(">>>index saved to rolling_index.csv")
