In [476]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import math
import matplotlib.pyplot as plt
import matplotlib as mpl
import PublicFunctions as pf

np.set_printoptions(linewidth=1000)

# Read

The data was downloaded from Zillow Research (https://www.zillow.com/research/data/ --> Inventory --> New Listings (Raw, All Homes, Monthly View)) as the .csv file given here. Column headers represent the last day of the month, which we simply refer to as the month, i.e., column '2018-01-31' is 'January 2018.'

In [477]:
# Load the Zillow new-listings table (one row per region, one column per month).
# The bare name on the last line makes the notebook display the frame.
listings_path = 'Metro_new_listings_uc_sfrcondo_month.csv'
df = pd.read_csv(listings_path)
df

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2018-01-31,2018-02-28,2018-03-31,2018-04-30,2018-05-31,...,2022-11-30,2022-12-31,2023-01-31,2023-02-28,2023-03-31,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31
0,102001,0,United States,country,,323256.0,376081.0,476114.0,509438.0,538099.0,...,240504.0,169573.0,235076.0,247763.0,333606.0,332116.0,367751.0,376743.0,335943.0,349327.0
1,394913,1,"New York, NY",msa,NY,14880.0,22651.0,22492.0,25714.0,25627.0,...,9711.0,5829.0,9754.0,10439.0,14598.0,13820.0,14360.0,13639.0,11370.0,11234.0
2,753899,2,"Los Angeles, CA",msa,CA,8136.0,9261.0,10164.0,10597.0,11556.0,...,4806.0,3048.0,4699.0,4988.0,6370.0,6156.0,6831.0,6949.0,6658.0,6434.0
3,394463,3,"Chicago, IL",msa,IL,8645.0,11908.0,17472.0,16696.0,16869.0,...,6271.0,3899.0,6043.0,7047.0,9530.0,9649.0,10865.0,11071.0,9597.0,9942.0
4,394514,4,"Dallas, TX",msa,TX,7389.0,7894.0,10535.0,11797.0,11995.0,...,5459.0,3983.0,5188.0,5291.0,8159.0,8879.0,9467.0,10007.0,8650.0,8211.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
920,394968,934,"Pecos, TX",msa,TX,,,,,,...,,,,,,,10.0,6.0,9.0,5.0
921,394743,936,"Ketchikan, AK",msa,AK,9.0,11.0,17.0,11.0,17.0,...,7.0,8.0,11.0,15.0,25.0,15.0,11.0,11.0,9.0,9.0
922,753874,937,"Craig, CO",msa,CO,,,,,,...,,,7.0,14.0,16.0,30.0,27.0,36.0,24.0,34.0
923,395188,938,"Vernon, TX",msa,TX,,,,,,...,,,14.0,6.0,14.0,10.0,20.0,19.0,12.0,11.0


# Isolate the data for the team

In [478]:
# Select the team's metro area by its row position in the listings table.
team = 29
row = df.iloc[team, :]
row

RegionID          394355
SizeRank              29
RegionName    Austin, TX
RegionType           msa
StateName             TX
                 ...    
2023-04-30        3367.0
2023-05-31        3533.0
2023-06-30        3630.0
2023-07-31        2905.0
2023-08-31        2809.0
Name: 29, Length: 73, dtype: object

Calculate the average monthly demand for each of years 1 through 5 (2018–2022).

In [479]:
# Per-year results keyed by year string (e.g. "2018"); populated by the loop
# in the following cell.
yearly_data = {}

def Get_Yearly_Data(year, data):
    """Select the entries of ``data`` whose index label belongs to ``year``.

    Parameters
    ----------
    year : str
        Year prefix to look for, e.g. ``"2018"``.
    data : pandas.Series
        Series whose month entries are labelled ``YYYY-MM-DD``. Labels that do
        not start with ``year`` (metadata fields, other years) are ignored.

    Returns
    -------
    dict
        ``{"data": sub_series}`` where ``sub_series`` holds the matching
        entries in their original order (empty if nothing matches).
    """
    # Match on the label *prefix*: a plain substring test would also pick up
    # any label that merely contains the digits somewhere, and would raise
    # TypeError on non-string labels.
    month_labels = [label for label in data.index if str(label).startswith(year)]
    return {"data": data[month_labels]}

In [480]:
# Calendar years used to fit the models (years 1-5).
years = ["2018", "2019", "2020", "2021", "2022"]

for current_year in years:
    # Collect the year's monthly values, then attach their mean next to them.
    entry = Get_Yearly_Data(current_year, row)
    entry["avg_demand"] = entry["data"].mean()
    yearly_data[current_year] = entry

yearly_data

{'2018': {'data': 2018-01-31    2050.0
  2018-02-28    2826.0
  2018-03-31    3573.0
  2018-04-30    3981.0
  2018-05-31    4078.0
  2018-06-30    3703.0
  2018-07-31    3340.0
  2018-08-31    3124.0
  2018-09-30    2311.0
  2018-10-31    2556.0
  2018-11-30    2034.0
  2018-12-31    1389.0
  Name: 29, dtype: object,
  'avg_demand': 2913.75},
 '2019': {'data': 2019-01-31    2367.0
  2019-02-28    2600.0
  2019-03-31    3512.0
  2019-04-30    3767.0
  2019-05-31    4153.0
  2019-06-30    3641.0
  2019-07-31    3385.0
  2019-08-31    3045.0
  2019-09-30    2595.0
  2019-10-31    2756.0
  2019-11-30    2071.0
  2019-12-31    1483.0
  Name: 29, dtype: object,
  'avg_demand': 2947.9166666666665},
 '2020': {'data': 2020-01-31    2540.0
  2020-02-29    2671.0
  2020-03-31    3525.0
  2020-04-30    2993.0
  2020-05-31    3508.0
  2020-06-30    3537.0
  2020-07-31    3882.0
  2020-08-31    3249.0
  2020-09-30    2889.0
  2020-10-31    2813.0
  2020-11-30    2063.0
  2020-12-31    1643.0
  Name:

# Forecast Using the Decomposition Method

Calculate the seasonal index for each season (month) of each year, and then the average (over the five years) for each season.

In [481]:
# Seasonal index = a month's demand divided by the average demand of its year.
month_names = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
year_labels = ['Year 1', 'Year 2', 'Year 3', 'Year 4', 'Year 5']
values = [
    yearly_data[year]["data"].values / yearly_data[year]["avg_demand"]
    for year in years
]
SI = pd.DataFrame(values, columns=month_names, index=year_labels)
# Append the across-years mean of every month as a final 'Average' row.
SI.loc['Average'] = SI.mean()
SI

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December
Year 1,0.703561,0.969884,1.226255,1.366281,1.399571,1.270871,1.146289,1.072158,0.793136,0.87722,0.698069,0.476705
Year 2,0.80294,0.881979,1.19135,1.277852,1.408792,1.23511,1.148269,1.032933,0.880283,0.934898,0.70253,0.503067
Year 3,0.863138,0.907654,1.197859,1.017076,1.192082,1.201937,1.319174,1.104069,0.981735,0.955909,0.701045,0.558321
Year 4,0.675293,0.638464,1.067446,1.241572,1.167915,1.309927,1.401262,1.167031,1.096025,0.978467,0.744236,0.512362
Year 5,0.636715,0.749112,1.111317,1.269106,1.44079,1.666512,1.371931,1.033812,0.993979,0.767331,0.560753,0.398641
Average,0.736329,0.829419,1.158845,1.234377,1.32183,1.336871,1.277385,1.082001,0.949031,0.902765,0.681327,0.489819


Deseasonalize the data.

In [482]:
# Observed demand for the 60 fitted months: row positions 5..64 are the
# monthly columns 2018-01 through 2022-12 (positions 0..4 are region metadata).
observed_dm = row.iloc[5:65].to_numpy(dtype=float)

# Average seasonal index per calendar month, in January..December order
# (the column order of SI).
seasonal_index = SI.loc['Average'].to_numpy(dtype=float)

# Repeat the 12 indices once per year so every observation is divided by the
# index of its own calendar month; this replaces 60 hand-written statements.
y_dm = observed_dm / np.tile(seasonal_index, observed_dm.size // seasonal_index.size)
print("y = ",y_dm)

y =  [2784.0806865684945 3407.205461810885 3083.2415346368616 3225.107950032159 3085.116846496077 2769.900248059269 2614.716625388807 2887.244309478417 2435.114324232996 2831.3021278917217 2985.351344917954 2835.7387513902545 3214.594626881769 3134.7254779576438 3030.6029302112115 3051.741182559945 3141.856366723445 2723.52330628782 2649.9448433955426 2814.231409206716 2734.3667985221223 3052.843765441935 3039.6571461775234 3027.6461974886593 3449.543875065354 3220.327596778795 3041.820993449465 2424.7043693660517 2653.8964927680818 2645.729726542164 3039.0209400477092 3002.771050414653 3044.1563317650907 3115.9831321437455 3027.91535131059 3354.29716957105 3112.7380163975554 2612.6731195131592 3126.387931707067 3413.8670940556435 2998.872775750478 3325.6755341267917 3723.2312186674153 3660.811366787455 3919.7859308294005 3678.6988915212864 3707.471729234391 3550.2877528204845 2800.377744245968 2924.940003663555 3105.6776611133687 3329.6140855644744 3529.95468507864 4037.037979685626
 

Perform simple linear regression to extract trend.

In [483]:
# Time index t = 1..60 as a single-feature column for scikit-learn.
x = np.arange(1, 61).reshape((-1, 1))
model_dm = LinearRegression()
model_dm.fit(x, y_dm)

# Keep the fit statistics under their own names, then report them in order.
r_sq_dm = model_dm.score(x, y_dm)
intercept_dm = model_dm.intercept_
slope_dm = model_dm.coef_[0]
for caption, value in (
    ('Coefficient of determination:', r_sq_dm),
    ('Intercept:', intercept_dm),
    ('Slope:', slope_dm),
):
    print(caption, value)

Coefficient of determination: 0.12662299339034178
Intercept: 2863.829208236648
Slope: 7.282200288287672


Extend the trend to year 6. 

In [484]:
# Trend values for t = 61..72, the twelve months after the 60 fitted ones.
year6_t = np.arange(61, 73).reshape((-1, 1))
forecast_dm = model_dm.predict(year6_t)
forecast_dm

array([3308.04342582, 3315.32562611, 3322.6078264 , 3329.89002669, 3337.17222698, 3344.45442726, 3351.73662755, 3359.01882784, 3366.30102813, 3373.58322842, 3380.86542871, 3388.14762899])

Seasonalize year 6 forecasts.

In [485]:
# Re-seasonalize the year-6 trend: multiply each month's trend value by that
# month's average seasonal index (SI columns run January..December).
#
# The trend is recomputed from the fitted model instead of scaling
# forecast_dm in place, so re-running this cell gives the same answer; the
# in-place version applied the seasonal factors again on every execution.
trend_year6 = model_dm.predict(np.arange(61, 73).reshape((-1, 1)))
forecast_dm = trend_year6 * SI.loc['Average'].to_numpy(dtype=float)
forecast_dm

array([2435.80908256, 2749.79314409, 3850.3885052 , 4110.34061545, 4411.17436348, 4471.10496229, 4281.45835282, 3634.46029964, 3194.72543798, 3045.55230856, 2303.47436113, 1659.58061347])

# Forecast Using Multiple Linear Regression

Populate independent variables

In [486]:
def Add_Dummy_Vars(array, num_dummy_vars, num_rows):
    """Append cyclic 0/1 indicator columns to the right of ``array``.

    ``num_dummy_vars`` zero-filled columns are appended. For each of the first
    ``num_rows`` rows, the row's position within a cycle of length
    ``num_dummy_vars + 1`` is computed; position 0 is the baseline and keeps
    all zeros, while position k > 0 gets a 1 in column k.

    Returns a new padded array; the input is not modified.
    """
    padded = np.pad(array, ((0, 0), (0, num_dummy_vars)), mode='constant', constant_values=0)
    cycle_length = num_dummy_vars + 1
    row_ids = np.arange(num_rows)
    col_ids = row_ids % cycle_length
    # Baseline rows (cycle position 0) receive no indicator.
    flagged = col_ids != 0
    padded[row_ids[flagged], col_ids[flagged]] = 1
    return padded

In [487]:
# Design matrix: column 0 is the time index 1..60, columns 1-11 are the month
# indicators (the first month of each 12-month cycle is the all-zero baseline).
time_index = np.arange(1, 61).reshape((-1, 1))
x = Add_Dummy_Vars(time_index, 11, time_index.size)
x

array([[ 1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 2,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 3,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 4,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 5,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0],
       [ 6,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0],
       [ 7,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 8,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0],
       [ 9,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0],
       [10,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0],
       [11,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0],
       [12,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1],
       [13,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [14,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [15,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [16,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0],
       [17,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0],
       [18,  0,  0,  0,  0,  1,

Run multiple linear regression.

In [488]:
# Regress the 60 observed months (row positions 5..64) on time + month dummies.
y_mlr = row[5:65]
model_mlr = LinearRegression().fit(x, y_mlr)

# Names for the 12 regressors, in the column order of x.
var_names = ["Time"] + [
    "Is_" + month
    for month in ("February", "March", "April", "May", "June", "July",
                  "August", "September", "October", "November", "December")
]
r_sq_mlr = model_mlr.score(x, y_mlr)
intercept_mlr = model_mlr.intercept_
print('Coefficient of determination:', r_sq_mlr)
print('Intercept:', intercept_mlr)
coefs_mlr = {"Variable": var_names, "Coefficient": list(model_mlr.coef_)}
pd.DataFrame(coefs_mlr)

Coefficient of determination: 0.8542750304083581
Intercept: 2033.9361111111107


Unnamed: 0,Variable,Coefficient
0,Time,9.130556
1,Is_February,266.669444
2,Is_March,1285.938889
3,Is_April,1523.408333
4,Is_May,1775.077778
5,Is_June,1836.947222
6,Is_July,1644.216667
7,Is_August,1019.286111
8,Is_September,611.555556
9,Is_October,441.825


Calculate the forecasts.

In [489]:
# Rows for t = 61..72, built the same way as the training matrix.
year6_time = np.arange(61, 73).reshape((-1, 1))
x_new = Add_Dummy_Vars(year6_time, 11, year6_time.size)

forecast_mlr = model_mlr.predict(x_new)
forecast_mlr

array([2590.9, 2866.7, 3895.1, 4141.7, 4402.5, 4473.5, 4289.9, 3674.1, 3275.5, 3114.9, 2430.7, 1837.7])

# Calculate Forecast Accuracy

In [490]:
# Observed demand for the eight 2023 months on file (row positions 65..72).
# .iloc makes the lookup explicitly positional: plain row[t] with an integer
# key on a string-labelled Series relies on a deprecated positional fallback.
actual_Year6 = row.iloc[65:73].tolist()
actual_Year6

[2102.0, 2135.0, 3511.0, 3367.0, 3533.0, 3630.0, 2905.0, 2809.0]

In [491]:
# Forecast error (actual minus forecast) for the eight months with actuals.
deviation_dm = [actual - predicted for actual, predicted in zip(actual_Year6, forecast_dm[:8])]
deviation_mlr = [actual - predicted for actual, predicted in zip(actual_Year6, forecast_mlr[:8])]

# Mean absolute deviation of each method, via the course helper module.
MAD_dm = pf.CalculateMeanAbsoluteDeviation(deviation_dm)
MAD_mlr = pf.CalculateMeanAbsoluteDeviation(deviation_mlr)
for caption, value in (("MAD_DM:", MAD_dm), ("MAD_MLR:", MAD_mlr)):
    print(caption, value)

MAD_DM: 744.0661656930478
MAD_MLR: 792.7999999999997


# Adding Independent Variables

Import monthly average 30-year mortgage rates from https://www.freddiemac.com/pmms/pmms_archives (the weekly rates were simply averaged to obtain each monthly rate).

In [492]:
# Read the rates with 'date' as the index, then transpose so that every month
# becomes a column of a single-row frame.
mortgage_data = pd.read_csv("30_yr_mortgage_rates.csv", index_col="date").T
mortgage_data

date,Apr-71,May-71,Jun-71,Jul-71,Aug-71,Sep-71,Oct-71,Nov-71,Dec-71,Jan-72,...,Nov-22,Dec-22,Jan-23,Feb-23,Mar-23,Apr-23,May-23,Jun-23,Jul-23,Aug-23
avg_pmms30,7.31,7.425,7.53,7.604,7.6975,7.6875,7.628,7.55,7.48,7.4375,...,6.805,6.364,6.2725,6.2575,6.544,6.3425,6.425,6.714,6.84,7.072


In [493]:
# Float time index (1.0..60.0) so the matrix can hold fractional rates.
mortgage_x = Add_Dummy_Vars(np.arange(1.0, 61.0).reshape((-1, 1)), 11, 60)
# Append one more zero column (index 12) to receive the mortgage rate.
mortgage_x = np.pad(mortgage_x, ((0,0),(0,1)), mode='constant', constant_values=0)

# Keep the rate columns whose label contains one of the training years' two
# digits, in file order.
training_years = ["18", "19", "20", "21", "22"]
training_dates = [date for date in list(mortgage_data) if any(yy in date for yy in training_years)]
monthly_mortgage_rate = mortgage_data[training_dates].values[0]

# Fill the rate column for all 60 training rows at once.
mortgage_x[:, 12] = monthly_mortgage_rate[:60]

mortgage_x

array([[ 1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.0325],
       [ 2.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.33  ],
       [ 3.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.444 ],
       [ 4.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.4675],
       [ 5.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.586 ],
       [ 6.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.57  ],
       [ 7.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4.5275],
       [ 8.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  4

In [494]:
# Same regression as before, with the mortgage rate as a 13th regressor.
y_mlr_adj = row[5:65]
model_mlr_adjusted = LinearRegression().fit(mortgage_x, y_mlr_adj)

# Names for the 13 regressors, in the column order of mortgage_x.
var_names = ["Time"] + [
    "Is_" + month
    for month in ("February", "March", "April", "May", "June", "July",
                  "August", "September", "October", "November", "December")
] + ["Mortgage_Rate"]
r_sq_mlr_adj = model_mlr_adjusted.score(mortgage_x, y_mlr_adj)
intercept_mlr_adj = model_mlr_adjusted.intercept_
print('Coefficient of determination:', r_sq_mlr_adj)
print('Intercept:', intercept_mlr_adj)
coefs_mlr_adj = {"Variable": var_names, "Coefficient": list(model_mlr_adjusted.coef_)}
pd.DataFrame(coefs_mlr_adj)

Coefficient of determination: 0.8551861842752142
Intercept: 2129.9726829807646


Unnamed: 0,Variable,Coefficient
0,Time,9.266125
1,Is_February,268.908077
2,Is_March,1291.71463
3,Is_April,1532.009466
4,Is_May,1784.219744
5,Is_June,1845.676539
6,Is_July,1650.403615
7,Is_August,1023.03935
8,Is_September,620.435042
9,Is_October,457.00697


Predict next 8 months

In [495]:
# Rows for t = 61..68 (the eight months with observed rates), laid out like
# the training matrix: time, 11 month dummies, then a zero column for the rate.
mortgage_x_new = Add_Dummy_Vars(np.arange(61.0, 69.0).reshape((-1, 1)), 11, 8)
mortgage_x_new = np.pad(mortgage_x_new, ((0,0),(0,1)), mode='constant', constant_values=0)

# Rate columns whose label contains "23", in file order.
forecast_dates = [date for date in list(mortgage_data) if "23" in date]
monthly_mortgage_rate = mortgage_data[forecast_dates].values[0]

# Fill the rate column for all eight rows at once.
mortgage_x_new[:, 12] = monthly_mortgage_rate[:8]

mortgage_x_new

array([[61.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.2725],
       [62.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.2575],
       [63.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.544 ],
       [64.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.3425],
       [65.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.425 ],
       [66.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.714 ],
       [67.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  6.84  ],
       [68.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  1.    ,  0.    ,  0.    ,  0.    ,  0.    ,  7

In [496]:
# Forecast the eight months using the rate-augmented regression.
forecast_mlr_adjusted = model_mlr_adjusted.predict(X=mortgage_x_new)
forecast_mlr_adjusted

array([2524.81522697, 2803.39690019, 3827.68686935, 4082.72153278, 4341.95684159, 4404.82914015, 4215.39957888, 3590.99921031])

In [497]:
# Forecast error (actual minus forecast) of the rate-augmented model.
deviation_mlr_adjusted = [
    actual - predicted
    for actual, predicted in zip(actual_Year6, forecast_mlr_adjusted[:8])
]

MAD_mlr_adjusted = pf.CalculateMeanAbsoluteDeviation(deviation_mlr_adjusted)
# Report all three methods side by side for comparison.
for caption, value in (
    ("MAD_MLR_ADJUSTED:", MAD_mlr_adjusted),
    ("MAD_MLR:", MAD_mlr),
    ("MAD_DM:", MAD_dm),
):
    print(caption, value)

MAD_MLR_ADJUSTED: 724.9756625270112
MAD_MLR: 792.7999999999997
MAD_DM: 744.0661656930478
