In [1]:
import pandas as pd
import numpy as np
import datetime
%matplotlib inline 

In [2]:
import sys
sys.path.append("..")
from src.models.linear_regression import FA_LinearRegression
from src.validators.walk_forward_validation import WalkForwardValidator

In [3]:
# Generate Xs
# Seed NumPy's global RNG so the random draws made through np.random in this
# notebook are identical on every Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# x1: the integers 1..6, each repeated 20 times (120 values).
x1 = np.repeat(np.arange(1, 7), 20)
# x2: decreasing sequence 360, 357, ..., 3 (120 values).
x2 = np.arange(start=360, stop=1, step=-3)
# x_redundant: Gaussian noise (mean -10, std 5) with one value per x1 entry.
x_redundant = np.random.normal(loc=-10, scale=5, size=len(x1))

In [4]:
tuple(len(feature) for feature in (x1, x2))  # sanity check: both features must have the same length

(120, 120)

In [5]:
# Generate y
# Ground truth: y = 5*x1 - 2*x2 + 10 plus unit-variance Gaussian noise.
coeff1, coeff2, intercept = 5, -2, 10
noise = np.random.normal(loc=0, scale=1, size=len(x1))
y = coeff1*x1 + coeff2*x2 + intercept + noise
# X3 (x_redundant) is offered as a feature although it does not enter y.
feature_columns = {'X1': x1, 'X2': x2, 'X3': x_redundant}
X = pd.DataFrame(feature_columns)

In [6]:
# Fit model
# X is passed twice to fit_predict (2nd and 4th positions of the call below);
# presumably train and test features, i.e. an in-sample prediction -- confirm
# against FA_LinearRegression.fit_predict.
model = FA_LinearRegression(confidence_level=80)
fit_outputs = model.fit_predict(X, y, X)
y_test_pred, y_test_down_pred, features_importances = fit_outputs
# Display the third returned object.
features_importances

Unnamed: 0,Feature,Importance
0,X1,5.043477
1,X2,-1.999494
2,X3,-0.006281
3,Intercept,9.684975


In [7]:
# Turn each row's X1 value (1..6) into the first day of that month in 2019.
fold_dates = X['X1'].apply(lambda month: datetime.date(2019, int(month), 1))
# The positional 3 is presumably the number of walk-forward splits -- confirm
# against WalkForwardValidator.
cv = WalkForwardValidator(3, date_col=fold_dates)

In [8]:
# Rebuild the model with the walk-forward splitter attached.
model = FA_LinearRegression(cv=cv, confidence_level=80)
model.do_grid_search(X, y)
# Last expression of the cell: the model's cross-validation score.
model.cross_val_score(X, y)



229.750873098204

In [16]:
# Use the X1 column (values 1..6) as per-row sample weights.
sample_weight = X['X1']
sample_weight_series = pd.Series(data=sample_weight, index=X.index)
model = FA_LinearRegression(
    confidence_level=80,
    cv=cv,
    sample_weight_series=sample_weight_series,
)
# Last expression of the cell: the cross-validation score with weights supplied.
model.cross_val_score(X, y)

172.48680646160048

In [17]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, make_scorer

def weighted_mape(y_true, y_pred, sample_weight=None):
    """Return the sample-weighted mean absolute error of ``y_pred``.

    NOTE: despite its name, this function delegates to
    ``mean_absolute_error`` and therefore reports an absolute error,
    not a percentage error.

    Parameters
    ----------
    y_true : pandas.Series or array-like
        Observed targets. When it carries a pandas index, that index is
        used to pick the matching rows out of ``sample_weight``.
    y_pred : array-like
        Predicted targets, same length as ``y_true``.
    sample_weight : pandas.Series, array-like or None, default None
        Weights for the full data set (label-indexed) or for exactly the
        rows being scored (positional). ``None`` weights every row by 1.

    Returns
    -------
    The value returned by ``mean_absolute_error`` for the selected weights.
    """
    # A plain list has an ``index`` *method*, so also require ``.values``
    # to recognise a real pandas index.
    y_has_labels = hasattr(getattr(y_true, "index", None), "values")
    if sample_weight is None:
        fold_weights = [1]*len(y_pred)
    elif hasattr(sample_weight, "loc") and y_has_labels:
        # Label-based lookup: keep only the weights of the rows being scored.
        fold_weights = sample_weight.loc[y_true.index.values].values
    else:
        # No labels to align on: the weights must already match row-for-row.
        fold_weights = np.asarray(sample_weight)
    # Shows which weights were applied on each call.
    print(fold_weights)
    return mean_absolute_error(y_true, y_pred, sample_weight=fold_weights)

# Extra keyword arguments that make_scorer forwards to weighted_mape.
score_params = dict(sample_weight=sample_weight_series)
# greater_is_better=False: the metric is an error, so sklearn negates it.
scorer = make_scorer(weighted_mape, greater_is_better=False, **score_params)
# Give y the same index as X before handing it to cross_val_score.
y_indexed = pd.Series(y, index=X.index)
cross_val_score(LinearRegression(), X, y_indexed, cv=cv, scoring=scorer)

[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]
[5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6]
[6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]


array([-1.03521559, -1.13391663, -0.80038987])