In [32]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
from scalecast.Forecaster import Forecaster
from scalecast.MVForecaster import MVForecaster
from scalecast.multiseries import export_model_summaries
from scalecast import GridGenerator
from sklearn.linear_model import LinearRegression

sns.set(rc={'figure.figsize':(12,8)})

In [6]:
data = pd.read_csv('avocado.csv',parse_dates=['Date']).sort_values(['Date'])
data = data.sort_values(['region','type','Date'])

data_cali = data.loc[data['region'] == 'California']
data_cali_org = data_cali.loc[data_cali['type'] == 'organic']
data_cali_con = data_cali.loc[data_cali['type'] == 'conventional']

I want to predict Volume using AvaragePrice,for exemple

In [8]:

data_cali_org_vol = data_cali_org[['Date','Total Volume']]
data_cali_org_px = data_cali_org[['Date','AveragePrice']]
data_cali_org_bag = data_cali_org[['Date','Small Bags']]

Using the MVForecaster:

In [7]:
models = ('mlr','elasticnet','knn','rf','gbt','xgboost','mlp')
GridGenerator.get_example_grids()
GridGenerator.get_mv_grids()

In [9]:
f_vol = Forecaster(data_cali_org['Total Volume'],current_dates = data_cali_org['Date'])
f_px = Forecaster(data_cali_org['AveragePrice'],current_dates = data_cali_org['Date'])
#f_sb = Forecaster(data_cali_org['Small Bags'],current_dates = data_cali_org['Date'])

In [16]:
mvf = MVForecaster(f_vol,f_px,names=['Volume','Price']) # init the mvf object
mvf.set_test_length(.2)
mvf.set_validation_length(4)
mvf.set_optimize_on('Volume')
mvf.tune_test_forecast(models)
mvf.set_best_model(determine_best_by='LevelTestSetMAPE')

In [21]:
pd.options.display.max_colwidth = 100
results = mvf.export_model_summaries()


results [
    [
        'ModelNickname',
        'Series',
        'HyperParams',
        'LevelTestSetMAPE',
        'LevelTestSetR2',
        'InSampleMAPE',
        'InSampleR2',
        'Lags'
    ]
]

Unnamed: 0,ModelNickname,Series,HyperParams,LevelTestSetMAPE,LevelTestSetR2,InSampleMAPE,InSampleR2,Lags
0,mlr,Volume,{},0.147779,-0.103135,0.113658,0.688821,1
1,elasticnet,Volume,"{'alpha': 2, 'l1_ratio': 0}",0.151841,0.010941,0.113606,0.68851,1
2,knn,Volume,{'n_neighbors': 88},0.215746,-0.159063,0.158867,0.587848,3
3,rf,Volume,"{'max_depth': 5, 'n_estimators': 100, 'max_features': 'auto', 'max_samples': 1}",0.162041,-0.031288,0.292354,-0.028625,3
4,gbt,Volume,"{'max_depth': 3, 'max_features': None}",0.191839,-1.004163,0.056094,0.937418,1
5,xgboost,Volume,"{'n_estimators': 150, 'scale_pos_weight': 5, 'learning_rate': 0.1, 'gamma': 3, 'subsample': 0.9}",0.216964,-1.427472,0.001241,0.999969,6
6,mlp,Volume,"{'activation': 'relu', 'hidden_layer_sizes': (25,), 'solver': 'lbfgs'}",0.166318,-0.390333,0.111516,0.690314,1
7,mlr,Price,{},0.074531,0.400218,0.059451,0.782215,1
8,elasticnet,Price,"{'alpha': 2, 'l1_ratio': 0}",0.143313,-1.371104,0.1259,0.181386,1
9,knn,Price,{'n_neighbors': 88},0.178178,-2.406891,0.094196,0.47563,3


In [46]:
results = results[results['ModelNickname']=='mlr'] 
results [
    [
        'ModelNickname',
        'Series',
        'HyperParams',
        'LevelTestSetMAPE',
        'LevelTestSetR2',
        'InSampleMAPE',
        'InSampleR2',
        'Lags'
    ]
]

Unnamed: 0,ModelNickname,Series,HyperParams,LevelTestSetMAPE,LevelTestSetR2,InSampleMAPE,InSampleR2,Lags
0,mlr,Volume,{},0.147779,-0.103135,0.113658,0.688821,1
7,mlr,Price,{},0.074531,0.400218,0.059451,0.782215,1


Using Ingest_Xvars_df, I don1t could undestend this error.

In [27]:
f_vol2 = Forecaster(data_cali_org['Total Volume'],current_dates = data_cali_org['Date'])
f_vol2.ingest_Xvars_df(data_cali_org_px, date_col='Date')
f_vol2.generate_future_dates(24)

In [28]:
for m in models:
    f_vol2.set_estimator(m)
    f_vol2.tune() # by default, will pull grids from Grids.py
    f_vol2.auto_forecast()

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by StandardScaler.

Using mlr in sklearn:


In [43]:
data_cali_org_vol = np.array(data_cali_org['AveragePrice'])
data_cali_org_px = np.array(data_cali_org['Total Volume']).reshape((-1,1))

model = LinearRegression().fit(data_cali_org_px, data_cali_org_vol)
r_sq = model.score(data_cali_org_px, data_cali_org_vol)
print(f"coefficient of determination: {r_sq}")

coefficient of determination: 0.2343247035682844


If a did the inverse:

In [45]:
data_cali_org_vol = np.array(data_cali_org['AveragePrice']).reshape((-1,1))
data_cali_org_px = np.array(data_cali_org['Total Volume'])

model = LinearRegression().fit(data_cali_org_vol, data_cali_org_px)
r_sq = model.score(data_cali_org_vol, data_cali_org_px)
print(f"coefficient of determination: {r_sq}")

coefficient of determination: 0.23432470356828417
