# Tuning a Pyteller Pipeline
In this notebook, we walk through the processes of tuning a pipeline, which is as simple as specifying which parameters to tune and their ranges in the pipeline `.json`, then setting the `tune=True` argument in the `pyteller.fit` method.

In [7]:
from pyteller.data import load_data

 
## Load the input Data
Input data is loaded from s3 bucket

In [8]:
current_data,input_data = load_data('AL_Weather')
current_data.head()

Unnamed: 0,station,valid,tmpf,dwpf,relh,drct,sknt,p01i,alti,vsby,feel
0,8A0,1/1/16 0:15,41.0,39.2,93.24,350,6,0.0,30.25,10.0,36.37
1,4A6,1/1/16 0:15,41.0,32.0,70.08,360,5,0.0,30.3,10.0,37.08
2,8A0,1/1/16 0:35,39.2,37.4,93.19,360,6,0.0,30.25,10.0,34.2
3,4A6,1/1/16 0:35,41.0,32.0,70.08,360,5,0.0,30.29,10.0,37.08
4,8A0,1/1/16 0:55,37.4,37.4,100.0,360,8,0.0,30.25,10.0,30.76


In [13]:
pipeline = 'pyteller.LSTM.LSTM'

## Set hyperparameters

In [14]:
hyperparameters = {
    'keras.Sequential.LSTMTimeSeriesRegressor#1': {
        'epochs': 10,
        'verbose': False
    }
}


In [15]:
from mlblocks.discovery import find_pipelines
find_pipelines('pyteller')

['pyteller.ARIMA.arima',
 'pyteller.LSTM.LSTM',
 'pyteller.persistence.persistence']

## Instantiate the pyteller pipeline 
by specifying the column names and desired prediction length

In [16]:
from pyteller.core import Pyteller

pyteller = Pyteller(
    pipeline=pipeline,
    pred_length=5,
    offset=0,
    time_column='valid',
    targets='tmpf',
    entity_column='station',
    entities='8A0',
)

## Get the tunable hyperparameters
These hyperparamteres are set in the primitive `.jsons` and also can be set in the pipelines in the `tunable` `.json` field


In [17]:
tunables = pyteller.pipeline.get_tunable_hyperparameters(flat=True)
print(tunables)

{('sklearn.impute.SimpleImputer#1', 'strategy'): {'type': 'str', 'default': 'mean', 'values': ['mean', 'median', 'most_frequent', 'constant']}, ('keras.Sequential.LSTMTimeSeriesRegressor#1', 'lstm_1_units'): {'type': 'int', 'default': 80, 'range': [1, 500]}, ('keras.Sequential.LSTMTimeSeriesRegressor#1', 'dropout_1_rate'): {'type': 'float', 'default': 0.3, 'range': [0.01, 0.75]}, ('keras.Sequential.LSTMTimeSeriesRegressor#1', 'lstm_2_units'): {'type': 'int', 'default': 80, 'range': [1, 500]}, ('keras.Sequential.LSTMTimeSeriesRegressor#1', 'dropout_2_rate'): {'type': 'float', 'default': 0.3, 'range': [0.01, 0.75]}}


## Fit the pipeline

In [18]:
pyteller.fit(current_data, tune=True, max_evals=4)

scoring pipeline 1
New best found: 5.299655460389213
scoring pipeline 2
New best found: 2.9210532356686763
scoring pipeline 3


scoring pipeline 4


## Check what tuner found the best hyperparameters to be

In [19]:
best_params=pyteller.pipeline.get_hyperparameters()
print(best_params)

{'pyteller.primitives.preprocessing.format_data#1': {'make_index': False}, 'pyteller.primitives.preprocessing.get_index#1': {}, 'sklearn.impute.SimpleImputer#1': {'missing_values': nan, 'fill_value': None, 'verbose': False, 'copy': True, 'strategy': 'most_frequent'}, 'mlprimitives.custom.preprocessing.RangeScaler#1': {'out_min': -1, 'out_max': 1}, 'mlprimitives.custom.timeseries_preprocessing.rolling_window_sequences#1': {'window_size': 144, 'target_size': 5, 'step_size': 2, 'target_column': 0, 'offset': 0, 'drop_windows': False}, 'keras.Sequential.LSTMTimeSeriesRegressor#1': {'classification': False, 'verbose': 1, 'epochs': 1, 'callbacks': [{'class': 'keras.callbacks.EarlyStopping', 'args': {'monitor': 'val_loss', 'patience': 10, 'min_delta': 0.0003}}], 'validation_split': 0.2, 'batch_size': 64, 'input_shape': None, 'dense_units': 5, 'optimizer': 'keras.optimizers.Adam', 'loss': 'keras.losses.mean_squared_error', 'metrics': ['mse'], 'return_sequences': False, 'layers': [{'class': 'ker

## Forecast

In [20]:
output = pyteller.forecast(data=input_data, postprocessing=False, predictions_only=False)



## Evaluate

In [21]:
scores = pyteller.evaluate(actuals=output['actuals'], forecasts=output['forecasts'],
                           metrics=['MAPE', 'sMAPE'])


scores.head()

Unnamed: 0,tmpf
sMAPE,10.455404
MAPE,10.754504


## Plot

In [None]:
pyteller.plot(output)


