In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from hyperopt import Trials, STATUS_OK, tpe, fmin, hp
import time
from sklearn.metrics import mean_absolute_error




In [2]:
train = pd.read_csv("data/train.csv")

In [3]:
# 이상치 판단
def outlier(data, column):
    q25 = np.quantile(data[column].dropna(), 0.25)
    q75 = np.quantile(data[column].dropna(), 0.75)
    iqr = q75 - q25
    iqr_cut = iqr * 3
    result = data[(data[column] > q75 + iqr_cut) | (data[column] < q25 - iqr_cut)].index
    return result

# 보간
def time_interpolate(data, column):
    tem = data[["일시", column]].copy()
    tem.index = pd.to_datetime(tem['일시'])
    tem = tem.drop(["일시"], axis = 1)
    tem = tem.interpolate(method="time")
    return tem[column].values

# 강수량은 기상청에서 정한 강수표현에 따라 구간을 나누는 것으로 수치 변경
# 비가 내리지 않음 : 0, 매우 약한 비 : 0~1, 약한 비 : 1~3, 보통 비 : 3~15, 강한 비 : 15~30, 매우 강한 비 : 30 이상
train.강수량 = pd.cut(train.강수량, bins = [0, 0.9, 2.9, 14.9, 29.9, max(train.강수량)], labels = [1, 2, 3, 4, 5])
train.강수량 = train.강수량.astype('float')
train.강수량 = train.강수량.fillna(0)

rain = pd.get_dummies(train.강수량)
rain.columns = ["비안내림", "매우약한비", "약한비", "보통비", "강한비", "매우강한비"]
train = pd.concat([train, rain.astype("int")], axis=1)
train = train.drop(["강수량"], axis = 1)

train.최고기온 = time_interpolate(train, "최고기온")
train.최저기온 = time_interpolate(train, "최저기온")

train.일교차 = train.최고기온 - train.최저기온

train.평균풍속 = time_interpolate(train, "평균풍속")

train.일조합 = time_interpolate(train, "일조합")

train.loc[0:4749, "일사합"] = 0
train.loc[4780:4854, "일사합"] = 0
train.일사합 = time_interpolate(train, "일사합")


# 삭제
train = train.iloc[train.일조율.dropna().index]

# 가조합
train["가조합"] = train.일조합/(train.일조율/100)
train.가조합 = [np.nan if i == float("inf") else i for i in train.가조합]
train.가조합 = time_interpolate(train, "가조합")

# 일사합/일조합
train["일사_일조"] = train.일사합/train.일조합
train.일사_일조 = [np.nan if i == float("inf") else i for i in train.일사_일조] # 분모가 0인 경우 임의로 값을 설정할 수 없어 보간으로 처리
train.일사_일조 = time_interpolate(train, "일사_일조")

# sin + cos
train["sin_cos"] = [-np.sin(2 * np.pi * int(datetime.strptime(i,"%Y-%m-%d").strftime("%j"))/365) - np.cos(2 * np.pi * int(datetime.strptime(i,"%Y-%m-%d").strftime("%j"))/365) for i in train.일시]


In [4]:
train.일시 = train.일시.str.split("-", expand = True)[0].astype("int")
train = train.reset_index(drop = True)

In [5]:
year = [1980, 1990, 2000, 2005, 2010, 2015]

In [6]:
idx = []
for i in year:
    idx.append(train[train.일시 == i].index[0])
idx

[6939, 10592, 14244, 16071, 17897, 19723]

In [7]:
train = train.drop(["일시"], axis = 1)

In [8]:
# 결측치 없음
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22645 entries, 0 to 22644
Data columns (total 18 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   최고기온     22645 non-null  float64
 1   최저기온     22645 non-null  float64
 2   일교차      22645 non-null  float64
 3   평균습도     22645 non-null  float64
 4   평균풍속     22645 non-null  float64
 5   일조합      22645 non-null  float64
 6   일사합      22645 non-null  float64
 7   일조율      22645 non-null  float64
 8   평균기온     22645 non-null  float64
 9   비안내림     22645 non-null  int32  
 10  매우약한비    22645 non-null  int32  
 11  약한비      22645 non-null  int32  
 12  보통비      22645 non-null  int32  
 13  강한비      22645 non-null  int32  
 14  매우강한비    22645 non-null  int32  
 15  가조합      22645 non-null  float64
 16  일사_일조    22645 non-null  float64
 17  sin_cos  22645 non-null  float64
dtypes: float64(12), int32(6)
memory usage: 2.6 MB


In [14]:
class Model():
    def __init__(self, data, target, input_size, output_size, test_size):
        self.data = data
        self.target = target        
        
        self.input_size = input_size
        self.output_size = output_size
        self.test_size = test_size
        
        self.col_len = len(self.data.columns)

def Scale(self, data):
    scaler_in = MinMaxScaler()
    scaler_out = MinMaxScaler()
    
    inputs = data.drop(self.target, axis = 1)
    inputs_col = inputs.columns
    outputs = data[self.target]
    
    scaler_in.fit(inputs)
    inputs = pd.DataFrame(scaler_in.transform(inputs), columns = inputs_col)
        
    scaler_out.fit(outputs)
    outputs = pd.DataFrame(scaler_out.fit_transform(outputs), columns = self.target)

    data = pd.concat([inputs, outputs], axis = 1)
    
    return data,  scaler_out

def Split(self, data):
    data, _ = self.Scale(data)
    data = tf.keras.utils.timeseries_dataset_from_array(data = data,
                                                             targets = None,
                                                             sequence_length = self.input_size + self.output_size)
    inputs = np.concatenate([x[:, slice(0, self.input_size), :] for x in data], axis=0)
    outputs = np.concatenate([x[:, slice(self.input_size, self.input_size + self.output_size), :] for x in data], axis=0)

    outputs = outputs[:,:,self.col_len-1]
    outputs = outputs.reshape(-1, self.output_size, 1)
    
    train_in = inputs[:int(len(inputs)*0.8), :, :]
    train_out = outputs[:int(len(outputs)*0.8), :, :]
    
    test_in = inputs[int(len(inputs)*0.8):, :, :]
    test_out = outputs[int(len(outputs)*0.8):, :, :]
    
    return train_in, train_out, test_in, test_out

@property
def Data(self):
    return self.Split(self.data)

Model.Scale = Scale
Model.Split = Split
Model.Data = Data

In [55]:
# train 길이 결정
Year_loss = pd.DataFrame([], columns = ["Year", "MAE"])
for i in range(len(idx)):
    print("----------------------------------------------------------------------------")
    print("------------------------------[ {} ] ---------------------------------------".format(year[i]))
    train_c = train.copy()
    train_c = train_c.iloc[idx[i]:]
    
    output_size = 358
    model = Model(data = train_c,
         target = ["평균기온"],
         input_size = output_size * 2,
         output_size = output_size,
         test_size = 0.3)
    train_in, train_out, test_in, test_out = model.Data
    
    lstm_model, history = LSTM_fit(model.Data)
    
    _, _, test_in, _ = model.Split(train_c)
    _, scaler_out = model.Scale(train_c)
    pred = lstm_model.predict(test_in[-1].reshape(1, (output_size*2), len(train_c.columns)))
    pred = np.round(scaler_out.inverse_transform(pred))
    
    
    mae = mean_absolute_error(train_c.iloc[-output_size:].평균기온.values, pred.reshape(-1))
    
    Year_loss.loc[len(Year_loss)] = [year[i],mae]

----------------------------------------------------------------------------
------------------------------[ 1980 ] ---------------------------------------
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
----------------------------------------------------------------------------
------------------------------[ 1990 ] ---------------------------------------
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
----------------------------------------------------------------------------
------------------------------[ 2000 ] ---------------------------------------
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
-------------

In [56]:
# 2005년으로 결정
Year_loss

Unnamed: 0,Year,MAE
0,1980.0,2.97514
1,1990.0,2.913687
2,2000.0,2.740503
3,2005.0,2.615363
4,2010.0,2.631564
5,2015.0,2.715363


In [18]:
train = train.iloc[idx[3]:]

In [19]:
output_size = 358
space = {
    'input_size' : hp.choice("input_size", [output_size, output_size*2, output_size*3]),
    'lstm1_nodes' : hp.choice("lstm1_nodes", [64, 128, 256]),
    'lstm1_dropout' : hp.choice("lstm1_dropout", [0, 0.3, 0.5]),
    'lstm2_nodes' : hp.choice('lstm2_nodes', [64, 128, 256]),
    'lstm2_dropout' : hp.choice("lstm2_dropout", [0, 0.3, 0.5]),
    'num_layers' : hp.choice('num_layers',[
        { 
            'layers' : 'two',
        },
        {
            'layers' : 'three',
            'lstm3_nodes' : hp.choice('lstm3_nodes', [32, 64, 128]),
            'lstm3_dropout' : hp.choice("lstm3_dropout", [0, 0.3, 0.5])
        }
    ]),
    'lr' : hp.choice('lr', [0, 0.001, 0.002, 0.003])
}

In [20]:
Parameter_loss = pd.DataFrame([], columns = ["Parameters", "Loss"])
def hyperopt_model(params):
    global Parameter_loss
    print("--------------------------------------------------------------------------------------------------------------")
    print("---------------------------------------[ START {}]-------------------------------------------------------------".format(len(Parameter_loss)))
    print("Parameter : {}".format(params))
    input_size = params['input_size']
    
    output_size = 358
    model = Model(data = train,
         target = ["평균기온"],
         input_size = params['input_size'],
         output_size = output_size,
         test_size = 0.3)
    
    train_in, train_out, test_in, test_out = model.Data
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                                      mode = 'min',
                                                      patience = 3,
                                                      min_delta = 0.001)
    tf.random.set_seed = 1234
    initializer = tf.keras.initializers.GlorotUniform(seed=1234)
    
    lstm_model = tf.keras.Sequential()
    lstm_model.add(tf.keras.layers.GRU(params["lstm1_nodes"], 
                                        dropout = params["lstm1_dropout"],
                                        return_sequences = True, 
                                        kernel_initializer=initializer))
    if params["num_layers"]["layers"] == "two":
        lstm_model.add(tf.keras.layers.GRU(params["lstm2_nodes"], 
                                            dropout = params["lstm2_dropout"],
                                            return_sequences = False, 
                                        kernel_initializer=initializer))
    else:
        lstm_model.add(tf.keras.layers.GRU(params["lstm2_nodes"], 
                                            dropout = params["lstm2_dropout"],
                                            return_sequences = True, 
                                        kernel_initializer=initializer))
        lstm_model.add(tf.keras.layers.GRU(params["num_layers"]["lstm3_nodes"], 
                                            dropout = params["num_layers"]["lstm3_dropout"],
                                            return_sequences = False, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.Dense(output_size, 
                                        kernel_initializer=initializer))
   
    lstm_model.compile(loss = tf.keras.losses.MeanAbsoluteError(),
                      optimizer = tf.keras.optimizers.Adam(learning_rate = params["lr"]),
                      metrics = [tf.keras.metrics.mean_absolute_error])
    
    history = lstm_model.fit(train_in, train_out,
                             epochs = 100,
                             validation_data = [test_in, test_out],
                             callbacks = [early_stopping],
                            verbose = 2)
    val_error = np.amin(history.history["val_loss"])
    Parameter_loss.loc[len(Parameter_loss)] = [params,val_error]
    
    print("val_error : {}".format(val_error))
    print("--------------------------------------------------------------------------------------------------------------")
    
    return {"loss" : val_error, "model":lstm_model, 'status': STATUS_OK}

In [21]:
start = time.time()
trials = Trials()
best = fmin(hyperopt_model,
            space = space,
            algo = tpe.suggest,
            max_evals = 5,
            trials = trials)
end = time.time()

--------------------------------------------------------------------------------------------------------------
---------------------------------------[ START 0]-------------------------------------------------------------
Parameter : {'input_size': 1074, 'lr': 0, 'lstm1_dropout': 0.3, 'lstm1_nodes': 64, 'lstm2_dropout': 0.5, 'lstm2_nodes': 64, 'num_layers': {'layers': 'two'}}

Epoch 1/100                                          



129/129 - 81s - loss: 0.5698 - mean_absolute_error: 0.5698 - val_loss: 0.5807 - val_mean_absolute_error: 0.5807 - 81s/epoch - 625ms/step

Epoch 2/100                                          

129/129 - 82s - loss: 0.5697 - mean_absolute_error: 0.5697 - val_loss: 0.5807 - val_mean_absolute_error: 0.5807 - 82s/epoch - 632ms/step

Epoch 3/100                                          

129/129 - 68s - loss: 0.5697 - mean_absolute_error: 0.5697 - val_loss: 0.5807 - val_mean_absolute_error: 0.5807 - 68s/epoch - 530ms/step

Epoch 4/100                            

In [22]:
best

{'input_size': 1,
 'lr': 1,
 'lstm1_dropout': 1,
 'lstm1_nodes': 2,
 'lstm2_dropout': 1,
 'lstm2_nodes': 1,
 'lstm3_dropout': 0,
 'lstm3_nodes': 1,
 'num_layers': 1}

In [28]:
output_size = 358
model = Model(data = train,
     target = ["평균기온"],
     input_size = output_size * 2,
     output_size = output_size,
     test_size = 0.3)
train_in, train_out, test_in, test_out = model.Data

In [29]:
def LSTM_fit(data):
    train_in, train_out, test_in, test_out = data
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                                      mode = 'min',
                                                      patience = 3,
                                                      min_delta = 0.001)
    initializer = tf.keras.initializers.GlorotUniform(seed=1234)
    lstm_model = tf.keras.Sequential()
    lstm_model.add(tf.keras.layers.LSTM(256, return_sequences = True, 
                                        dropout = 0.3, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.LSTM(128, return_sequences = True, 
                                        dropout = 0.3, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.LSTM(64, return_sequences = False, 
                                        dropout = 0, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.Dense(output_size, 
                                        kernel_initializer=initializer))
   
    lstm_model.compile(loss = tf.keras.losses.MeanAbsoluteError(),
                      optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                      metrics = [tf.keras.metrics.mean_absolute_error])
    
    history = lstm_model.fit(train_in, train_out,
                             epochs = 100,
                             validation_data = [test_in, test_out],
                             callbacks = [early_stopping])
    
    return lstm_model, history

In [30]:
lstm_model, history = LSTM_fit(model.Data)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


In [31]:
_, _, test_in, _ = model.Split(train)
_, scaler_out = model.Scale(train)
lstm_pred = lstm_model.predict(test_in[-1].reshape(1, (output_size*2), len(train.columns)))
lstm_pred = np.round(scaler_out.inverse_transform(lstm_pred))



In [32]:
mean_absolute_error(train.iloc[-output_size:].평균기온.values, lstm_pred.reshape(-1))

2.7946927374301676

In [84]:
output_size = 358
model = Model(data = train,
     target = ["평균기온"],
     input_size = output_size * 2,
     output_size = output_size,
     test_size = 0.3)
train_in, train_out, test_in, test_out = model.Data

In [85]:
def GRU_fit(data):
    train_in, train_out, test_in, test_out = data
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                                      mode = 'min',
                                                      patience = 3,
                                                      min_delta = 0.001)
   
    initializer = tf.keras.initializers.GlorotUniform(seed=1234)
    lstm_model = tf.keras.Sequential()
    lstm_model.add(tf.keras.layers.GRU(128, return_sequences = True, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.GRU(128, return_sequences = True, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.GRU(128, return_sequences = False, 
                                        kernel_initializer=initializer))
    lstm_model.add(tf.keras.layers.Dense(output_size))
   
    lstm_model.compile(loss = tf.keras.losses.MeanAbsoluteError(),
                      optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                      metrics = [tf.keras.metrics.mean_absolute_error])
    
    history = lstm_model.fit(train_in, train_out,
                             epochs = 100,
                             validation_data = [test_in, test_out],
                             callbacks = [early_stopping])
    
    return lstm_model, history

In [86]:
GRU_model, history = GRU_fit(model.Data)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


In [89]:
_, _, test_in, _ = model.Split(train)
_, scaler_out = model.Scale(train)
gru_pred = GRU_model.predict(test_in[-1].reshape(1, (output_size*2), len(train.columns)))
gru_pred = np.round(scaler_out.inverse_transform(gru_pred))
mean_absolute_error(train.iloc[-output_size:].평균기온.values, gru_pred.reshape(-1))



2.8438547486033516

In [None]:
sub = pd.read_csv("data/sample_submission.csv")
scale_data, scaler_in, scaler_out = model.Scale(train)
pred = GRU_model.predict(scale_data[-(output_size*2):].values.reshape(1, (output_size*2), 13))
pred = np.round(scaler.inverse_transform(pred))
sub.평균기온 = pred[0]
sub.to_csv("data/GRU_128_128_128.csv", index = False)

In [None]:
# !pip install git+https://github.com/ourownstory/neural_prophet.git
from neuralprophet import NeuralProphet

In [None]:
train = pd.read_csv("data/train.csv")
# 이상치 판단
def outlier(data, column):
    q25 = np.quantile(data[column].dropna(), 0.25)
    q75 = np.quantile(data[column].dropna(), 0.75)
    iqr = q75 - q25
    iqr_cut = iqr * 3
    result = data[(data[column] > q75 + iqr_cut) | (data[column] < q25 - iqr_cut)].index
    return result

# 보간
def time_interpolate(data, column):
    tem = data[["일시", column]].copy()
    tem.index = pd.to_datetime(tem['일시'])
    tem = tem.drop(["일시"], axis = 1)
    tem = tem.interpolate(method="time")
    return tem[column].values

# 강수량은 기상청에서 정한 강수표현에 따라 구간을 나누는 것으로 수치 변경
# 비가 내리지 않음 : 0, 매우 약한 비 : 0~1, 약한 비 : 1~3, 보통 비 : 3~15, 강한 비 : 15~30, 매우 강한 비 : 30 이상
train.강수량 = pd.cut(train.강수량, bins = [0, 0.9, 2.9, 14.9, 29.9, max(train.강수량)], labels = [1, 2, 3, 4, 5])
train.강수량 = train.강수량.astype('float')
train.강수량 = train.강수량.fillna(0)

rain = pd.get_dummies(train.강수량)
rain.columns = ["비안내림", "매우약한비", "약한비", "보통비", "강한비", "매우강한비"]
train = pd.concat([train, rain.astype("int")], axis=1)
train = train.drop(["강수량"], axis = 1)

train.최고기온 = time_interpolate(train, "최고기온")
train.최저기온 = time_interpolate(train, "최저기온")

train.일교차 = train.최고기온 - train.최저기온

train.평균풍속 = time_interpolate(train, "평균풍속")

train.일조합 = time_interpolate(train, "일조합")

train.loc[0:4749, "일사합"] = 0
train.loc[4780:4854, "일사합"] = 0
train.일사합 = time_interpolate(train, "일사합")


# 삭제
train = train.iloc[train.일조율.dropna().index]

# 가조합
train["가조합"] = train.일조합/(train.일조율/100)
train.가조합 = [np.nan if i == float("inf") else i for i in train.가조합]
train.가조합 = time_interpolate(train, "가조합")

# 일사합/일조합
train["일사_일조"] = train.일사합/train.일조합
train.일사_일조 = [np.nan if i == float("inf") else i for i in train.일사_일조] # 분모가 0인 경우 임의로 값을 설정할 수 없어 보간으로 처리
train.일사_일조 = time_interpolate(train, "일사_일조")

# sin + cos
train["sin_cos"] = [-np.sin(2 * np.pi * int(datetime.strptime(i,"%Y-%m-%d").strftime("%j"))/365) - np.cos(2 * np.pi * int(datetime.strptime(i,"%Y-%m-%d").strftime("%j"))/365) for i in train.일시]


In [4]:
train = pd.read_csv("data/train.csv")
train = train.fillna(0)
train

Unnamed: 0,일시,최고기온,최저기온,일교차,강수량,평균습도,평균풍속,일조합,일사합,일조율,평균기온
0,1960-01-01,2.2,-5.2,7.4,0.0,68.3,1.7,6.7,0.00,0.0,-1.6
1,1960-01-02,1.2,-5.6,6.8,0.4,87.7,1.3,0.0,0.00,0.0,-1.9
2,1960-01-03,8.7,-2.1,10.8,0.0,81.3,3.0,0.0,0.00,0.0,4.0
3,1960-01-04,10.8,1.2,9.6,0.0,79.7,4.4,2.6,0.00,0.0,7.5
4,1960-01-05,1.3,-8.2,9.5,0.0,44.0,5.1,8.2,0.00,0.0,-4.6
...,...,...,...,...,...,...,...,...,...,...,...
23006,2022-12-27,3.3,-7.3,10.6,0.0,69.8,1.8,8.8,10.25,91.7,-2.6
23007,2022-12-28,0.1,-6.0,6.1,0.1,58.1,2.5,8.7,10.86,90.6,-3.3
23008,2022-12-29,2.1,-7.8,9.9,0.0,56.3,1.7,9.0,10.88,93.8,-2.9
23009,2022-12-30,2.3,-4.4,6.7,0.0,65.6,1.9,7.9,10.84,82.3,-1.8


In [None]:
train = train.iloc[16071:]


train['일시'] = pd.to_datetime(train['일시'])
train = train.set_index('일시')

# 데이터의 시간 간격 지정
train.index.freq = 'D'
train = train.reset_index()
train = train.rename(columns={'일시': 'ds', '평균기온': 'y'})
train = train.reset_index(drop=True)

train_c = train.copy()
i = int(len(train)*0.8)
train = train_c.iloc[:i]
test = train_c.iloc[i:]

col_lst=train.columns
col_lst=col_lst.drop(['ds','y'])
col_lst=list(col_lst)

In [None]:
m = NeuralProphet(

growth='off', # 추세 유형 설정(linear, discontinuous, off 중 선택 가능)
n_forecasts=358,
yearly_seasonality=True, #년간 계절성 설정

weekly_seasonality=False, #주간 계절성 설정

daily_seasonality=False, #일간 계절성 설정

epochs=100,#학습 횟수 설정

learning_rate=0.1
)

#독립 변인(변수) 추가 및 정규화
m = m.add_lagged_regressor(names=col_lst, normalize="minmax") 

#학습 수행
metrics = m.fit(train, freq='D', validation_df=test, progress='plot')

In [None]:
print("MAE(Train): ", metrics.MAE.tail(1).item())
print("MAE(Test): ", metrics.MAE_val.tail(1).item())

In [None]:
#future = metrics.make_future_dataframe(test, periods=358)
#pred = metrics.predict(future)
pred = metrics.predict(test)