In [8]:
import pandas as pd
import numpy as np

In [19]:
df = pd.read_csv('./AirPassengers.csv')
df.rename({'Passengers': 'y'}, axis=1, inplace=True)
df

Unnamed: 0,Month,y
0,1949-01-01,112
1,1949-02-01,118
2,1949-03-01,132
3,1949-04-01,129
4,1949-05-01,121
...,...,...
139,1960-08-01,606
140,1960-09-01,508
141,1960-10-01,461
142,1960-11-01,390


In [48]:
import torch
import torch.nn as nn
import torch.optim as optimizers
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [34]:
def preprocessing(df: pd.DataFrame):
    max_lag = 10
    for i in range(1, max_lag+1):
        df[f'y_lag_{i}'] = df['y'].shift(i)
    
    n = 10
    df[f'mean_{n}'] = df['y'].rolling(n).mean().shift(1)
    df[f'std_{n}'] = df['y'].rolling(n).std().shift(1)
    
    df['time'] = np.arange(df.shape[0])
    
    return df

preprocessing(df)

Unnamed: 0,Month,y,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,y_lag_6,y_lag_7,y_lag_8,y_lag_9,y_lag_10,mean_10,std_10,time
0,1949-01-01,112,,,,,,,,,,,,,0
1,1949-02-01,118,112.0,,,,,,,,,,,,1
2,1949-03-01,132,118.0,112.0,,,,,,,,,,,2
3,1949-04-01,129,132.0,118.0,112.0,,,,,,,,,,3
4,1949-05-01,121,129.0,132.0,118.0,112.0,,,,,,,,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,1960-08-01,606,622.0,535.0,472.0,461.0,419.0,391.0,417.0,405.0,362.0,407.0,449.1,77.856635,139
140,1960-09-01,508,606.0,622.0,535.0,472.0,461.0,419.0,391.0,417.0,405.0,362.0,469.0,90.332718,140
141,1960-10-01,461,508.0,606.0,622.0,535.0,472.0,461.0,419.0,391.0,417.0,405.0,483.6,82.583560,141
142,1960-11-01,390,461.0,508.0,606.0,622.0,535.0,472.0,461.0,419.0,391.0,417.0,489.2,78.457065,142


In [90]:
train_val, test = train_test_split(df.drop('Month', axis=1).dropna(), test_size=0.2)
train, val = train_test_split(train_val, test_size=0.2)
X_train, y_train = train.drop('y', axis=1).values, train['y'].values.reshape(-1, 1)
X_val, y_val = val.drop('y', axis=1).values, val['y'].values.reshape(-1,1)

In [91]:
y_train.shape

(85, 1)

In [108]:
# 
np.random.seed(123)
torch.manual_seed(123)
device = torch.device('cpu')


class MLP(nn.Module):
    '''多層パーセプトロン'''
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.Sigmoid()
        self.l2 = nn.Linear(hidden_dim, output_dim)
        self.a2 = nn.Sigmoid()
        
        self.layers = [self.l1, self.a1, self.l2, self.a2]
        
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

n_features = df.shape[1] - 2
print(f'n_features: {n_features}')
model = MLP(input_dim=n_features, hidden_dim=n_features*2, output_dim=1).to(device)
criterion = nn.BCELoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.1)

def compute_loss(y, yhat):
    return criterion(yhat, y)

def train_step(x, y):
    model.train()
    preds = model(x)
    loss = compute_loss(y, preds)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss

epochs = 100
batch_size = 10
n_batches = X_train.shape[0] // batch_size

for epoch in range(epochs):
    train_loss = 0.
    x_, y_ = shuffle(X_train, y_train)
    x_ = torch.Tensor(x_).to(device)
    y_ = torch.Tensor(y_).to(device)
    
    for n_batch in range(n_batches):
        start = n_batch * batch_size
        end = start + batch_size
        loss = train_step(x_[start:end], y_[start:end])
        train_loss += loss.item()
    
    print('epoch: {}, loss: {:.3}'.format(
        epoch+1,
        train_loss
    ))

n_features: 13
epoch: 1, loss: -2.04e+05
epoch: 2, loss: -2.4e+05
epoch: 3, loss: -2.38e+05
epoch: 4, loss: -2.41e+05
epoch: 5, loss: -2.37e+05
epoch: 6, loss: -2.39e+05
epoch: 7, loss: -2.4e+05
epoch: 8, loss: -2.41e+05
epoch: 9, loss: -2.4e+05
epoch: 10, loss: -2.39e+05
epoch: 11, loss: -2.37e+05
epoch: 12, loss: -2.41e+05
epoch: 13, loss: -2.39e+05
epoch: 14, loss: -2.37e+05
epoch: 15, loss: -2.38e+05
epoch: 16, loss: -2.35e+05
epoch: 17, loss: -2.36e+05
epoch: 18, loss: -2.41e+05
epoch: 19, loss: -2.36e+05
epoch: 20, loss: -2.35e+05
epoch: 21, loss: -2.41e+05
epoch: 22, loss: -2.4e+05
epoch: 23, loss: -2.37e+05
epoch: 24, loss: -2.4e+05
epoch: 25, loss: -2.35e+05
epoch: 26, loss: -2.39e+05
epoch: 27, loss: -2.4e+05
epoch: 28, loss: -2.46e+05
epoch: 29, loss: -2.38e+05
epoch: 30, loss: -2.36e+05
epoch: 31, loss: -2.37e+05
epoch: 32, loss: -2.3e+05
epoch: 33, loss: -2.41e+05
epoch: 34, loss: -2.41e+05
epoch: 35, loss: -2.39e+05
epoch: 36, loss: -2.4e+05
epoch: 37, loss: -2.42e+05
epo

In [103]:
x_, y_ = shuffle(X_train, y_train)
y_.shape

(85, 1)

In [105]:
yhat = model(torch.Tensor(x_))
compute_loss(torch.Tensor(y_), yhat)

RuntimeError: all elements of input should be between 0 and 1

In [89]:
y_.shape

torch.Size([85])