In [41]:
# ライブラリのインポート
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
from torch import nn, optim, utils
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torchvision import transforms
from torchinfo import summary
from torch.autograd import Variable

In [42]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [43]:
# One-time download that produced the local cache (kept for provenance):
#   df = pd.read_csv("https://raw.githubusercontent.com/aweglteo/tokyo_weather_data/main/data.csv", parse_dates=True, index_col=0)
#   df.to_csv("./data/wether.csv")
# Load the cached CSV; the first column becomes the index and pandas attempts
# to parse it as dates.
df = pd.read_csv(
    "./data/wether.csv",
    index_col=0,
    parse_dates=True,
)
df

Unnamed: 0,cloud,wind,ave_tmp,max_tmp,min_tmp,rain
2018-05-17,9.0,3.7,24.0,28.4,21.4,0.0
2018-05-18,7.8,3.0,23.4,27.1,20.0,0.0
2018-05-19,7.5,3.9,21.5,26.3,15.3,0.0
2018-05-20,4.3,3.5,16.8,22.1,13.1,0.0
2018-05-21,10.0,2.4,20.3,27.2,13.7,0.0
...,...,...,...,...,...,...
2023-05-13,10.0,3.0,16.7,18.1,15.4,5.0
2023-05-14,10.0,3.6,16.9,21.2,14.3,3.0
2023-05-15,9.8,2.0,16.1,18.1,14.6,15.5
2023-05-16,2.8,2.6,19.9,27.5,13.1,0.0


In [44]:
def normalize(r):
    """Scale ``r`` so that its Euclidean (L2) norm is 1.

    Parameters
    ----------
    r : array-like
        Numeric values accepted by ``np.linalg.norm`` and supporting division
        by a scalar (the notebook passes pandas Series columns).

    Returns
    -------
    The result of ``r / norm``. When every element of ``r`` is zero the input
    is returned unscaled (all zeros) instead of an array full of NaN.
    """
    norm = np.linalg.norm(r)
    if norm == 0:
        # A zero vector has no direction; dividing by its norm would be 0/0
        # and poison the whole column with NaN. Divide by one instead, using
        # the same scalar type so the result dtype is unchanged.
        norm = type(norm)(1)
    return r / norm

In [45]:
# Feature columns, in the order they appear in the model input.
feature_cols = ["cloud", "wind", "ave_tmp", "max_tmp", "min_tmp", "rain"]

# Normalize every feature column independently with `normalize`.
# NOTE(review): the scaling uses the full series, i.e. it is computed before
# the data is split into train and test portions — confirm this is intended.
cloud, wind, ave_tmp, max_tmp, min_tmp, rain = (
    normalize(df[col]) for col in feature_cols
)

# Stack the six series as rows, then transpose so each row is one day.
# Building the frame from a bare array discards the date index of `df`.
data = pd.DataFrame(
    np.array([cloud, wind, ave_tmp, max_tmp, min_tmp, rain]).T,
    columns=feature_cols,
)

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1827 entries, 0 to 1826
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   cloud    1827 non-null   float64
 1   wind     1827 non-null   float64
 2   ave_tmp  1827 non-null   float64
 3   max_tmp  1827 non-null   float64
 4   min_tmp  1827 non-null   float64
 5   rain     1827 non-null   float64
dtypes: float64(6)
memory usage: 85.8 KB


In [46]:
# Chronological 70/30 split (no shuffling, so the test rows follow the train rows).
df_train, df_test = train_test_split(data, test_size=0.3, shuffle=False)

ws = 20                       # window size: number of consecutive rows per sample
n_dim = df_train.shape[1]     # number of feature columns
n_train = len(df_train) - ws  # windows that still have a following row as target
n_test = len(df_test) - ws

train_values = df_train.to_numpy()
test_values = df_test.to_numpy()

# Each sample is `ws` consecutive rows; its label is taken from the row right after.
train = np.array([train_values[start:start + ws] for start in range(n_train)])
# Training target: column index 2 ("ave_tmp") of the row following each window.
train_labels = np.array([train_values[start + ws] for start in range(n_train)])[:, 2]
test = np.array([test_values[start:start + ws] for start in range(n_test)])
# NOTE(review): unlike train_labels, all columns are kept here — confirm that
# downstream code selects the target column itself.
test_labels = np.array([test_values[start + ws] for start in range(n_test)])

In [47]:
# Convert the windows and their targets to float32 tensors.
# `train` is rebound from the NumPy array to the tensor built from it.
train = torch.tensor(train, dtype=torch.float32)
labels = torch.tensor(train_labels, dtype=torch.float32)

# Pair every window with its target and iterate in shuffled mini-batches of 4.
dataset = TensorDataset(train, labels)
train_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

In [48]:
class MyLSTM(nn.Module):
    """LSTM followed by a linear layer that maps the final hidden state to one value.

    Parameters
    ----------
    feature_size : int
        Number of input features per time step.
    hidden_dim : int
        Size of the LSTM hidden state.
    n_layers : int
        Number of stacked LSTM layers.
    """

    def __init__(self, feature_size, hidden_dim, n_layers):
        super().__init__()

        self.feature_size = feature_size
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.n_output = 1

        # batch_first=True: inputs are laid out as (batch, seq_len, feature_size).
        self.lstm = nn.LSTM(feature_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, self.n_output)

    def forward(self, x):
        """Predict one value per sequence.

        Parameters
        ----------
        x : torch.Tensor
            Batched input of shape (batch, seq_len, feature_size).

        Returns
        -------
        torch.Tensor
            Predictions of shape (n_output, batch), i.e. (1, batch).
        """
        # Initialise the hidden state and cell state with zeros, on the same
        # device and with the same dtype as the input.
        h_0 = torch.zeros(
            self.n_layers, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        c_0 = torch.zeros_like(h_0)

        # nn.LSTM accepts only the input and the (h_0, c_0) tuple; any extra
        # positional argument raises a TypeError.
        _, (hn, _) = self.lstm(x, (h_0, c_0))

        # hn is (n_layers, batch, hidden_dim). Use the top layer only:
        # flattening all layers together would mix the layer axis into the
        # batch axis whenever n_layers > 1.
        last_hidden = hn[-1]
        y = self.fc(last_hidden).reshape(self.n_output, -1)

        return y

# Hyperparameters: 6 input features per time step, 64 hidden units, 1 LSTM layer.
feature_size, n_hidden, n_layers = 6, 64, 1
net = MyLSTM(feature_size=feature_size, hidden_dim=n_hidden, n_layers=n_layers)

In [49]:
# Show the layer list and parameter counts of the model.
summary(model=net)

Layer (type:depth-idx)                   Param #
MyLSTM                                   --
├─LSTM: 1-1                              18,432
├─Linear: 1-2                            65
Total params: 18,497
Trainable params: 18,497
Non-trainable params: 0

In [None]:
# Mean-squared-error loss, averaged over all elements.
func_loss = nn.MSELoss(reduction="mean")
# AdamW over every model parameter with a learning rate of 1e-3.
optimizer = optim.AdamW(params=net.parameters(), lr=1e-3)
# Presumably filled with loss values during training — usage is not visible here.
loss_history = list()
epochs = 200

# Move the model's parameters and buffers to the selected device.
net.to(device)