# pytorch-lightning のインストール

In [None]:
% pip install --upgrade pip
% pip install -U pytorch-lightning==0.6.0

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/54/eb/4a3642e971f404d69d4f6fa3885559d67562801b99d7592487f1ecc4e017/pip-20.3.3-py2.py3-none-any.whl (1.5MB)
[K     |▏                               | 10kB 18.6MB/s eta 0:00:01[K     |▍                               | 20kB 24.9MB/s eta 0:00:01[K     |▋                               | 30kB 24.1MB/s eta 0:00:01[K     |▉                               | 40kB 16.7MB/s eta 0:00:01[K     |█                               | 51kB 15.7MB/s eta 0:00:01[K     |█▎                              | 61kB 12.0MB/s eta 0:00:01[K     |█▌                              | 71kB 12.3MB/s eta 0:00:01[K     |█▊                              | 81kB 12.3MB/s eta 0:00:01[K     |██                              | 92kB 12.0MB/s eta 0:00:01[K     |██▏                             | 102kB 11.7MB/s eta 0:00:01[K     |██▍                             | 112kB 11.7MB/s eta 0:00:01[K     |██▋                             | 122kB 11.7MB/

# インポート
- numpy
- pandas

In [None]:

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.tensorboard import SummaryWriter
# Default-constructed SummaryWriter; it writes to the ./runs/ directory by default.
# NOTE(review): no cell visible in this notebook logs through `writer` —
# confirm it is still needed.
writer = SummaryWriter()

# データロード

In [None]:
import pandas as pd
import datetime
import numpy as np

# Load the CSV whose `sysTime` and `sgv` columns the cells below work on.
# The path is relative to the current working directory.
# NOTE(review): looks like a mounted Google Drive folder — confirm.
df = pd.read_csv("drive/MyDrive/glu_analysis/glu.csv")

# Fixed offset added to every parsed timestamp.
# NOTE(review): presumably shifts UTC sysTime values to JST (+9 h) — confirm.
_LOCAL_OFFSET = datetime.timedelta(hours=9)
# Layout of the timestamp strings, e.g. '2020-11-28T06:02:08'.
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S'


def _to_local(tstr):
  """Parse ``tstr`` ('YYYY-MM-DDTHH:MM:SS') and add the fixed +9 h offset."""
  return datetime.datetime.strptime(tstr, _TIMESTAMP_FORMAT) + _LOCAL_OFFSET


def convert_date(tstr):
  """Return the offset-shifted date of ``tstr`` formatted as 'YYYY-MM-DD'.

  Args:
    tstr: timestamp string such as '2020-11-28T06:02:08'.

  Raises:
    ValueError: if ``tstr`` does not match the expected layout.
  """
  return _to_local(tstr).strftime('%Y-%m-%d')


def convert_time(tstr):
  """Return the offset-shifted time of day of ``tstr`` formatted as 'HH:MM'.

  Args:
    tstr: timestamp string such as '2020-11-28T06:02:08'.

  Raises:
    ValueError: if ``tstr`` does not match the expected layout.
  """
  return _to_local(tstr).strftime('%H:%M')


def convert_timeindex(tstr):
  """Return the offset-shifted time of day of ``tstr`` as minutes since midnight.

  The result is an int in the range 0..1439.
  NOTE(review): the earlier comment described this as the index of a 5-minute
  slot, but the value has always been in whole minutes; nothing divides by 5.

  Args:
    tstr: timestamp string such as '2020-11-28T06:02:08'.

  Raises:
    ValueError: if ``tstr`` does not match the expected layout.
  """
  shifted = _to_local(tstr)
  return shifted.hour * 60 + shifted.minute


def convert_df(df):
  """Return a copy of ``df`` with ``date`` and ``time`` columns derived from ``sysTime``.

  ``date`` holds the 'YYYY-MM-DD' string from ``convert_date`` and ``time`` the
  minutes-since-midnight value from ``convert_timeindex``. The input frame is
  not modified and the result has exactly one row per input row.

  The columns are attached with ``assign``. The previous row-wise
  ``pd.concat([df, expand_df])`` appended a second copy of every index label,
  doubling the row count and leaving NaN in the original columns of the
  appended rows.

  Args:
    df: DataFrame with a string column ``sysTime`` whose values start with a
      'YYYY-MM-DDTHH:MM:SS' timestamp, optionally followed by '.<suffix>'.

  Returns:
    A new DataFrame with the added (or overwritten) ``date`` and ``time`` columns.
  """
  # Keep only the text before the first '.', i.e. drop the fractional suffix.
  timestamps = df.sysTime.str.split('.').str[0]
  return df.assign(
      date=timestamps.map(convert_date),
      time=timestamps.map(convert_timeindex),
  )

# Replace df with the converted frame, then preview the date/time/sgv columns
# for rows whose sgv is below 150.
df = convert_df(df)
df.query('sgv < 150')[['date','time','sgv']].head()

In [None]:
# Build the supervised pairs: each reading is an input and the reading that
# follows it is the target.
sgv = df.sgv.dropna()

# Work on positional values instead of label lookups (sgv[i]): after dropna()
# the index labels can have gaps or repeats, which makes label-based access
# raise KeyError or return more than one value.
sgv_values = sgv.to_numpy()
x = sgv_values[:-1].tolist()
t = sgv_values[1:].tolist()

# Print a short summary rather than dumping every sample into the output.
print(f"{len(x)} samples")
print("x[:5] =", x[:5])
print("t[:5] =", t[:5])

In [None]:
import torch
# Convert to tensors usable for PyTorch training. Inputs and targets are both
# float32: the target is a regression value, and an int64 cast would silently
# truncate any fractional readings (lossfun converts the target to float anyway).
x = torch.tensor(x, dtype=torch.float32)
t = torch.tensor(t, dtype=torch.float32)

In [None]:
# Sequential (unshuffled) 60 / 20 / 20 split: the first 60% of the samples go
# to training, the next 20% to validation and the remainder to testing.
n_samples = len(x)
n_train = int(n_samples * 0.6)
n_val = int(n_samples * 0.2)
val_end = n_train + n_val

x_train, x_val, x_test = x[:n_train], x[n_train:val_end], x[val_end:]
t_train, t_val, t_test = t[:n_train], t[n_train:val_end], t[val_end:]

In [None]:
# Bundle each split's inputs and targets into a single TensorDataset.
train, val, test = (
    torch.utils.data.TensorDataset(inputs, targets)
    for inputs, targets in ((x_train, t_train), (x_val, t_val), (x_test, t_test))
)

In [None]:
# Show the type of each of the three dataset objects.
type(train), type(val), type(test)

In [None]:
# Show the number of samples in each split.
len(train), len(val), len(test)

# ネットワークの作成
- torch.nn
- torch.nn.functional
- pytorch_lightning
- trainer

In [None]:
from pytorch_lightning import Trainer

# Training-data handling
class TrainNet(pl.LightningModule):
    """Mixin providing the training DataLoader and the per-batch training step.

    Relies on the concrete subclass for ``batch_size``, ``num_workers``,
    ``forward`` and ``lossfun``, and on the module-level ``train`` dataset.
    """

    @pl.data_loader
    def train_dataloader(self):
        """Build the DataLoader over the module-level ``train`` dataset."""
        loader = torch.utils.data.DataLoader(
            train,
            self.batch_size,
            num_workers=self.num_workers,
        )
        return loader

    def training_step(self, batch, batch_nb):
        """Compute the loss of one training batch.

        Returns a dict with the loss under 'loss' and the same value logged
        as 'train_loss' under 'log'.
        """
        inputs, targets = batch
        loss = self.lossfun(self.forward(inputs), targets)
        return {'loss': loss, 'log': {'train_loss': loss}}

# Validation-data handling
class ValidationNet(pl.LightningModule):
    """Mixin providing the validation DataLoader, step and loss aggregation.

    Relies on the concrete subclass for ``batch_size``, ``forward`` and
    ``lossfun``, and on the module-level ``val`` dataset.
    """

    @pl.data_loader
    def val_dataloader(self):
        """Build the DataLoader over the module-level ``val`` dataset."""
        return torch.utils.data.DataLoader(val, self.batch_size)

    def validation_step(self, batch, batch_nb):
        """Compute the loss of one validation batch.

        Returns:
            dict with the batch loss under 'val_loss'.
        """
        x, t = batch
        y = self.forward(x)
        loss = self.lossfun(y, t)
        return {'val_loss': loss}

    def validation_end(self, outputs):
        """Average the per-batch validation losses.

        Args:
            outputs: list of the dicts returned by ``validation_step``.

        Returns:
            dict with the mean loss under 'val_loss' and the same value logged
            as 'val_loss' under 'log'.
        """
        avg_loss = torch.stack([out['val_loss'] for out in outputs]).mean()
        # Log under 'val_loss'. This was previously logged as 'test_loss',
        # which put validation values into the same series as the test loss.
        tensorboard_logs = {'val_loss': avg_loss}
        return {'val_loss': avg_loss, 'log': tensorboard_logs}

# Test-data handling
class TestNet(pl.LightningModule):
    """Mixin providing the test DataLoader, step and loss aggregation.

    Relies on the concrete subclass for ``batch_size``, ``forward`` and
    ``lossfun``, and on the module-level ``test`` dataset.
    """

    @pl.data_loader
    def test_dataloader(self):
        """Build the DataLoader over the module-level ``test`` dataset."""
        loader = torch.utils.data.DataLoader(test, self.batch_size)
        return loader

    def test_step(self, batch, batch_nb):
        """Compute the loss of one test batch and return it under 'test_loss'."""
        inputs, targets = batch
        return {'test_loss': self.lossfun(self.forward(inputs), targets)}

    def test_end(self, outputs):
        """Average the per-batch test losses.

        Returns a dict with the mean under 'test_loss' and the same value
        logged as 'test_loss' under 'log'.
        """
        batch_losses = [step_result['test_loss'] for step_result in outputs]
        avg_loss = torch.stack(batch_losses).mean()
        return {'test_loss': avg_loss, 'log': {'test_loss': avg_loss}}

In [None]:
# Class inheriting the training, validation and test handling.
# The definition below is wrapped in a bare string literal, so it is never
# executed as a class.
# NOTE(review): inside the string, forward() uses self.fc1 / self.fc2 while
# __init__ only creates self.lstm / self.linear.
'''
class Net(TrainNet, ValidationNet, TestNet):

    def __init__(self, input_size=1, hidden_size=10, output_size=1, batch_size=32, num_workers=1):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.batch_size = batch_size
        self.num_workers = num_workers

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

    def lossfun(self, y, t):
        return F.mse_loss(y.float(), t.float())

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.01)
        '''

In [None]:
# LSTM model definition
class LSTM(TrainNet, ValidationNet, TestNet):
    """LSTM followed by a linear read-out, trained with mean-squared error.

    Args:
        input_size: feature size passed to ``nn.LSTM``. ``forward`` reshapes
            its input to (batch, 1, 1), so only 1 works as written.
        hidden_size: LSTM hidden-state size.
        output_size: number of values produced by the linear layer.
        batch_size: batch size read by the DataLoader mixins.
        num_workers: worker count read by the training DataLoader.
    """

    def __init__(self, input_size=1, hidden_size=10, output_size=1, batch_size=32, num_workers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.batch_size = batch_size
        self.num_workers = num_workers

    def forward(self, x):
        """Map a batch of scalar inputs to predictions of shape (batch, output_size)."""
        # Treat every sample as a length-1 sequence with one feature: (batch, 1, 1).
        x = x.view(x.size(0), 1, 1)
        # Only the per-step outputs are needed; the final (h, c) state is discarded.
        x, _ = self.lstm(x)
        # Flatten (batch, 1, hidden) to (batch, hidden) before the linear layer.
        x = self.linear(x.view(x.size(0), -1))
        return x

    def lossfun(self, y, t):
        """Mean-squared error between predictions ``y`` and targets ``t``.

        ``forward`` returns (batch, output_size), while the targets built in
        this notebook are 1-D per batch. Without matching shapes, ``mse_loss``
        broadcasts the pair to (batch, batch) and averages over every
        prediction/target combination, which is not the intended loss. The
        target is therefore reshaped to the prediction's shape first; a target
        with a different number of elements raises instead of broadcasting.
        """
        y = y.float()
        return F.mse_loss(y, t.float().reshape(y.shape))

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=0.01)."""
        return torch.optim.Adam(self.parameters(), lr=0.01)

# 学習

## 再現性を保つためにシードを固定

In [None]:
# Fix PyTorch's global random seed for reproducibility.
torch.manual_seed(0)

## 予測モデルのインスタンス化
- インスタンス化
- チェックポイントの設定
- Trainerのcallback設定

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Model with its default hyper-parameters.
net = LSTM()

# Checkpointing driven by the monitored 'val_loss' (lower is better).
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    verbose=True,
    filepath='drive/MyDrive/glu_analysis/weights.ckpt',
)

trainer = Trainer(min_epochs=1000, checkpoint_callback=checkpoint_callback)

# Run training.
trainer.fit(net)

# 検証

In [None]:
# Evaluate on the test data.
trainer.test()

# Results for the test data.
trainer.callback_metrics



In [None]:
#%load_ext tensorboard
#%tensorboard --logdir ./lightning_logs/version_11//

# Writer will output to ./runs/ directory by default
%tensorboard --logdir ./runs/

# 推論



## 推論の準備
- チェックポイントからロード

In [None]:
# Load a saved checkpoint from disk.
# NOTE(review): the epoch number in the file name is hard-coded — confirm it
# matches a checkpoint that exists after re-training.
checkpoint = torch.load('drive/MyDrive/glu_analysis/weights.ckpt/_ckpt_epoch_219.ckpt')
 

In [None]:
# Create a fresh model instance (no dataset needed) and restore the weights
# stored under the checkpoint's 'state_dict' key.
model = LSTM()
model.load_state_dict(checkpoint['state_dict'])

## 推論
- テストデータをモデルに食べさせる

In [None]:
# Inference mode: switch the model to eval mode and freeze it.
model.eval()
model.freeze()

In [None]:
# Run the model on the test inputs.
outputs = model(x_test)

In [None]:
fig, ax = plt.subplots()

# Convert to flat NumPy arrays so plotting does not depend on the tensors'
# shape, device or autograd state.
predicted = outputs.detach().cpu().numpy().ravel()
actual = t_test.detach().cpu().numpy().ravel()
xaxis = range(len(predicted))

ax.plot(xaxis, predicted, label="predicted")
ax.plot(xaxis, actual, label="actual")

# A title, axis labels and a legend let the figure stand on its own.
ax.set_title("Test set: predicted vs. actual sgv")
ax.set_xlabel("test sample index")
ax.set_ylabel("sgv")
ax.legend()

# Keep the y tick values hidden as before, but through tick_params rather than
# set_yticklabels([""]), which pins a fixed label list onto automatically
# placed ticks.
ax.tick_params(axis="y", labelleft=False)
plt.show()