---
## ４章
---

---
# PyTorchのインポートと前処理済みデータの読み込み
---

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

import torch

credit_df = pd.read_csv('credit_onehot.csv')

#データ確認
credit_df.head()

Unnamed: 0,返済期間,借入金額,年齢,当座預金口座_0DM未満,当座預金口座_200DM以上または給与振込口座,当座預金口座_200DM未満,当座預金口座_口座なし,信用履歴_借金なし,信用履歴_当銀行の返済金なし,信用履歴_要注意人物,...,その他借入_消費者金融,その他借入_銀行,住居_家主,住居_家族と同居,住居_賃貸,仕事_パート/アルバイト,仕事_正社員,仕事_無職,仕事_管理者/役員,審査結果
0,42,7882,45.0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
1,48,5951,22.0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,1,0,0,1
2,24,4870,53.0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,1
3,36,9055,35.0,0,0,0,1,0,0,0,...,0,0,0,1,0,1,0,0,0,0
4,24,2835,53.0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,0


---
# 入力データと正解ラベル、さらに学習用データとテスト用データに分割
---

In [2]:
X = credit_df.iloc[:,:-1]
y = credit_df.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=0)

---
# PyTorch用のテンソル（多次元行列）への変換
---

In [3]:
X_train = torch.tensor(X_train.values).float()
y_train = torch.tensor(y_train.values).long()
X_test = torch.tensor(X_test.values).float()
y_test = torch.tensor(y_test.values).long()

#テンソル型のXとYをデータローダーにセット
train = torch.utils.data.TensorDataset(X_train, y_train)

---
# データの数を確認
---

In [4]:
print("入力データの数:",X_train.shape)
print("正解ラベルの数:",y_train.shape)

#trainのテンソルの形を確認
train[0]

入力データの数: torch.Size([1057, 54])
正解ラベルの数: torch.Size([1057])


(tensor([1.8000e+01, 1.4420e+03, 3.2000e+01, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]),
 tensor(1))

---
# シャッフルして15個づつのデータに細かく分割
---

In [5]:
# PyTorch のRNGシードを初期化（再現性の為）
torch.manual_seed(0)

train_loader = torch.utils.data.DataLoader(train, batch_size=15, shuffle=True)

for i in train_loader:
    print(i)
    print("------------------------")

[tensor([[1.8000e+01, 1.4420e+03, 3.2000e+01, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [2.4000e+01, 1.9350e+03, 3.1000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.000

[tensor([[1.8000e+01, 8.4710e+03, 2.3000e+01, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00],
        [3.0000e+01, 1.8670e+03, 5.8000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.000

[tensor([[9.0000e+00, 1.1360e+03, 3.2000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00],
        [2.4000e+01, 1.2070e+03, 2.4000e+01, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.000

[tensor([[2.4000e+01, 1.2870e+03, 3.7000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00],
        [5.4000e+01, 1.5945e+04, 5.8000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.000

---
# モデルの定義
---

In [6]:
#入力値は54個、中間層が128個、出力は2分類のため、2を設定。

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(54, 128)
        self.fc2 = torch.nn.Linear(128, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.nn.functional.relu(x)
        x = self.fc2(x)
        x = torch.nn.functional.log_softmax(x, dim=0)
        return x

---
# 学習
---

In [7]:
model = Net()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(500): #学習回数500回
    total_loss = 0

    for X_train, y_train in train_loader:
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    #途中の損失関数を確認
    if (epoch + 1) % 50 == 0:
        print(epoch + 1, total_loss)

50 49.21345829963684
100 49.21345829963684
150 49.21345829963684
200 49.21345829963684
250 49.21345829963684
300 49.21345829963684
350 49.21345829963684
400 49.21345829963684
450 49.21345829963684
500 49.21345829963684


---
# 精度確認
---

In [8]:
#学習済みモデルを使用し、テストデータの5件分を予測
print(model(X_test[:5]).data)
print("------------------")
print(torch.max(model(X_test[:5]).data, 1))
print("------------------")

#テストデータで予測結果を作成
_ , y_pred = torch.max(model(X_test).data, 1)

accuracy = accuracy_score(y_test, y_pred)

print("正確度：",accuracy)

tensor([[-1.6094, -1.6094],
        [-1.6094, -1.6094],
        [-1.6094, -1.6094],
        [-1.6094, -1.6094],
        [-1.6094, -1.6094]])
------------------
torch.return_types.max(
values=tensor([-1.6094, -1.6094, -1.6094, -1.6094, -1.6094]),
indices=tensor([1, 1, 1, 1, 1]))
------------------
正確度： 0.2891832229580574


---
# モデルを再定義
---

In [9]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(54, 128)
        self.fc2 = torch.nn.Linear(128, 128)
        self.fc3 = torch.nn.Linear(128, 128)
        self.fc4 = torch.nn.Linear(128, 128)
        self.fc5 = torch.nn.Linear(128, 128)
        self.fc6 = torch.nn.Linear(128, 2)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.relu(self.fc4(x))
        x = torch.nn.functional.relu(self.fc5(x))
        x = torch.nn.functional.log_softmax(self.fc6(x), dim=0)
        return x

---
# 再学習
---

In [10]:
model = Net()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(500): #学習回数500回
    total_loss = 0

    for X_train, y_train in train_loader:
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() 

    #途中の損失関数を確認
    if (epoch + 1) % 50 == 0:
        print(epoch + 1, total_loss)

50 45.62314224243164
100 45.33018630743027
150 45.344267427921295
200 45.24678662419319
250 44.119410932064056
300 44.69518154859543
350 44.4057277739048
400 44.02384251356125
450 44.39625692367554
500 43.81282064318657


---
# 再度、精度確認
---

In [11]:
#テストデータで予測結果を作成
_ , y_pred = torch.max(model(X_test).data, 1)

accuracy = accuracy_score(y_test, y_pred)

print("正確度：",accuracy)

正確度： 0.717439293598234
