In [None]:
%pylab inline

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Classification of iris dataset

In [None]:
from sklearn.datasets import load_iris
# Load the example iris dataset. Later cells read the entries 'data',
# 'target', 'feature_names' and 'target_names' from this object.
data = load_iris()

In [None]:
# Do not execute this cell when you use the example dataset.

# If you want to use your own dataset, prepare a CSV file named "data.csv" in the following format:
# sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
# 5.1,3.5,1.4,0.2,Iris-setosa
# 4.9,3.0,1.4,0.2,Iris-setosa
# 7.0,3.2,4.7,1.4,Iris-versicolor
# 6.4,3.2,4.5,1.5,Iris-versicolor
# 6.3,3.3,6.0,2.5,Iris-virginica
# 5.8,2.7,5.1,1.9,Iris-virginica
import numpy as np
import pandas as pd
df = pd.read_csv("data.csv", header=0)

# The first row of the CSV file is the header and provides the column names.
# The rightmost column in the CSV file is the class.
target_column = df.columns.values.tolist()[-1]
class_labels = df[target_column].values.tolist()

# Convert class names into ID numbers 0, 1, 2, ...
# IDs are assigned in order of first appearance in the file. Iterating a bare
# set() of strings would give an order that depends on hash randomization, so
# the class-to-ID mapping could change every time the kernel is restarted.
target_names = list(dict.fromkeys(class_labels))
target2id = {name: idx for idx, name in enumerate(target_names)}

# Build a dict with the same keys that the rest of the notebook reads.
data = {}
data['feature_names'] = df.columns.values.tolist()[:-1]
data['target_names'] = target_names
data['target'] = np.array([target2id[c] for c in class_labels])
data['data'] = np.array(df[data['feature_names']].values.tolist())

In [None]:
# Show the feature rows at indices 48-52 (the same slice is applied to the labels in a later cell).
data['data'][48:53]

In [None]:
# Names of the feature columns.
data['feature_names']

In [None]:
# Class IDs for the rows at indices 48-52, i.e. the same rows previewed from data['data'].
data['target'][48:53]

The class label identifies the iris species: class 0 is `'setosa'`, class 1 is `'versicolor'`, and class 2 is `'virginica'`.
（クラス0が `'setosa'`, クラス1が `'versicolor'`, クラス2が `'virginica'` というアヤメの種類）

In [None]:
# Class names; the position of a name in this sequence is its class ID.
data['target_names']

In [None]:
# Convert the dataset to tensors: 32-bit float features and 64-bit integer
# class IDs (the integer label type that F.cross_entropy is given later).
X = torch.tensor(data['data'], dtype=torch.float32)
y = torch.tensor(data['target'], dtype=torch.int64)

In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron with one hidden layer: linear -> ReLU -> linear.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied inside the model.
    """

    def __init__(self, xdim, hdim, ydim):
        """Create the two linear layers.

        Args:
            xdim: size of each input vector.
            hdim: number of hidden units.
            ydim: size of each output vector.
        """
        super().__init__()
        # Layers are created in this order so that weight initialisation
        # consumes the random number generator in a fixed sequence.
        self.linear1 = nn.Linear(xdim, hdim)
        self.linear2 = nn.Linear(hdim, ydim)

    def forward(self, X):
        """Map a batch of inputs ``X`` to a batch of output scores."""
        hidden = F.relu(self.linear1(X))
        return self.linear2(hidden)

In [None]:
# Train an MLP with 2 hidden units on the whole dataset using full-batch SGD.
# Seed PyTorch first so the weight initialisation, and therefore the printed
# losses, are the same on every run.
torch.manual_seed(0)
# Take the input/output sizes from the data instead of hard-coding 4 and 3,
# so this cell also works with a custom data.csv.
mlp = MLP(X.shape[1], 2, len(data['target_names']))
optimizer = optim.SGD(mlp.parameters(), lr = 0.1)
for epoch in range(100):
    optimizer.zero_grad()            # clear gradients left over from the previous step
    a = mlp(X)                       # raw class scores for every sample
    loss = F.cross_entropy(a, y)
    loss.backward()
    optimizer.step()
    # .item() prints a plain float rather than the tensor repr with grad_fn.
    print(loss.item())

In [None]:
# Evaluate on the same data X that the model was trained on.
# Inference only, so skip building the autograd graph.
with torch.no_grad():
    a = mlp(X)
pred_proba = F.softmax(a, dim=1)
pred_y = torch.argmax(pred_proba, dim=1)
print(pred_y)
print(pred_y == y)
# Accuracy is the mean of the 0/1 correctness mask. Casting to float first
# keeps the result independent of integer-tensor division rules.
accuracy = (pred_y == y).float().mean()
print(accuracy)

# Splitting dataset into train/test sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the sample indices 70% / 30% into train and test sets; random_state is
# fixed at 42 for the shuffle.
all_indices = np.arange(len(X))
train_idx, test_idx = train_test_split(all_indices, test_size=0.3, random_state=42)

# Build float feature tensors and integer label tensors for each split.
features, labels = data['data'], data['target']
train_X = torch.tensor(features[train_idx], dtype=torch.float32)
train_y = torch.tensor(labels[train_idx], dtype=torch.int64)
test_X = torch.tensor(features[test_idx], dtype=torch.float32)
test_y = torch.tensor(labels[test_idx], dtype=torch.int64)

In [None]:
# Print the number of feature rows and labels in each split (train first, then test).
for split_X, split_y in ((train_X, train_y), (test_X, test_y)):
    print(len(split_X), len(split_y))

In [None]:
# Train a fresh MLP with 3 hidden units on the training split only.
# Seed PyTorch first so the weight initialisation, and therefore the printed
# losses, are the same on every run.
torch.manual_seed(0)
# Take the input/output sizes from the data instead of hard-coding 4 and 3,
# so this cell also works with a custom data.csv.
mlp = MLP(train_X.shape[1], 3, len(data['target_names']))
optimizer = optim.SGD(mlp.parameters(), lr = 0.1)
for epoch in range(100):
    optimizer.zero_grad()            # clear gradients left over from the previous step
    a = mlp(train_X)                 # raw class scores for every training sample
    loss = F.cross_entropy(a, train_y)
    loss.backward()
    optimizer.step()
    # .item() prints a plain float rather than the tensor repr with grad_fn.
    print(loss.item())

In [None]:
# Evaluate on the held-out test split.
# Inference only, so skip building the autograd graph.
with torch.no_grad():
    a = mlp(test_X)
pred_proba = F.softmax(a, dim=1)
pred_y = torch.argmax(pred_proba, dim=1)
print(pred_y)
print(pred_y == test_y)
# Accuracy is the mean of the 0/1 correctness mask. Casting to float first
# keeps the result independent of integer-tensor division rules.
accuracy = (pred_y == test_y).float().mean()
print(accuracy)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class report for the predictions on the test split.
# Passing `labels` pins the class IDs explicitly, so the report rows line up
# with target_names even if some class does not occur in the test split.
class_ids = list(range(len(data['target_names'])))
print(classification_report(
    test_y.numpy(),   # convert explicitly instead of using the legacy `.data` accessor
    pred_y.numpy(),
    labels=class_ids,
    target_names=data['target_names'],
))