In [1]:
# https://www.codexa.net/pytorch-python/
# Import PyTorch and print the installed version
import torch
print(torch.__version__)

1.7.1+cu110


In [None]:
# Allocate a 2x2 tensor WITHOUT initializing its memory, so the printed
# values are arbitrary. torch.empty states that intent explicitly; the legacy
# torch.Tensor(2, 2) constructor is easy to confuse with torch.Tensor([2, 2]),
# which builds a tensor holding the values 2 and 2.
x = torch.empty(2, 2)
print(x)

In [None]:
# Build a tensor from a nested Python list.
# The list is deliberately NOT named `list`: that would shadow the builtin for
# the rest of the kernel session and break later calls such as list(...) or
# isinstance(obj, list).
nested_values = [[1, 2, 3], [4, 5, 6]]
x2 = torch.Tensor(nested_values)
x2

In [None]:
# Inspect the dimensions of the tensor (displayed as a torch.Size)
x2.shape

In [None]:
# 2x2 tensor of samples from the uniform distribution
uniform_sample = torch.rand((2, 2))
print(uniform_sample)
# 2x2 tensor of samples from the normal distribution
normal_sample = torch.randn((2, 2))
print(normal_sample)

In [None]:
# 3x3 identity matrix (the column count defaults to the row count)
print(torch.eye(3))
# Uninitialized 4x1 tensor; its contents are arbitrary
print(torch.empty((4, 1)))
# 11 evenly spaced values from 0 to 100 inclusive
print(torch.linspace(start=0, end=100, steps=11))

In [None]:
# Two 2x2 float tensors to operate on
x = torch.tensor([[2., 2.], [1., 1.]])
y = torch.tensor([[3., 2.], [1., 2.]])
# Show both operands
print(x, y, sep='\n')

# Element-wise addition: operator form, then method form
print(x + y)
print(x.add(y))

# Hadamard (element-wise) product: operator form, then method form
print(x * y)
print(x.mul(y))

# Matrix product of the two 2x2 tensors
print(x.mm(y))

# Sum of all elements of x
print(x.sum())

# Standard deviation of all elements of x
print(x.std())

# Arithmetic mean of all elements of x
print(x.mean())

# PyTorchを使って線形回帰

In [1]:
# ライブラリのインポート
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
 
# PyTorchのインポート
import torch 
import torch.nn as nn
 
# 評価指標（Scikit-learn）
from sklearn.metrics import mean_squared_error

# Load the ramen ratings CSV from the local ./archive directory
ramen = pd.read_csv('./archive/ramen-ratings.csv')



In [None]:
# Preview the first five rows of the data
ramen.head(n=5)

In [None]:
# Report the (rows, columns) size of the loaded frame
rows_and_columns = ramen.shape
print(rows_and_columns)

In [None]:
# Row count per country, limited to the first 20 entries of value_counts()
ramen['Country'].value_counts().head(20)

In [2]:
# Remove the rows whose Stars value is the literal string 'Unrated'.
# `mask` holds the index labels of those rows (not a boolean mask).
mask = ramen.index[ramen['Stars'].eq('Unrated')]
ramen = ramen.drop(labels=mask, axis='index')
ramen.shape

(2577, 7)

In [3]:
# Report the dtype of Stars before the conversion
print(ramen.dtypes['Stars'])
# Cast only the Stars column to float; the other columns are left untouched
ramen = ramen.astype({'Stars': float})
# Report the dtype again to confirm the conversion
print(ramen.dtypes['Stars'])

object
float64


In [4]:
# Discard the columns that are not needed for the model
unused_columns = ['Review #', 'Top Ten', 'Variety']
ramen = ramen.drop(unused_columns, axis='columns')
ramen.head()

Unnamed: 0,Brand,Style,Country,Stars
0,New Touch,Cup,Japan,3.75
1,Just Way,Pack,Taiwan,1.0
2,Nissin,Cup,USA,2.25
3,Wei Lih,Pack,Taiwan,2.75
4,Ching's Secret,Pack,India,3.75


In [5]:
# One-hot encode each categorical feature, dropping the first level of each
# and prefixing the new columns with the source column's name
Country, Brand, Style = (
    pd.get_dummies(ramen[column], prefix=column, drop_first=True)
    for column in ('Country', 'Brand', 'Style')
)
# Join the encoded features side by side into a single feature frame
ramendf = pd.concat((Country, Brand, Style), axis='columns')
# Preview the result
ramendf.head()

Unnamed: 0,Country_Bangladesh,Country_Brazil,Country_Cambodia,Country_Canada,Country_China,Country_Colombia,Country_Dubai,Country_Estonia,Country_Fiji,Country_Finland,...,Brand_Yum-Mie,Brand_Zow Zow,Brand_iMee,Brand_iNoodle,Style_Bowl,Style_Box,Style_Can,Style_Cup,Style_Pack,Style_Tray
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [6]:
# Split into the feature matrix X and the target column y, both as float32
X = ramendf.to_numpy(dtype=np.float32)
y = ramen[['Stars']].to_numpy(dtype=np.float32)

In [7]:
# Linear regression model: one weight per feature column, a single output.
# The input width is read from X instead of being hard-coded (it was 397), so
# the layer keeps matching the data when the set of dummy columns changes.
model = nn.Linear(X.shape[1], 1)

In [8]:
# Loss function: mean squared error, averaged over all elements
loss = nn.MSELoss(reduction='mean')
# Optimizer: stochastic gradient descent over the model's parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.6)

In [9]:
# Train the model, feeding the full dataset on every epoch.
num_epochs = 1000   # total number of passes over the data
log_interval = 100  # print the cost once every this many epochs

# Step 1: convert the NumPy arrays to tensors once, before the loop.
# X and y are not modified during training, so repeating the conversion on
# every epoch was redundant.
inputs = torch.from_numpy(X)
targets = torch.from_numpy(y)

for epoch in range(num_epochs):
    # Step 2: forward pass, then the cost against the targets
    outputs = model(inputs)
    cost = loss(outputs, targets)

    # Step 3: backpropagation and parameter update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Step 4: report the cost every `log_interval` epochs
    if (epoch + 1) % log_interval == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, cost.item()))

  Variable._execution_engine.run_backward(


Epoch [100/1000], Loss: 0.7759
Epoch [200/1000], Loss: 0.7272
Epoch [300/1000], Loss: 0.6983
Epoch [400/1000], Loss: 0.6785
Epoch [500/1000], Loss: 0.6638
Epoch [600/1000], Loss: 0.6522
Epoch [700/1000], Loss: 0.6429
Epoch [800/1000], Loss: 0.6352
Epoch [900/1000], Loss: 0.6287
Epoch [1000/1000], Loss: 0.6231


In [10]:
# Predict on the same features the model was trained on.
# torch.no_grad() skips building the autograd graph for this inference-only
# pass; the output then does not require grad and can be converted with
# .numpy() directly, replacing the discouraged `.data` attribute.
with torch.no_grad():
    y_pred = model(torch.from_numpy(X)).numpy()
# Compare the first five predictions with the first five true targets
print(y_pred[0:5])
print(y[0:5])

[[4.1107054]
 [2.9211955]
 [3.4576013]
 [3.4200048]
 [3.706513 ]]
[[3.75]
 [1.  ]
 [2.25]
 [2.75]
 [3.75]]


In [11]:
# Mean squared error between the true targets and the predictions
mean_squared_error(y_true=y, y_pred=y_pred)

0.6230184