This example comes from `Machine Learning with PyTorch and Scikit-Learn`

https://github.com/rasbt/machine-learning-book/blob/main/ch13/ch13_part2.ipynb

In [2]:
import pandas as pd
import helpsk.pandas as hp

In [21]:
# Column labels are supplied explicitly through `names=` below.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

# Fields are space separated (runs of spaces after a delimiter are skipped),
# "?" marks a missing value, and anything after a tab on a line is treated
# as a comment and not parsed.
df = pd.read_csv(
    url,
    names=column_names,
    sep=" ",
    skipinitialspace=True,
    comment='\t',
    na_values="?",
)

df.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
393,27.0,4,140.0,86.0,2790.0,15.6,82,1
394,44.0,4,97.0,52.0,2130.0,24.6,82,2
395,32.0,4,135.0,84.0,2295.0,11.6,82,1
396,28.0,4,120.0,79.0,2625.0,18.6,82,1
397,31.0,4,119.0,82.0,2720.0,19.4,82,1


In [22]:
# Per-column helpsk summary of the numeric columns in `df`
# (null/zero counts, mean, st. dev., skew/kurtosis and quantiles, per the output).
hp.numeric_summary(df)

Unnamed: 0,# of Non-Nulls,# of Nulls,% Nulls,# of Zeros,% Zeros,Mean,St Dev.,Coef of Var,Skewness,Kurtosis,Min,10%,25%,50%,75%,90%,Max
MPG,398,0,0.0%,0,0.0%,23.5,7.8,0.3,0.5,-0.5,9.0,14.0,17.5,23.0,29.0,34.3,46.6
Cylinders,398,0,0.0%,0,0.0%,5.5,1.7,0.3,0.5,-1.4,3.0,4.0,4.0,4.0,8.0,8.0,8.0
Displacement,398,0,0.0%,0,0.0%,193.4,104.3,0.5,0.7,-0.8,68.0,90.0,104.2,148.5,262.0,350.0,455.0
Horsepower,392,6,2.0%,0,0.0%,104.5,38.5,0.4,1.1,0.7,46.0,67.0,75.0,93.5,126.0,157.7,230.0
Weight,398,0,0.0%,0,0.0%,2970.4,846.8,0.3,0.5,-0.8,1613.0,1988.5,2223.8,2803.5,3608.0,4275.2,5140.0
Acceleration,398,0,0.0%,0,0.0%,15.6,2.8,0.2,0.3,0.4,8.0,12.0,13.8,15.5,17.2,19.0,24.8
Model Year,398,0,0.0%,0,0.0%,76.0,3.7,0.1,0.0,-1.2,70.0,71.0,73.0,76.0,79.0,81.0,82.0
Origin,398,0,0.0%,0,0.0%,1.6,0.8,0.5,0.9,-0.8,1.0,1.0,1.0,1.0,2.0,3.0,3.0


In [23]:
# helpsk summary of the non-numeric columns in `df`.
# NOTE(review): no output was captured for this cell — presumably because
# every column is numeric; confirm.
hp.non_numeric_summary(df)

---

In [24]:
# Report how many values are missing in each column before cleaning.
print(df.isna().sum())

# Discard every row containing a missing value, then renumber the index
# from 0 so it is contiguous again.
df = df.dropna().reset_index(drop=True)
df.tail()

MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64


Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
387,27.0,4,140.0,86.0,2790.0,15.6,82,1
388,44.0,4,97.0,52.0,2130.0,24.6,82,2
389,32.0,4,135.0,84.0,2295.0,11.6,82,1
390,28.0,4,120.0,79.0,2625.0,18.6,82,1
391,31.0,4,119.0,82.0,2720.0,19.4,82,1


In [25]:
import sklearn
import sklearn.model_selection

# 80/20 train/test split with a fixed seed so the partition is reproducible.
df_train, df_test = sklearn.model_selection.train_test_split(
    df,
    train_size=0.8,
    random_state=1,
)

# One row per column (count/mean/std/quantiles), computed on the training
# rows only; indexed by column name for lookups such as `.loc[col, 'mean']`.
train_stats = df_train.describe().T
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,313.0,23.404153,7.666909,9.0,17.5,23.0,29.0,46.6
Cylinders,313.0,5.402556,1.701506,3.0,4.0,4.0,8.0,8.0
Displacement,313.0,189.51278,102.675646,68.0,104.0,140.0,260.0,455.0
Horsepower,313.0,102.929712,37.919046,46.0,75.0,92.0,120.0,230.0
Weight,313.0,2961.198083,848.602146,1613.0,2219.0,2755.0,3574.0,5140.0
Acceleration,313.0,15.704473,2.725399,8.5,14.0,15.5,17.3,24.8
Model Year,313.0,75.929712,3.675305,70.0,73.0,76.0,79.0,82.0
Origin,313.0,1.591054,0.807923,1.0,1.0,1.0,2.0,3.0


In [83]:
# Columns that are z-score standardized below.
numeric_column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

for col_name in numeric_column_names:
    mean_train = train_stats.loc[col_name, 'mean']
    std_train = train_stats.loc[col_name, 'std']
    # Replace the whole column instead of writing through `.loc[:, col_name]`:
    # 'Cylinders' is an integer column, and setting float z-scores into it in
    # place relies on silent upcasting, which newer pandas versions deprecate.
    df_train_norm[col_name] = (df_train[col_name] - mean_train) / std_train
    # The test dataset is normalized with the mean & st dev of the *training*
    # dataset so no information from the test rows leaks into the scaling.
    df_test_norm[col_name] = (df_test[col_name] - mean_train) / std_train

df_train_norm.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
203,28.0,-0.824303,-0.90102,-0.736562,-0.950031,0.255202,76,3
255,19.4,0.351127,0.4138,-0.340982,0.29319,0.548737,78,1
72,13.0,1.526556,1.144256,0.713897,1.339617,-0.625403,72,1
235,30.5,-0.824303,-0.89128,-1.053025,-1.072585,0.475353,77,1
37,14.0,1.526556,1.563051,1.636916,1.47042,-1.35924,71,1


In [84]:
# Raw 'Model Year' values of the training rows; this column is not in
# `numeric_column_names`, so the standardization loop left it unscaled.
df_train_norm['Model Year'].values

array([81, 78, 74, 80, 81, 73, 81, 80, 79, 74, 76, 72, 81, 77, 70, 73, 79,
       72, 78, 78, 79, 73, 76, 73, 77, 73, 78, 81, 78, 71, 75, 79, 81, 81,
       72, 70, 79, 75, 73, 80, 82, 82, 71, 78, 79, 73, 72, 81, 76, 75, 81,
       82, 79, 73, 74, 74, 77, 70, 78, 73, 74, 73, 71, 73, 70, 75, 70, 75,
       70, 73, 75, 76, 72, 71, 75, 81, 82, 82, 74, 71, 77, 77, 76, 76, 82,
       79, 78, 74, 80, 73, 71, 73, 77, 74, 82, 75, 70, 78, 75, 78, 71, 80,
       77, 77, 73, 81, 75, 72, 82, 73, 70, 82, 73, 81, 80, 80, 70, 71, 71,
       78, 71, 78, 71, 80, 82, 71, 78, 71, 81, 72, 75, 74, 71, 77, 72, 78,
       80, 78, 82, 82, 82, 71, 76, 75, 78, 71, 73, 75, 77, 79, 71, 70, 73,
       75, 72, 71, 76, 78, 74, 70, 82, 76, 76, 76, 74, 79, 73, 76, 81, 74,
       82, 81, 76, 78, 81, 77, 80, 77, 72, 80, 80, 79, 76, 73, 73, 73, 79,
       75, 81, 71, 75, 76, 76, 76, 80, 73, 77, 74, 79, 72, 73, 76, 74, 74,
       76, 75, 80, 70, 78, 73, 76, 77, 72, 79, 71, 81, 70, 81, 72, 76, 72,
       78, 82, 74, 81, 70

In [85]:
import torch

# Bucket edges for 'Model Year'. With right=True a value equal to an edge goes
# into the higher bucket:
#   year < 73 -> 0, 73 <= year < 76 -> 1, 76 <= year < 79 -> 2, year >= 79 -> 3
boundaries = torch.tensor([73, 76, 79])

v = torch.tensor(df_train_norm['Model Year'].values)
# Convert to a NumPy array so pandas receives a plain ndarray column.
df_train_norm['Model Year Bucketed'] = torch.bucketize(v, boundaries, right=True).numpy()

# After this line `v` holds the test-set years; later inspection cells use it.
v = torch.tensor(df_test_norm['Model Year'].values)
df_test_norm['Model Year Bucketed'] = torch.bucketize(v, boundaries, right=True).numpy()

# Guard the append so re-running this cell does not add the feature twice
# (a duplicate name would duplicate the column in the feature tensor).
if 'Model Year Bucketed' not in numeric_column_names:
    numeric_column_names.append('Model Year Bucketed')
numeric_column_names

['Cylinders',
 'Displacement',
 'Horsepower',
 'Weight',
 'Acceleration',
 'Model Year Bucketed']

In [86]:
# `v` was last assigned from df_test_norm['Model Year'], so this shows the
# test-set model years.
v

tensor([72, 75, 81, 73, 82, 77, 72, 73, 72, 80, 79, 80, 79, 77, 82, 70, 82, 80,
        70, 75, 74, 74, 73, 77, 70, 79, 77, 76, 75, 81, 78, 77, 72, 82, 77, 76,
        75, 78, 82, 70, 77, 72, 73, 74, 73, 75, 73, 82, 76, 73, 72, 78, 80, 82,
        80, 75, 79, 70, 79, 78, 82, 76, 81, 78, 76, 75, 76, 70, 79, 71, 80, 79,
        76, 80, 70, 79, 70, 73, 72])

In [87]:
# Bucket edges passed to torch.bucketize for 'Model Year'.
boundaries

tensor([73, 76, 79])

In [88]:
# Bucket index for each test-set year in `v`; with right=True a year equal to
# an edge lands in the higher bucket (e.g. 73 -> 1, 76 -> 2, 79 -> 3 in the output).
torch.bucketize(v, boundaries=boundaries, right=True)

tensor([0, 1, 3, 1, 3, 2, 0, 1, 0, 3, 3, 3, 3, 2, 3, 0, 3, 3, 0, 1, 1, 1, 1, 2,
        0, 3, 2, 2, 1, 3, 2, 2, 0, 3, 2, 2, 1, 2, 3, 0, 2, 0, 1, 1, 1, 1, 1, 3,
        2, 1, 0, 2, 3, 3, 3, 1, 3, 0, 3, 2, 3, 2, 3, 2, 2, 1, 2, 0, 3, 0, 3, 3,
        2, 3, 0, 3, 0, 1, 0])

In [89]:
# Sanity-check the transformed training frame: the standardized columns show
# mean ~0 and st. dev. 1.0 in the output.
# NOTE(review): the huge 'Coef of Var' values for those columns are presumably
# st. dev. divided by a near-zero mean — confirm against helpsk.
hp.numeric_summary(df_train_norm)

Unnamed: 0,# of Non-Nulls,# of Nulls,% Nulls,# of Zeros,% Zeros,Mean,St Dev.,Coef of Var,Skewness,Kurtosis,Min,10%,25%,50%,75%,90%,Max
MPG,313,0,0.0%,0,0.0%,23.4,7.7,0.3,0.5,-0.4,9.0,14.0,17.5,23.0,29.0,34.0,46.6
Cylinders,313,0,0.0%,0,0.0%,-0.0,1.0,-1.6781270040082914e+16,0.6,-1.3,-1.4,-0.8,-0.8,-0.8,1.5,1.5,1.5
Displacement,313,0,0.0%,0,0.0%,0.0,1.0,7661014583516110.0,0.7,-0.8,-1.2,-1.0,-0.8,-0.5,0.7,1.6,2.6
Horsepower,313,0,0.0%,0,0.0%,0.0,1.0,2.2025416927608828e+16,1.1,0.8,-1.5,-1.0,-0.7,-0.3,0.5,1.3,3.4
Weight,313,0,0.0%,0,0.0%,-0.0,1.0,-8390635020041464.0,0.5,-0.8,-1.6,-1.1,-0.9,-0.2,0.7,1.5,2.6
Acceleration,313,0,0.0%,0,0.0%,-0.0,1.0,-3037988541739149.0,0.3,0.4,-2.6,-1.2,-0.6,-0.1,0.6,1.3,3.3
Model Year,313,0,0.0%,0,0.0%,75.9,3.7,0.1,0.0,-1.2,70.0,71.0,73.0,76.0,79.0,81.0,82.0
Origin,313,0,0.0%,0,0.0%,1.6,0.8,0.5,0.9,-0.9,1.0,1.0,1.0,1.0,2.0,3.0,3.0
Model Year Bucketed,313,0,0.0%,68,22.0%,1.6,1.1,0.7,-0.1,-1.3,0.0,0.0,1.0,2.0,3.0,3.0,3.0


In [90]:
# helpsk summary of the non-numeric columns in the transformed test frame.
# NOTE(review): no output was captured — presumably no non-numeric columns; confirm.
hp.non_numeric_summary(df_test_norm)

In [91]:
# Distinct 'Origin' codes present in the training rows.
set(df_train_norm['Origin'])

{1, 2, 3}

In [92]:
# Preview of the modulo remapping of the Origin codes: 1, 2, 3 -> 1, 2, 0
# (the divisor 3 is hard-coded here).
[x % 3 for x in set(df_train_norm['Origin'])]

[1, 2, 0]

In [93]:
from torch.nn.functional import one_hot

# Number of distinct 'Origin' categories, taken from the training rows.
total_origin = len(set(df_train_norm['Origin']))
print(total_origin)

# Convert 1, 2, 3 to 1, 2, 0 respectively so the codes are valid class indices.
# num_classes is passed explicitly: without it one_hot infers the width from
# the largest index present, so a split lacking that class would produce fewer
# columns and train/test feature widths would disagree.
origin_encoded = one_hot(torch.from_numpy(df_train_norm['Origin'].values) % total_origin,
                         num_classes=total_origin)
# Convert numeric columns to a tensor.
x_train_numeric = torch.tensor(df_train_norm[numeric_column_names].values)
# Concatenate the numeric tensor with the encoded origin column (already a
# tensor) along the feature axis, then cast everything to float32.
x_train = torch.cat([x_train_numeric, origin_encoded], 1).float()

# Same steps for the test rows; `origin_encoded` is reassigned and afterwards
# holds the test-set encoding.
origin_encoded = one_hot(torch.from_numpy(df_test_norm['Origin'].values) % total_origin,
                         num_classes=total_origin)
x_test_numeric = torch.tensor(df_test_norm[numeric_column_names].values)
x_test = torch.cat([x_test_numeric, origin_encoded], 1).float()

3


In [94]:
# Training features taken from `numeric_column_names` (float64 per the output),
# before the one-hot Origin columns are appended.
x_train_numeric

tensor([[-8.2430e-01, -5.3092e-01, -4.9921e-01, -5.5526e-01, -1.6412e-03,
          3.0000e+00],
        [ 3.5113e-01,  3.4562e-01,  1.8646e-01,  7.7634e-01,  1.0991e+00,
          2.0000e+00],
        [-8.2430e-01, -8.9128e-01, -5.2559e-01, -8.7461e-01,  2.9189e-01,
          1.0000e+00],
        ...,
        [ 1.5266e+00,  1.1443e+00,  7.1390e-01,  1.3396e+00, -6.2540e-01,
          0.0000e+00],
        [-8.2430e-01, -8.9128e-01, -1.0530e+00, -1.0726e+00,  4.7535e-01,
          2.0000e+00],
        [ 1.5266e+00,  1.5631e+00,  1.6369e+00,  1.4704e+00, -1.3592e+00,
          0.0000e+00]], dtype=torch.float64)

In [95]:
# `origin_encoded` was last assigned from df_test_norm, so this displays the
# one-hot Origin encoding of the test rows, not the training rows.
origin_encoded

tensor([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 

In [96]:
# (rows, features): 6 columns from `numeric_column_names` + 3 one-hot Origin columns = 9.
x_train.shape

torch.Size([313, 9])

In [97]:
# Final float training feature matrix (numeric features followed by one-hot Origin).
x_train

tensor([[-0.8243, -0.5309, -0.4992,  ...,  0.0000,  1.0000,  0.0000],
        [ 0.3511,  0.3456,  0.1865,  ...,  0.0000,  1.0000,  0.0000],
        [-0.8243, -0.8913, -0.5256,  ...,  0.0000,  0.0000,  1.0000],
        ...,
        [ 1.5266,  1.1443,  0.7139,  ...,  0.0000,  1.0000,  0.0000],
        [-0.8243, -0.8913, -1.0530,  ...,  0.0000,  1.0000,  0.0000],
        [ 1.5266,  1.5631,  1.6369,  ...,  0.0000,  1.0000,  0.0000]])

In [98]:
# Regression targets: the 'MPG' column of each split as float32 tensors.
y_train = torch.tensor(df_train_norm['MPG'].values, dtype=torch.float32)
y_test = torch.tensor(df_test_norm['MPG'].values, dtype=torch.float32)

In [99]:
# Test-set MPG targets.
y_test

tensor([23.0000, 29.0000, 32.4000, 19.0000, 38.0000, 33.5000, 26.0000, 14.0000,
        28.0000, 32.4000, 21.5000, 41.5000, 17.0000, 33.5000, 29.0000, 17.0000,
        36.0000, 43.4000, 14.0000, 20.0000, 26.0000, 16.0000, 15.0000, 15.5000,
        15.0000, 18.5000, 31.5000, 20.0000, 21.0000, 30.0000, 19.2000, 24.5000,
        13.0000, 22.0000, 20.5000, 17.5000, 18.0000, 19.2000, 38.0000, 27.0000,
        15.5000, 14.0000, 20.0000, 19.0000, 18.0000, 15.0000, 11.0000, 31.0000,
        22.0000, 13.0000, 17.0000, 20.5000, 28.0000, 32.0000, 29.8000, 18.0000,
        28.8000, 21.0000, 33.5000, 36.1000, 38.0000, 28.0000, 37.0000, 19.9000,
        22.5000, 19.0000, 19.0000, 14.0000, 31.9000, 12.0000, 37.0000, 19.2000,
        13.0000, 34.3000, 26.0000, 35.7000, 14.0000, 13.0000, 13.0000])

In [100]:
from torch.utils.data import DataLoader, TensorDataset

# Mini-batch size used for training.
batch_size = 8

# Pair each feature row with its MPG target.
train_ds = TensorDataset(x_train, y_train)

# Seed torch's RNG before creating the shuffling loader.
torch.manual_seed(1)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

In [101]:
import torch
import torch.nn as nn

# Widths of the hidden layers, in order.
hidden_units = [8, 4]
# The network predicts a single value per row (the MPG target).
output_size = 1
input_size = x_train.shape[1]
print(f"Number of features: {input_size}")

# Stack Linear -> ReLU for every hidden layer; `input_size` always tracks the
# width produced by the previous layer.
all_layers = []
for hidden_unit in hidden_units:
    print(f"Creating Linear layer with {input_size} input units and {hidden_unit} hidden units.")
    layer = nn.Linear(input_size, hidden_unit)
    all_layers.append(layer)
    all_layers.append(nn.ReLU())
    input_size = hidden_unit

# Report and build the output layer from the same tracked sizes. Previously the
# message printed hidden_units[-1] as the output width even though the layer
# has one output, and indexing hidden_units[-1] fails for an empty hidden list.
print(f"Creating output layer Linear layer with {input_size} input units and {output_size} output units.")
all_layers.append(nn.Linear(input_size, output_size))

model = nn.Sequential(*all_layers)

model

Number of features: 9
Creating Linear layer with 9 input units and 8 hidden units.
Creating Linear layer with 8 input units and 4 hidden units.
Creating output layer Linear layer with 4 input units and 4 output units.


Sequential(
  (0): Linear(in_features=9, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=1, bias=True)
)

In [102]:
# https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Move the model's parameters onto the chosen device.
model = model.to(device)

cpu


In [103]:
# First parameter tensor of the model: the weight matrix of the first Linear
# layer (8 rows x 9 inputs in the output), shown here before any training step.
next(model.parameters())

Parameter containing:
tensor([[ 0.1718, -0.1471, -0.0646,  0.1565, -0.3138,  0.1999, -0.0686,  0.1696,
          0.0463],
        [-0.0408,  0.0925,  0.0164,  0.1217, -0.1299, -0.0243, -0.0300,  0.0483,
         -0.0013],
        [ 0.2914,  0.1037, -0.1241, -0.2013, -0.0559, -0.1438, -0.1068,  0.0160,
          0.1987],
        [ 0.1812, -0.3259,  0.2066,  0.0931,  0.3162,  0.2200, -0.3037, -0.3169,
         -0.1608],
        [ 0.2927, -0.0555,  0.1427, -0.1549,  0.3271, -0.1410,  0.2500,  0.0039,
         -0.1756],
        [ 0.1713, -0.1769,  0.0980, -0.0963, -0.0365, -0.3205, -0.1589,  0.1809,
         -0.0810],
        [ 0.3320,  0.2672, -0.0156, -0.2225,  0.2030,  0.1035, -0.2155,  0.2165,
          0.2024],
        [ 0.2956, -0.1869, -0.0549, -0.0065,  0.0487, -0.2530, -0.2365,  0.1813,
         -0.0782]], requires_grad=True)

In [104]:
# Size of the first element of every parameter: for a weight matrix this is
# the length of one row (its input width); for a bias vector it is a scalar,
# hence the empty torch.Size([]) entries.
[x[0].size() for x in model.parameters()]

[torch.Size([9]),
 torch.Size([]),
 torch.Size([8]),
 torch.Size([]),
 torch.Size([4]),
 torch.Size([])]

In [105]:
# Step size for plain stochastic gradient descent.
learning_rate = 0.001

# Mean squared error between predicted and actual MPG.
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [106]:
torch.manual_seed(1)
num_epochs = 200
log_epochs = 20
# The model may have been moved to the GPU; every batch has to live on the same
# device as the parameters or the forward pass raises a device-mismatch error.
model_device = next(model.parameters()).device
for epoch in range(num_epochs):
    # Running sum of the per-batch losses for this epoch.
    loss_hist_train = 0
    for x_batch, y_batch in train_dl:
        x_batch = x_batch.to(model_device)
        y_batch = y_batch.to(model_device)
        # The model returns shape (batch, 1); take column 0 to match y_batch.
        pred = model(x_batch)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # Sets the gradients of all optimized tensors to zero.
        # https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch
        # PyTorch accumulates gradients across backward passes, so they must be
        # cleared once per mini-batch; clearing right after the step leaves
        # them at zero for the next backward pass.
        loss_hist_train += loss.item()
    # Epoch 0 already satisfies the modulo test, so no separate check is needed.
    if epoch % log_epochs == 0:
        print(f'Epoch {epoch}  Loss {loss_hist_train/len(train_dl):.4f}')

Epoch 0  Loss 536.1047
Epoch 20  Loss 8.4361
Epoch 40  Loss 7.8695
Epoch 60  Loss 7.1891
Epoch 80  Loss 6.7064
Epoch 100  Loss 6.7603
Epoch 120  Loss 6.3107
Epoch 140  Loss 6.6884
Epoch 160  Loss 6.7549
Epoch 180  Loss 6.2029


In [107]:
# torch.no_grad() is a context manager that disables gradient calculation.
# That is useful for inference, when you are sure you will not call
# Tensor.backward(); it reduces memory consumption for computations that would
# otherwise have requires_grad=True.
with torch.no_grad():
    # Evaluate on whatever device the model lives on; leaving the test tensors
    # on the CPU fails with a device mismatch when the model is on the GPU.
    eval_device = next(model.parameters()).device
    target = y_test.to(eval_device)
    # The model returns shape (n, 1); take column 0 to match the target shape.
    pred = model(x_test.to(eval_device))[:, 0]
    loss = loss_fn(pred, target)
    print(f'Test MSE: {loss.item():.4f}')
    print(f'Test MAE: {nn.L1Loss()(pred, target).item():.4f}')

Test MSE: 9.5907
Test MAE: 2.1177


---

In [72]:
# First parameter tensor of the model (weights of the first Linear layer).
# NOTE(review): this cell's execution count (72) is lower than the cells above,
# so the displayed weights presumably come from an earlier run — re-execute
# after training or remove the cell.
next(model.parameters())

Parameter containing:
tensor([[-3.6008e-01, -9.4660e-01, -7.9548e-02,  2.6555e-02, -6.0029e-01,
         -7.7963e-02, -4.3352e-02, -6.4887e-01,  8.9551e-01],
        [-2.9238e-02, -6.0061e-01, -4.8932e-01, -9.5207e-01, -3.1235e-01,
          5.4767e-01,  6.5131e-02, -7.9877e-01,  3.3817e-01],
        [ 2.3137e-01, -1.9330e-01, -4.9405e-02, -5.0368e-01, -3.1565e-02,
          9.6947e-01,  1.7058e-01,  5.2290e-02,  3.4418e-01],
        [-4.1887e-02, -1.2906e-01, -9.1869e-04, -9.9718e-01,  3.5891e-01,
          3.2248e-01, -8.1820e-02,  1.9410e-01,  6.1055e-01],
        [ 2.0610e-01,  3.0306e-01,  8.8509e-03,  3.3724e-01, -1.4698e-02,
         -2.5827e-01, -3.1231e-01,  6.6519e-01,  1.9471e-01],
        [ 1.1136e-01, -1.1750e-01,  2.9848e-01, -7.5537e-02,  2.9701e-01,
         -9.4273e-02, -1.9617e-01,  2.3970e-02, -1.4763e-01],
        [-1.0121e-01, -1.3847e-01, -3.6190e-01, -1.5773e-01,  2.2174e-01,
         -1.4071e-01,  6.0477e-02, -3.5173e-03,  4.6490e-01],
        [-3.7761e-01, -5.1