In [66]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

np.set_printoptions(suppress=True) # turn of scientific notation
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import load_iris
from sklearn import metrics

torch.__version__

'2.0.1'

In [2]:
torch.backends.mps.is_available()

True

In [3]:
device = torch.device('mps')
x = torch.ones(5, device=device)
x

tensor([1., 1., 1., 1., 1.], device='mps:0')

In [4]:
# matrix 1x2
mat_1 = torch.tensor([[3,3]], device=device)
print(mat_1)
print(mat_1.size())

tensor([[3, 3]], device='mps:0')
torch.Size([1, 2])


In [5]:
x = torch.tensor([1,2], device=device)
y = torch.tensor([2,3], device=device)
print(x)

tensor([1, 2], device='mps:0')


In [6]:
sub = torch.subtract(x,y)
print(sub)

tensor([-1, -1], device='mps:0')


### MPG dataset

In [7]:
df = pd.read_csv('https://raw.githubusercontent.com/rishi-wqd190004/DS_ML_Lectures/main/datasets/auto-mpg.csv', na_values=["NA", "?"])
df.head(2)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,1,buick skylark 320


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   car name      398 non-null    object 
dtypes: float64(4), int64(4), object(1)
memory usage: 28.1+ KB


In [9]:
cars = df['car name']
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

In [10]:
df.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

In [11]:
df['horsepower'].isnull().sum()

0

In [12]:
x = df[['cylinders', 'displacement', 'horsepower', 'weight','acceleration', 'model year', 'origin']].values
x.shape

(398, 7)

In [13]:
x

array([[  8. , 307. , 130. , ...,  12. ,  70. ,   1. ],
       [  8. , 350. , 165. , ...,  11.5,  70. ,   1. ],
       [  8. , 318. , 150. , ...,  11. ,  70. ,   1. ],
       ...,
       [  4. , 135. ,  84. , ...,  11.6,  82. ,   1. ],
       [  4. , 120. ,  79. , ...,  18.6,  82. ,   1. ],
       [  4. , 119. ,  82. , ...,  19.4,  82. ,   1. ]])

In [14]:
print(type(x))

<class 'numpy.ndarray'>


In [15]:
y = df['mpg'].values

In [16]:
y.shape

(398,)

In [17]:
# converting to torch
# x_ = torch.from_numpy(x)
x = torch.tensor(x, device=device, dtype=torch.float32)
y = torch.tensor(y, device=device, dtype=torch.float32)

### Model

In [18]:
model = nn.Sequential(
    nn.Linear(x.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50,25),
    nn.ReLU(),
    nn.Linear(25,1)
)

# compiling the model
model = torch.compile(model, backend='aot_eager').to(device)

# Loss function for model
loss_fn = nn.MSELoss()

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [19]:
# train for 1000 epoch
for epoch in range(1000):
    optimizer.zero_grad()
    out = model(x).flatten()
    loss = loss_fn(out, y)
    loss.backward()
    optimizer.step()

    # display 100 epochs
    if epoch % 100 == 0:
        print('Epoch {}, Loss: {}'.format(epoch, loss.item()))

Epoch 0, Loss: 7615.0634765625
Epoch 100, Loss: 104.96875
Epoch 200, Loss: 32.324405670166016
Epoch 300, Loss: 17.576492309570312
Epoch 400, Loss: 11.433490753173828
Epoch 500, Loss: 1766.292724609375
Epoch 600, Loss: 20.34697723388672
Epoch 700, Loss: 15.107710838317871
Epoch 800, Loss: 12.473526000976562
Epoch 900, Loss: 11.243343353271484


In [20]:
pred = model(x)
print(pred.shape)
print(pred[:10])

torch.Size([398, 1])
tensor([[15.3066],
        [14.4538],
        [15.0534],
        [15.3120],
        [15.1110],
        [12.5219],
        [12.0455],
        [12.1441],
        [12.0672],
        [13.3127]], device='mps:0', grad_fn=<SliceBackward0>)


- bringing back predictions from cpu 

In [23]:
score = np.sqrt(metrics.mean_squared_error(pred.cpu().detach(), y.cpu().detach()))
print('Fianl RMSE: {}'.format(score))

Fianl RMSE: 3.2471938133239746


In [24]:
# scoring using torch
score = torch.sqrt(nn.functional.mse_loss(pred.flatten(), y))
print('Fianl RMSE: {}'.format(score))

Fianl RMSE: 3.2471940517425537


### Iris data classification

In [55]:
# loading iris from sklearn.datasets
x, y = load_iris(as_frame=True, return_X_y=True)
x.shape, y.shape

((150, 4), (150,))

In [56]:
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: target, Length: 150, dtype: int64

In [57]:
tar_name = load_iris()['target_names']
tar_name

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [30]:
x.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [58]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [59]:
cols = x.columns

In [60]:
y.value_counts()

target
0    50
1    50
2    50
Name: count, dtype: int64

In [61]:
# label encoding the target values
x = x.values
le = LabelEncoder()
y = le.fit_transform(y)
species = le.classes_

In [62]:
# converting to torch tensors and moving to mps:0 (metal GPU)
x = torch.tensor(x, device=device, dtype=torch.float32)
y = torch.tensor(y, device=device, dtype=torch.int64)

In [63]:
# modelling
model = nn.Sequential(
    nn.Linear(x.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50,25),
    nn.ReLU(),
    nn.Linear(25, len(species)),
    nn.LogSoftmax(dim=1)
)

model = torch.compile(model, backend="aot_eager").to(device)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    out = model(x)
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epoch {}, loss {}".format(epoch, loss.item()))

Epoch 0, loss 1.1019272804260254
Epoch 100, loss 0.04856609180569649
Epoch 200, loss 0.04349017143249512
Epoch 300, loss 0.04247387871146202
Epoch 400, loss 0.04038991779088974
Epoch 500, loss 0.039979755878448486
Epoch 600, loss 0.040963757783174515
Epoch 700, loss 0.03973352164030075
Epoch 800, loss 0.04006851837038994
Epoch 900, loss 0.03969661891460419


In [64]:
print(species)

[0 1 2]


In [67]:
# evaluating model
model.eval()
pred = model(x)
print('Shape of prediction: {}', pred.shape)
print(pred[:10])

Shape of prediction: {} torch.Size([150, 3])
tensor([[-1.9073e-06, -1.3178e+01, -5.6956e+01],
        [-5.6028e-06, -1.2101e+01, -5.3266e+01],
        [-3.0994e-06, -1.2675e+01, -5.4840e+01],
        [-6.0797e-06, -1.2010e+01, -5.2538e+01],
        [-1.5497e-06, -1.3358e+01, -5.7390e+01],
        [-8.5830e-06, -1.1667e+01, -5.3999e+01],
        [-3.8147e-06, -1.2482e+01, -5.3740e+01],
        [-2.9802e-06, -1.2740e+01, -5.5382e+01],
        [-7.6294e-06, -1.1780e+01, -5.1529e+01],
        [-3.9339e-06, -1.2454e+01, -5.4622e+01]], device='mps:0',
       grad_fn=<SliceBackward0>)


In [68]:
# moving to cpu to print float values
print(pred[:10].cpu().detach().numpy())

[[ -0.00000191 -13.177552   -56.95626   ]
 [ -0.0000056  -12.101255   -53.26559   ]
 [ -0.0000031  -12.674944   -54.839558  ]
 [ -0.00000608 -12.010442   -52.5384    ]
 [ -0.00000155 -13.35756    -57.390022  ]
 [ -0.00000858 -11.6669445  -53.998894  ]
 [ -0.00000381 -12.4823065  -53.739677  ]
 [ -0.00000298 -12.740368   -55.38216   ]
 [ -0.00000763 -11.780072   -51.528706  ]
 [ -0.00000393 -12.453944   -54.622013  ]]


In [None]:
## next to read why cpu().detach().numpy (check stack overflow)
## finish this notebook and move to other (feature encode)