<a href="https://colab.research.google.com/github/manuelrucci7/deep-learning-course/blob/main/colab/SklearVsPythonTrainingLoop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning vs Deep Learning

## Machine Learning

In [None]:
# Dati Numpy
import pickle

import numpy as np
import plotly.graph_objects as go
import torch
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# Load data: two input features on aligned grids and a non-linear target.
# The tensors are converted to NumPy once, so scikit-learn and plotly only
# ever see plain arrays.
X1 = torch.linspace(-2,8,100)
X2 = torch.linspace(0,10,100)
X = torch.stack((X1,X2),dim=1).numpy()
Y = (torch.exp(0.2*X2)*torch.sin(3*X1) - 10*torch.cos(X1)).numpy()

# Split data (fixed seed so the split is reproducible)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,shuffle=True,random_state=13)

# Train the model, possible choices: RandomForestRegressor, GradientBoostingRegressor, ExtraTrees
# random_state makes the boosted trees reproducible across "Restart & Run All".
model = make_pipeline(PolynomialFeatures(2), GradientBoostingRegressor(random_state=13))
model = model.fit(X_train,Y_train)
Y_train_hat = model.predict(X_train)

# Test the model
Y_test_hat = model.predict(X_test)

# Test metrics: mean squared error and mean absolute error on the held-out set
residuals = Y_test_hat - Y_test
mse = np.mean(residuals**2)
mae = np.mean(np.abs(residuals))
print(f"MSE: {mse}")
print(f"MAE: {mae}")

# Save model (context manager guarantees the file handle is closed and flushed)
filename = 'model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load model
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# that were produced by this notebook or another trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
Y_train_hat = loaded_model.predict(X_train)
Y_test_hat = loaded_model.predict(X_test)

# Plot results: training targets vs. predictions of the reloaded model
fig = go.Figure()
fig.add_trace( go.Scatter3d(x=X_train[:,0], y=X_train[:,1], z=Y_train,name="train",mode="markers") )
fig.add_trace( go.Scatter3d(x=X_train[:,0], y=X_train[:,1], z=Y_train_hat,name="train_hat",mode="markers") )
fig.show()


MSE: 0.7735275697395319
MAE: 0.7324304411581622


## Deep Learning

In [None]:
import torch
import numpy as np

# Create a PyTorch Dataset (it is wrapped in a DataLoader further below)
class MyDataset(torch.utils.data.Dataset):
    """In-memory regression dataset of (input, target) pairs stored as float32.

    Args:
        x: Array-like of inputs with N samples along the first axis. A 1-D
            array is treated as N samples with a single feature and stored
            with shape (N, 1) so it can be fed directly to ``torch.nn.Linear``.
        y: Array-like of N targets. A 1-D array is stored with shape (N, 1) so
            it lines up with a network that emits one value per sample and is
            not silently broadcast inside the loss.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of samples.
    """
    def __init__(self, x,y):
        # as_tensor accepts NumPy arrays, lists and tensors alike.
        self.x = torch.as_tensor(x, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32)
        # Promote flat vectors to column form: (N,) -> (N, 1).
        if self.x.dim() == 1:
            self.x = self.x.unsqueeze(1)
        if self.y.dim() == 1:
            self.y = self.y.unsqueeze(1)
        if self.x.shape[0] != self.y.shape[0]:
            raise ValueError(
                f"x and y must have the same number of samples, "
                f"got {self.x.shape[0]} and {self.y.shape[0]}"
            )
    def __getitem__(self, index):
        """Return the (input, target) pair stored at ``index``."""
        return self.x[index], self.y[index]
    def __len__(self):
        """Return the number of samples."""
        return self.x.shape[0]

# Create the neural network
class MyNet(torch.nn.Module):
    """Fully connected regressor with two tanh hidden layers.

    Args:
        in_features: Number of input features per sample (default 1).
        hidden1: Width of the first hidden layer (default 100).
        hidden2: Width of the second hidden layer (default 50).
        out_features: Number of outputs per sample (default 1).

    The defaults reproduce the original 1 -> 100 -> 50 -> 1 architecture and
    the layer attribute names are unchanged, so previously saved state dicts
    still load into ``MyNet()``.
    """
    def __init__(self, in_features=1, hidden1=100, hidden2=50, out_features=1):
        super().__init__()
        self.fc1 = torch.nn.Linear(in_features,hidden1)
        self.fc2 = torch.nn.Linear(hidden1,hidden2)
        self.fc3 = torch.nn.Linear(hidden2,out_features)
    def forward(self,x):
        """Map a batch of shape (B, in_features) to shape (B, out_features)."""
        # Other activations to try: torch.sigmoid, torch.tanh, torch.relu
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        # No activation on the output layer: this is a regression head.
        y = self.fc3(x)
        return y

def training_loop(epocs, opt, net, loss_fn, train_dl, test_dl):
    """Train ``net`` and checkpoint the weights with the best test error.

    Args:
        epocs: Number of passes over ``train_dl``.
        opt: Optimizer built over ``net.parameters()``.
        net: Model to train; batches are moved to the device of its parameters.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        train_dl: Iterable of ``(x, y)`` training batches (must support ``len``).
        test_dl: Iterable of ``(x, y)`` batches used for model selection.

    Side effects:
        Prints the average per-batch training loss after every epoch and writes
        ``best_model.pt`` (a state dict) whenever the per-sample squared error
        on ``test_dl`` improves.
    """
    # Take the device from the model instead of relying on a notebook global.
    first_param = next(net.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Start from +inf so the first evaluated epoch always produces a checkpoint.
    best_metric = float("inf")
    for epoch in range(epocs):
        net.train()
        loss = 0.0
        for xb,yb in train_dl:
            # Forward network
            xb = xb.to(run_device)
            yb = yb.to(run_device)
            yb_h = net(xb)

            # A (B,) target against a (B, 1) prediction would be broadcast to
            # (B, B) inside the loss; align the target when sizes agree.
            if yb.shape != yb_h.shape and yb.numel() == yb_h.numel():
                yb = yb.reshape(yb_h.shape)

            # Error cost function
            loss_b = loss_fn(yb_h, yb)

            # Optimization: clear stale gradients before accumulating new ones
            opt.zero_grad()
            loss_b.backward()
            opt.step()

            loss += loss_b.item()

        # Average loss per batch
        loss_avg = loss/len(train_dl)
        print(f"{epoch} Loss: {loss_avg}")

        # Evaluation: no gradient tracking, layers switched to inference mode
        net.eval()
        squared_error = 0.0
        n_samples = 0
        with torch.no_grad():
            for xb,yb in test_dl:
                xb = xb.to(run_device)
                yb = yb.to(run_device)
                yb_h = net(xb)
                if yb.shape != yb_h.shape and yb.numel() == yb_h.numel():
                    yb = yb.reshape(yb_h.shape)
                squared_error += torch.sum(torch.pow(yb_h - yb,2)).item()
                n_samples += len(xb)
        net.train()

        if n_samples == 0:
            # Nothing to evaluate on, so there is no basis for model selection.
            continue

        # Mean squared error per sample (not per batch)
        metric = squared_error/n_samples

        if metric < best_metric:
            best_metric = metric
            # Save model
            torch.save(net.state_dict(), "best_model.pt")


# Load data
# Single-feature problem: Y is a non-linear function of X only.
# A two-feature variant (it needs a network with two inputs) would be:
#X1 = np.linspace(-2,8,1000)
#X2 = np.linspace(0,10,1000)
#X = np.stack((X1,X2),axis=1)
#Y = np.exp(0.2*X1)*np.sin(3*X2) - 10*np.cos(X1)
X = np.linspace(-2,8,1000)
Y = np.exp(0.2*X)*np.sin(3*X) - 10*np.cos(X)

# Train and test split (fixed seed so the split is reproducible)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,shuffle=True,random_state=13)

# Dataset
train_ds = MyDataset(X_train,Y_train)
test_ds = MyDataset(X_test,Y_test)


# Deep learning params
epochs = 500
batch_size = 4
# Sum (not mean) of squared errors over each batch
loss_func = torch.nn.MSELoss(reduction="sum")
device = torch.device("cpu")
# Seed torch so weight initialisation and batch order are reproducible
torch.manual_seed(13)
net = MyNet()
net = net.to(device)
opt = torch.optim.Adam(net.parameters(),lr=0.01)

# DataLoader: use the configured batch size; only the training set needs shuffling
train_dl = torch.utils.data.DataLoader(train_ds,batch_size=batch_size,shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds,batch_size=batch_size,shuffle=False)

# Training loop
training_loop(epochs,opt,net,loss_func,train_dl,test_dl)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x4 and 1x100)

In [None]:
# Sanity check: show the shapes of the input and target arrays built above.
print(X.shape, Y.shape)

(1000,) (1000,)



Using a target size (torch.Size([4])) that is different to the input size (torch.Size([4, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.



0 Loss: 752.1323566055298
1 Loss: 746.9343520927429
2 Loss: 749.5879149055481
3 Loss: 737.5101820373535
4 Loss: 737.5560471725464
5 Loss: 716.5128674316406
6 Loss: 730.1622488212586
7 Loss: 727.7857795333862
8 Loss: 728.1825234985351
9 Loss: 708.2287178802491
10 Loss: 711.1776638269424
11 Loss: 715.4003203582764
12 Loss: 723.482977347374
13 Loss: 717.80420337677
14 Loss: 709.6411466598511
15 Loss: 703.6559846496582
16 Loss: 716.8623174095154
17 Loss: 717.0407675552368
18 Loss: 712.922045249939
19 Loss: 714.7420168113708
20 Loss: 707.6600749969482
21 Loss: 713.543226776123
22 Loss: 712.3163794612884
23 Loss: 707.8834223937988
24 Loss: 702.6296193504334
25 Loss: 712.3807767105103
26 Loss: 712.5984010791778
27 Loss: 711.4209154129028
28 Loss: 701.2154746723176
29 Loss: 710.3869367599488
30 Loss: 711.6154314804077
31 Loss: 704.8115641212463
32 Loss: 708.520270614624
33 Loss: 714.8960436630249
34 Loss: 711.8672352313995
35 Loss: 701.2509715771675
36 Loss: 709.2913711929322
37 Loss: 710.2471

KeyboardInterrupt: 

In [None]:
import plotly.graph_objects as go

# Read model
net = MyNet()
# weights_only=True restricts unpickling to tensors/state dicts, which is all
# a checkpoint written with torch.save(net.state_dict(), ...) contains.
net.load_state_dict(torch.load("best_model.pt", map_location="cpu", weights_only=True))
net.eval()

# The first Linear layer expects (N, features); promote a flat vector to (N, 1).
X_test_t = torch.from_numpy(X_test).type(torch.float32)
if X_test_t.dim() == 1:
    X_test_t = X_test_t.unsqueeze(1)

# Inference only: no gradient tracking; drop the trailing output axis for plotting.
with torch.no_grad():
    Y_test_hat = net(X_test_t).squeeze(-1).numpy()

# Plot test results using plotly
fig = go.Figure()
fig.add_trace( go.Scatter(x=X_test, y=Y_test,name="test",mode="markers") )
fig.add_trace( go.Scatter(x=X_test, y=Y_test_hat,name="test_hat",mode="markers") )
fig.show()



You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.

