# Pytorch - Linear Regression 
I have coverd all the approches to train a linear model which includes 
1. Simple linear model 
2. Sequential Implementation (Added Non linearity )
   (Wanted you to see effect of NL:)
3. Functional Implementation (Added Non linearity )

Hope You will find it interesting and point to point.!

In [1]:
# Basic Imports  
import torch           # Main torch lib 
import torch.nn as nn  # nn module contains all usefull functions 
import numpy as np     
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from torch.utils.data import TensorDataset , DataLoader 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Uploading csv file on colab for Linear Regression 
# cement Slump Data 
from google.colab import files
files.upload()

Saving cement_slump.csv to cement_slump.csv


{'cement_slump.csv': b'Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)\r\n273,82,105,210,9,904,680,23,62,34.99\r\n163,149,191,180,12,843,746,0,20,41.14\r\n162,148,191,179,16,840,743,1,20,41.81\r\n162,148,190,179,19,838,741,3,21.5,42.08\r\n154,112,144,220,10,923,658,20,64,26.82\r\n147,89,115,202,9,860,829,23,55,25.21\r\n152,139,178,168,18,944,695,0,20,38.86\r\n145,0,227,240,6,750,853,14.5,58.5,36.59\r\n152,0,237,204,6,785,892,15.5,51,32.71\r\n304,0,140,214,6,895,722,19,51,38.46\r\n145,106,136,208,10,751,883,24.5,61,26.02\r\n148,109,139,193,7,768,902,23.75,58,28.03\r\n142,130,167,215,6,735,836,25.5,67,31.37\r\n354,0,0,234,6,959,691,17,54,33.91\r\n374,0,0,190,7,1013,730,14.5,42.5,32.44\r\n159,116,149,175,15,953,720,23.5,54.5,34.05\r\n153,0,239,200,6,1002,684,12,35,28.29\r\n295,106,136,206,11,750,766,25,68.5,41.01\r\n310,0,143,168,10,914,804,20.5,48.2,49.3\r\n296,97,0,219,9,932,685,15,48.5,29.23\r\n305,100,0,196,10,959,705,20,49,29

# UCI REPOSITORY 
# To show Linear regression I am using cement slump dataset 
## Official link : https://archive.ics.uci.edu/ml/datasets/concrete+slump+test





# Data Preparation

In [171]:
!ls

cement_slump.csv  sample_data


In [217]:
df = pd.read_csv('cement_slump.csv')
df.head()

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0,23.0,62.0,34.99
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0,0.0,20.0,41.14
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0,1.0,20.0,41.81
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0,3.0,21.5,42.08
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0,20.0,64.0,26.82


In [218]:
# No missing Values -- so we can directly start working on model building and training 
df.isnull().sum()

Cement                                0
Slag                                  0
Fly ash                               0
Water                                 0
SP                                    0
Coarse Aggr.                          0
Fine Aggr.                            0
SLUMP(cm)                             0
FLOW(cm)                              0
Compressive Strength (28-day)(Mpa)    0
dtype: int64

In [219]:
# Data - Target Split 
# Here we have 3 Target Variables 
X = df.drop(['SLUMP(cm)','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],axis=1)
y = df[['SLUMP(cm)','FLOW(cm)','Compressive Strength (28-day)(Mpa)']]

In [220]:
X.head()

Unnamed: 0,Cement,Slag,Fly ash,Water,SP,Coarse Aggr.,Fine Aggr.
0,273.0,82.0,105.0,210.0,9.0,904.0,680.0
1,163.0,149.0,191.0,180.0,12.0,843.0,746.0
2,162.0,148.0,191.0,179.0,16.0,840.0,743.0
3,162.0,148.0,190.0,179.0,19.0,838.0,741.0
4,154.0,112.0,144.0,220.0,10.0,923.0,658.0


In [221]:
y.head()

Unnamed: 0,SLUMP(cm),FLOW(cm),Compressive Strength (28-day)(Mpa)
0,23.0,62.0,34.99
1,0.0,20.0,41.14
2,1.0,20.0,41.81
3,3.0,21.5,42.08
4,20.0,64.0,26.82


In [222]:
# By default pytorch uses float32 Tensor and numpy uses float64 
# Float 32 is much faster in operations than float64 
# So we will convert ndarry to pytorch tensor 

# Current dtype
print(X.values.dtype)
print(y.values.dtype)

# Pytorch uses shape(rows,cols) so reshaped X and y 
X = X.values.astype(np.float32).reshape(X.shape[0],-1)
y = y.values.astype(np.float32).reshape(y.shape[0],3)

float64
float64


In [223]:
# Converted nd.array into torch tensor 
X = torch.from_numpy(X)
y = torch.from_numpy(y)

In [224]:
print(X.dtype)
print(type(X))

torch.float32
<class 'torch.Tensor'>


In [225]:
# Lets do the Train - Test split 
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=101)
len(X_train) , len(y_train) , len(X_test) , len(y_test) 

(77, 77, 26, 26)

In [226]:
# Lets Create Dataloader so we can get a batch of 32 
# To do this first we need to create a iterable of x and y 
# Which we can do with TensorDataset 

train_data = TensorDataset(X_train,y_train)
test_data = TensorDataset(X_test,y_test)

In [227]:
train_data[0:1]

(tensor([[1.4110e+02, 6.0000e-01, 2.0950e+02, 1.8880e+02, 4.6000e+00, 9.9610e+02,
          7.8920e+02]]), tensor([[23.5000, 53.0000, 30.4300]]))

In [228]:
# Since it is now tuple of tensor we can load it in dataloader to get batches
train_data_loader = DataLoader(train_data,batch_size=32)
# we want to test whole data at once so create batch size = size of data set here 26
test_data_loader = DataLoader(test_data,batch_size=26)

# Model Creation and Training 

In [229]:
# I have created linear model 
# That why i wont be using activation function 
# since we know that without activation function whole neural network is a linear model 
# y = w1x1 + w2x2 + ..... w7X7 

In [230]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [231]:
class Linear_NN(nn.Module):
  def __init__(self,input_shape,output_shape):
    super(Linear_NN,self).__init__()
    self.Layer = nn.Linear(input_shape,output_shape)

  def forward(self,input):
    return self.Layer(input)

In [232]:
model = Linear_NN(X.shape[1],y.shape[1])
model

Linear_NN(
  (Layer): Linear(in_features=7, out_features=3, bias=True)
)

In [233]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [234]:
# Lets Train Model  
n_epoch = 1000
loss_matrix =[]
for epoch in range(n_epoch):
  if epoch%100 ==0:
    print(f'Epoch {epoch}/{n_epoch} ')

  for batch , (input,output) in enumerate(train_data_loader):

    input.device_ = device 
    output.device_ = device
   
    optimizer.zero_grad()            # Removing previously stored Gradient 
    y_pred = model.forward(input)    # Feed Forward Network 
    loss = criterion(output,y_pred)  # Loss Calculation 
    loss.backward()                  # d(loss)/dw 
    optimizer.step()                 # w = w - nd(loss)/dw
    if epoch%100 == 0:
     print(f'    Batch : {batch}  Loss : {loss.item()} ')

  loss_matrix.append(loss.item())


Epoch 0/1000 
    Batch : 0  Loss : 42068.75390625 
    Batch : 1  Loss : 39487.88671875 
    Batch : 2  Loss : 38999.28125 
Epoch 100/1000 
    Batch : 0  Loss : 669.0714721679688 
    Batch : 1  Loss : 839.951416015625 
    Batch : 2  Loss : 785.2457275390625 
Epoch 200/1000 
    Batch : 0  Loss : 550.010009765625 
    Batch : 1  Loss : 704.3904418945312 
    Batch : 2  Loss : 640.0328979492188 
Epoch 300/1000 
    Batch : 0  Loss : 463.3839111328125 
    Batch : 1  Loss : 576.6366577148438 
    Batch : 2  Loss : 527.2811279296875 
Epoch 400/1000 
    Batch : 0  Loss : 385.7088317871094 
    Batch : 1  Loss : 457.9784240722656 
    Batch : 2  Loss : 429.4897766113281 
Epoch 500/1000 
    Batch : 0  Loss : 318.47747802734375 
    Batch : 1  Loss : 356.9642028808594 
    Batch : 2  Loss : 346.8855285644531 
Epoch 600/1000 
    Batch : 0  Loss : 263.0095520019531 
    Batch : 1  Loss : 276.7575988769531 
    Batch : 2  Loss : 279.89935302734375 
Epoch 700/1000 
    Batch : 0  Loss : 219

In [235]:
# Lets test Our model Now 
with torch.no_grad():
  for batch , (input,output) in enumerate(test_data_loader):
    input.device_ = device 
    output.device_ = device

    y_pred = model.forward(input)
    loss = criterion(output,y_pred)
    
    print(f'Loss : {loss.item()}')

Loss : 117.94634246826172


In [236]:
# Test our model on Custom Data 
# since we have 7 Columns lets generate 7 random values 
data = torch.randint(10,50,(7,)).reshape(1,7)

In [237]:
# Getting 3 values at output 
# so our model is working fine 
model.forward(data.float())

tensor([[-2.2538, -1.5809,  9.9382]], grad_fn=<AddmmBackward>)

# Lets Implement Non Linear Model (Sequential Implementation)

In [275]:
# Sequential implementation of seq model 
class Non_Linear_NN(nn.Module):
  def __init__(self,input_shape,output_shape):
    super(Non_Linear_NN,self).__init__()
    self.Layers = nn.Sequential(
        nn.Linear(input_shape,24),
        nn.ReLU(),
        nn.Linear(24,output_shape),
    )
  
  def forward(self,input):
    return self.Layers(input)

In [276]:
model = Non_Linear_NN(X.shape[1],y.shape[1])
model

Non_Linear_NN(
  (Layers): Sequential(
    (0): Linear(in_features=7, out_features=24, bias=True)
    (1): ReLU()
    (2): Linear(in_features=24, out_features=3, bias=True)
  )
)

In [277]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)

In [278]:
# Lets Train Model  
n_epoch = 200
loss_matrix =[]
for epoch in range(n_epoch):
  if epoch%50 == 0:
    print(f'Epoch {epoch}/{n_epoch} ')
  
  for batch , (input,output) in enumerate(train_data_loader):
    input.device_ = device 
    output.device_ = device

    optimizer.zero_grad()            # Removing previously stored Gradient 
    y_pred = model.forward(input)    # Feed Forward Network 
    loss = criterion(output,y_pred)  # Loss Calculation 
    loss.backward()                  # d(loss)/dw 
    optimizer.step()                 # w = w - nd(loss)/dw

    if epoch%50==0:
      print(f'    Batch : {batch}  Loss : {loss.item()} ')

  loss_matrix.append(loss.item())


Epoch 0/200 
    Batch : 0  Loss : 12037.0888671875 
    Batch : 1  Loss : 10193.9482421875 
    Batch : 2  Loss : 9672.953125 
Epoch 50/200 
    Batch : 0  Loss : 223.0378875732422 
    Batch : 1  Loss : 180.7134246826172 
    Batch : 2  Loss : 200.82286071777344 
Epoch 100/200 
    Batch : 0  Loss : 155.7143096923828 
    Batch : 1  Loss : 126.11968231201172 
    Batch : 2  Loss : 136.8185272216797 
Epoch 150/200 
    Batch : 0  Loss : 124.65538787841797 
    Batch : 1  Loss : 103.64739990234375 
    Batch : 2  Loss : 103.22954559326172 


In [279]:
# Lets test Our model Now 
with torch.no_grad():
  for batch , (input,output) in enumerate(test_data_loader):
    input.device_ = device 
    output.device_ = device

    y_pred = model.forward(input)
    loss = criterion(output,y_pred)
    
    print(f'Loss : {loss.item()}')

Loss : 97.08958435058594


In [280]:
# Test our model on Custom Data 
# since we have 7 Columns lets generate 7 random values 
data = torch.randint(10,50,(7,)).reshape(1,7)

In [281]:
model.forward(data.float())

tensor([[-2.3335, -4.9292,  3.8402]], grad_fn=<AddmmBackward>)

# Lets Implement Non Linear Model (Functional Implementation)

In [282]:
# Functional Implementation 
class Non_Linear_NN(nn.Module):
  def __init__(self,input_shape,output_shape):
    super(Non_Linear_NN,self).__init__()
    self.Layer1 = nn.Linear(input_shape,14)
    self.activation = nn.ReLU()
    self.Layer2 = nn.Linear(14,output_shape)
  
  def forward(self,input):
    x = self.Layer1(input)
    x = self.activation(x)
    x = self.Layer2(x)
    return x

In [283]:
model = Non_Linear_NN(X.shape[1],y.shape[1])
model

Non_Linear_NN(
  (Layer1): Linear(in_features=7, out_features=14, bias=True)
  (activation): ReLU()
  (Layer2): Linear(in_features=14, out_features=3, bias=True)
)

In [284]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)

In [288]:
# Lets Train Model  
n_epoch = 200
loss_matrix =[]
for epoch in range(n_epoch):
  if epoch%50 == 0:
    print(f'Epoch {epoch}/{n_epoch} ')
  
  for batch , (input,output) in enumerate(train_data_loader):
    input.device_ = device 
    output.device_ = device

    optimizer.zero_grad()            # Removing previously stored Gradient 
    y_pred = model.forward(input)    # Feed Forward Network 
    loss = criterion(output,y_pred)  # Loss Calculation 
    loss.backward()                  # d(loss)/dw 
    optimizer.step()                 # w = w - nd(loss)/dw

    if epoch%50==0:
      print(f'    Batch : {batch}  Loss : {loss.item()} ')

  loss_matrix.append(loss.item())

Epoch 0/200 
    Batch : 0  Loss : 146.9462127685547 
    Batch : 1  Loss : 102.46778106689453 
    Batch : 2  Loss : 198.8376922607422 
Epoch 50/200 
    Batch : 0  Loss : 131.5950927734375 
    Batch : 1  Loss : 97.1766357421875 
    Batch : 2  Loss : 167.99667358398438 
Epoch 100/200 
    Batch : 0  Loss : 119.29925537109375 
    Batch : 1  Loss : 92.20316314697266 
    Batch : 2  Loss : 144.0394744873047 
Epoch 150/200 
    Batch : 0  Loss : 108.0706558227539 
    Batch : 1  Loss : 87.41178131103516 
    Batch : 2  Loss : 125.18489837646484 


In [289]:
# Lets test Our model Now 
with torch.no_grad():
  for batch , (input,output) in enumerate(test_data_loader):
    input.device_ = device 
    output.device_ = device

    y_pred = model.forward(input)
    loss = criterion(output,y_pred)
    
    print(f'Loss : {loss.item()}')

Loss : 125.61939239501953


In [291]:
# Test our model on Custom Data 
# since we have 7 Columns lets generate 7 random values 
data = torch.randint(10,50,(7,)).reshape(1,7)
model.forward(data.float())

tensor([[1.3259, 2.3484, 4.8835]], grad_fn=<AddmmBackward>)

# Conclusion 

1. When i trained linear model it took 1000 epoch to get a good accuracy.
2. But when i introduced non Linearity it got good accuracy in just 400 epochs.
