In [2]:
!pip install jovian -q --upgrade

In [3]:
import torch
import jovian # cloud platform
import torch.nn as nn ## help us in creating and training of the neural network
import pandas as pd 
import matplotlib.pyplot as plt
import torch.nn.functional as F 
from torch.utils.data import DataLoader, TensorDataset, random_split ##need to review these 

<IPython.core.display.Javascript object>

In [4]:
data = "car_dataset.csv"

In [7]:
df_raw = pd.read_csv(data, delimiter=';')
df_raw.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [24]:
def customize_dataset(df_raw, random_str):
    df = df_raw.copy(deep = True) #copies the df
    df = df.sample(int(0.95*len(df)), random_state=int(ord(random_str[0]))) # random sample 
    df.Year = df.Year * ord(random_str[1])/100. #scale input?
    df.Selling_Price = df.Selling_Price * ord(random_str[2])/100. #scale target?
    #ord: converts a character into its Unicode code value.
    if ord(random_str[3]) % 2 == 1:
        df = df.drop(['Car_Name'], axis=1)
    return df

    

In [25]:
your_name = "deneme"
df = customize_dataset(df_raw, your_name)
df.head()
    

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
199,2027.07,0.132,0.58,53000,Petrol,Individual,Manual,0
201,2030.1,3.575,6.79,58000,Diesel,Dealer,Manual,1
111,2036.16,1.265,1.5,8700,Petrol,Individual,Manual,0
125,2029.09,0.99,1.75,40000,Petrol,Individual,Manual,0
46,2033.13,2.915,4.89,64532,Petrol,Dealer,Manual,0


In [26]:
input_columns = ["Year","Present_Price","Kms_Driven","Owner"]
categorical_columns = ["Fuel_Type", "Seller_Type", "Transmission"]
output_columns = ["Selling_Price"]

## Data Preparation Step
### We will use Pytorch to predict car prices using machine learning

In [27]:
# Converting the dataset from dataframe to Pytorch Tensors:
# first convert it to Numpy Array;

def df_to_arrays(df):
    # copy the dataframe
    df_copy = df.copy(deep = True)
    #converting non-numeric categories to numeric
    for col in categorical_columns:
        df_copy[col] = df_copy[col].astype('category').cat.codes # every category assigned to a number
        
    #convert input and output to numpy arrays
    input_array = df_copy[input_columns].to_numpy()
    target_array = df_copy[output_columns].to_numpy()
    return input_array, target_array



input_array, target_array = df_to_arrays(df)

input_array, target_array
     
        
    
    
    
    
    
    
    
    
    
    

(array([[2.02707e+03, 5.80000e-01, 5.30000e+04, 0.00000e+00],
        [2.03010e+03, 6.79000e+00, 5.80000e+04, 1.00000e+00],
        [2.03616e+03, 1.50000e+00, 8.70000e+03, 0.00000e+00],
        ...,
        [2.03212e+03, 9.40000e+00, 7.10000e+04, 0.00000e+00],
        [2.03616e+03, 8.00000e-01, 2.00000e+04, 0.00000e+00],
        [2.03717e+03, 5.10000e-01, 4.30000e+03, 0.00000e+00]]),
 array([[ 0.132],
        [ 3.575],
        [ 1.265],
        [ 0.99 ],
        [ 2.915],
        [ 3.85 ],
        [ 6.6  ],
        [ 3.685],
        [ 0.528],
        [ 3.575],
        [ 0.418],
        [ 6.545],
        [ 7.645],
        [ 3.41 ],
        [ 1.595],
        [ 7.15 ],
        [ 2.75 ],
        [ 0.165],
        [10.175],
        [ 1.32 ],
        [ 0.88 ],
        [12.375],
        [ 4.4  ],
        [ 0.44 ],
        [ 7.15 ],
        [ 9.405],
        [ 0.495],
        [ 4.345],
        [ 0.561],
        [ 1.155],
        [ 7.92 ],
        [18.7  ],
        [ 4.95 ],
        [ 8.25 ],
 

In [28]:
# Now we have numpy arrays. We need to convert them to Pytorch Tensors
# and then we can use tensors to create a variable dataset.

inputs = torch.Tensor(input_array)
targets = torch.Tensor(target_array)

dataset = TensorDataset(inputs, targets)

train_ds, val_ds = random_split(dataset, [228, 57])
batch_size = 128

train_loader = DataLoader(train_ds, batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size)



## Creating the model

In [29]:
input_size = len(input_columns)
output_size = len(output_columns)


In [35]:

 class CarsModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(input_size,output_size)
            
        def forward(self, xb):
            out = self.linear(xb)
            return out
        
        def training_step(self,batch):
            inputs, targets = batch
            # generating predictions
            out = self(inputs)
            # loss calculation
            loss = F.l1_loss(out,targets)
            return loss
        
        def validation_step(self, batch):
            inputs, targets = batch
            # generating predictions
            out = self(inputs)
            loss = F.l1_loss(out,targets)
            return {'val_loss' : loss.detach()}
        
        def validation_epoch_end(self, outputs):
            batch_losses = [x ['val_loss'] for x in outputs]
            epoch_loss = torch.stack(batch_losses).mean() # combine losses
            return {'val_loss' : epoch_loss.item()}
        
        def epoch_end (self, epoch, result, num_epochs):
            # print result in every 20 th epoch
            if(epoch+1) %20 == 0 or epoch == num_epochs-1:
                print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']) )
            
            
model = CarsModel()

list(model.parameters())

            



[Parameter containing:
 tensor([[-0.0098,  0.1041,  0.4271, -0.4384]], requires_grad=True),
 Parameter containing:
 tensor([0.2391], requires_grad=True)]

## Training the model

In [38]:
# evalution the algorithm

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

#fitting algorithm

def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        #training step
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
        #validation step
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history

# check the initial value that val_loss have 

result = evaluate(model, val_loader)
print(result)
            
        







{'val_loss': 16422.3828125}


### Fitting

In [41]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 8060.5161
Epoch [40], val_loss: 7500.9473
Epoch [60], val_loss: 6943.4941
Epoch [80], val_loss: 6386.8442
Epoch [100], val_loss: 5828.4336


In [42]:
# Train repeatdly until have a good val_loss
epochs = 20
lr = 1e-9
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 5772.5835


In [43]:
epochs = 40 
lr = 1e-9
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 5716.6709
Epoch [40], val_loss: 5660.9507


In [44]:
epochs = 80 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 5102.1616
Epoch [40], val_loss: 4544.2339
Epoch [60], val_loss: 3986.6428
Epoch [80], val_loss: 3428.8384


In [45]:
epochs = 70 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 2871.2195
Epoch [40], val_loss: 2314.0083
Epoch [60], val_loss: 1758.5978
Epoch [70], val_loss: 1481.6649


In [46]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 928.7355
Epoch [40], val_loss: 380.4512
Epoch [60], val_loss: 48.2855
Epoch [80], val_loss: 47.5562
Epoch [100], val_loss: 47.0797


In [47]:
epochs = 80 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 46.6961
Epoch [40], val_loss: 45.9884
Epoch [60], val_loss: 45.5300
Epoch [80], val_loss: 45.1270


In [48]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 44.5945
Epoch [40], val_loss: 44.1904
Epoch [60], val_loss: 43.7178
Epoch [80], val_loss: 43.1498
Epoch [100], val_loss: 42.5990


In [49]:
epochs = 80 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 42.2373
Epoch [40], val_loss: 41.4466
Epoch [60], val_loss: 40.9101
Epoch [80], val_loss: 40.4529


In [50]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 39.8497
Epoch [40], val_loss: 39.5868
Epoch [60], val_loss: 38.9476
Epoch [80], val_loss: 38.4655
Epoch [100], val_loss: 37.8831


In [51]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 37.2713
Epoch [40], val_loss: 37.1001
Epoch [60], val_loss: 36.4330
Epoch [80], val_loss: 35.9147
Epoch [100], val_loss: 35.2172


In [52]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 35.0018
Epoch [40], val_loss: 34.2667
Epoch [60], val_loss: 33.9646
Epoch [80], val_loss: 33.2233
Epoch [100], val_loss: 32.7170


In [53]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 32.5893
Epoch [40], val_loss: 31.7008
Epoch [60], val_loss: 31.1708
Epoch [80], val_loss: 30.8828
Epoch [100], val_loss: 30.4542


In [54]:
epochs = 80 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 29.5809
Epoch [40], val_loss: 29.1787
Epoch [60], val_loss: 28.6551
Epoch [80], val_loss: 28.3259


In [55]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 27.8462
Epoch [40], val_loss: 27.0899
Epoch [60], val_loss: 26.5409
Epoch [80], val_loss: 26.1124
Epoch [100], val_loss: 25.8003


In [56]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 25.0465
Epoch [40], val_loss: 24.5930
Epoch [60], val_loss: 24.2319
Epoch [80], val_loss: 23.5196
Epoch [100], val_loss: 23.3849


In [57]:
epochs = 100 
lr = 1e-9
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 23.0273
Epoch [40], val_loss: 22.9602
Epoch [60], val_loss: 22.9002
Epoch [80], val_loss: 22.8499
Epoch [100], val_loss: 22.7976


In [62]:
epochs = 100 
lr = 1e-9
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 22.7551
Epoch [40], val_loss: 22.6953
Epoch [60], val_loss: 22.6522
Epoch [80], val_loss: 22.5991
Epoch [100], val_loss: 22.5477


In [63]:
epochs = 100 
lr = 1e-8
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 22.2815
Epoch [40], val_loss: 21.5574
Epoch [60], val_loss: 21.1252
Epoch [80], val_loss: 20.4866
Epoch [100], val_loss: 20.6266


In [67]:
epochs = 20 
lr = 1e-9
history1 = fit(epochs, lr, model, train_loader, val_loader)

Epoch [20], val_loss: 19.9865


## Using the model Predict the Prices

In [68]:
# Prediction Algorithm

def predict_single(input, target, model):
    inputs = input.unsqueeze(0) #Returns a new tensor with a dimension of size one inserted at the specified position.
    predictions = model(inputs)
    prediction = predictions[0].detach()
    print("Input: ", input)
    print("Target: ", target)
    print("Prediction: ", prediction)
    
    
#Testing with some sample data

input, target = val_ds[0]
predict_single(input,target, model)

Input:  tensor([2.0341e+03, 8.1000e-01, 4.2000e+04, 0.0000e+00])
Target:  tensor([0.4620])
Prediction:  tensor([0.0639])


In [69]:
input, target = val_ds[10]
predict_single(input,target, model)

Input:  tensor([2.0341e+03, 7.0000e+00, 3.6054e+04, 0.0000e+00])
Target:  tensor([4.2900])
Prediction:  tensor([-2.7124])


In [70]:
input, target = val_ds[3]
predict_single(input,target, model)

Input:  tensor([2.0301e+03, 5.2000e-01, 2.2000e+04, 0.0000e+00])
Target:  tensor([0.2750])
Prediction:  tensor([-11.4137])


In [71]:
input, target = val_ds[12]
predict_single(input,target, model)

Input:  tensor([2.0341e+03, 6.8000e+00, 1.6500e+04, 0.0000e+00])
Target:  tensor([5.8300])
Prediction:  tensor([-13.9727])
