# Comma.ai speed prediction challenge (MSE < 4 achieved on the validation dataset)

The task is done using the architecture from "End to End Learning for Self-Driving Cars" (published in 2016). I have incorporated a few changes in the architecture to improve the results. The input data is the optical flow between two consecutive images, with some other transformations. The framework used is PyTorch. The final MSE is less than 4 on the validation set. At the end of the notebook, I have predicted the speed for test.mp4.

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets
import torchvision.transforms as transforms
from PIL import Image
from skimage import io, transform
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader


In [None]:
# to change the brightness
def change_brightness(image, bright_factor):
    """Scale the brightness of an RGB image.

    The image is converted to HSV, its V (value) channel is multiplied by
    ``bright_factor`` and the result is converted back to RGB.

    Args:
        image: RGB image array accepted by ``cv2.cvtColor``.
        bright_factor: Multiplier applied to the V channel (``1`` leaves the
            brightness unchanged).

    Returns:
        A new RGB image with the adjusted brightness.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    value = hsv_image[:, :, 2].astype(np.float64) * bright_factor
    if np.issubdtype(hsv_image.dtype, np.integer):
        # Writing an out-of-range product back into an integer channel wraps
        # around (e.g. 300 -> 44 for uint8), turning bright pixels dark.
        # Saturate at the dtype limits instead.
        limits = np.iinfo(hsv_image.dtype)
        value = np.clip(value, limits.min, limits.max)
    hsv_image[:, :, 2] = value
    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

# to get the frames from the video
def get_frames(filename):
    """Decode every frame of a video file into memory.

    Args:
        filename: Path of the video passed to ``cv2.VideoCapture``.

    Returns:
        A list with one image per frame, in playback order. Reading stops at
        the first failed ``read()``, so a video that cannot be read yields an
        empty list.
    """
    vidcap = cv2.VideoCapture(filename)
    data = []
    try:
        while True:
            success, image = vidcap.read()
            if not success:
                break
            data.append(image)
    finally:
        # Free the decoder/file handle even if reading raises part-way.
        vidcap.release()
    return data

# to get the speed from the text file
def get_speed_data(filename):
    """Read one speed value per line from a text file.

    Blank lines (for example a trailing empty line at the end of the file)
    are skipped instead of raising ``ValueError``.

    Args:
        filename: Path of the text file holding one number per line.

    Returns:
        A list of floats in file order.

    Raises:
        ValueError: If a non-blank line cannot be parsed as a float.
    """
    with open(filename) as f:
        # float() already ignores surrounding whitespace and the newline.
        return [float(line) for line in f if line.strip()]
            

In [None]:
# speed_data and images_data are lists holding the speeds (target variable) and the frames from the video
# please change the paths below if train.mp4 and train.txt are in a different folder

path_txt = "train.txt"
path_mp4 = "train.mp4"

# one float per line of the text file
speed_data = get_speed_data(path_txt)
# every frame of the video, kept in memory as a list
images_data= get_frames(path_mp4)

In [None]:
# optical flow function
def opticalFlowDense(frame1,frame2):
    """Encode the dense optical flow between two frames as an HSV-style image.

    Both frames are cropped to rows 200-399, rescaled (0.4 horizontally,
    0.5 vertically), converted from BGR to grayscale and passed to
    Farneback's dense optical-flow estimator. The flow is then stored in a
    3-channel image: channel 0 holds the flow direction, channel 1 is
    constant 255 and channel 2 holds the scaled flow magnitude.

    Args:
        frame1: Earlier BGR frame (presumably uint8, as returned by
            ``cv2.VideoCapture.read`` -- confirm if another source is used).
        frame2: Later BGR frame with the same shape as ``frame1``.

    Returns:
        An array with the shape and dtype of the cropped/resized ``frame1``
        holding the encoded flow.
    """
    frame1 = cv2.resize(frame1[200:400], (0, 0), fx=0.4, fy=0.5)
    frame2 = cv2.resize(frame2[200:400], (0, 0), fx=0.4, fy=0.5)

    prev = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    nxt = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # Farneback parameters, in the positional order OpenCV expects.
    flow_mat = None       # no initial flow estimate
    image_scale = 0.4     # pyramid scale between levels
    nb_images = 1         # number of pyramid levels
    win_size = 12         # averaging window size
    nb_iterations = 2     # iterations per pyramid level
    deg_expansion = 8     # pixel neighbourhood for the polynomial expansion
    STD = 1.2             # Gaussian sigma used by the polynomial expansion
    extra = 0             # operation flags

    # Keep the raw (dx, dy) field separate from the output image; the two
    # have different channel counts and dtypes.
    flow_data = cv2.calcOpticalFlowFarneback(prev, nxt, flow_mat, image_scale,
                                             nb_images, win_size, nb_iterations,
                                             deg_expansion, STD, extra)

    mag, ang = cv2.cartToPolar(flow_data[..., 0], flow_data[..., 1])

    flow = np.zeros_like(frame1)
    flow[..., 1] = 255
    # Angle in radians -> degrees / 2.
    flow[..., 0] = ang * 180 / np.pi / 2
    # Saturate instead of letting large magnitudes wrap around when they are
    # cast to the image dtype.
    flow[..., 2] = np.clip(mag * 15, 0, 255)
    return flow



In [None]:
# optical_flow_images holds the optical-flow image for every pair of consecutive frames,
# resized to 200x66 and divided by 255.
# speed_Data_final holds the average speed of the two frames in each pair.

optical_flow_images = []
speed_Data_final = []
for i in range(len(images_data) - 1):
    flow = opticalFlowDense(images_data[i], images_data[i + 1])
    img = cv2.resize(flow, (200, 66)) / 255
    optical_flow_images.append(img)

    # The target for a frame pair is the mean of the two frame speeds.
    mean_speed = (speed_data[i] + speed_data[i + 1]) / 2
    label = np.asarray(mean_speed, dtype=np.float32)
    speed_Data_final.append(label)
    
    

In [None]:
# Training and validation split - 80/20
# The first 80% of the samples (in sequence order) train the model, the rest validate it.

image_cut = int(0.8 * len(optical_flow_images))
label_cut = int(0.8 * len(speed_Data_final))

train_data, val_data = optical_flow_images[:image_cut], optical_flow_images[image_cut:]
train_labels, val_labels = speed_Data_final[:label_cut], speed_Data_final[label_cut:]



In [None]:
class imagedataset(Dataset):
    """Dataset pairing each sample with its label.

    Args:
        video_file: Indexable collection of samples (stored as ``data``).
        speed_file: Indexable collection of labels (stored as ``labels``).
        transforms: Callable applied to a sample each time it is fetched.
    """

    def __init__(self, video_file, speed_file, transforms):
        self.data = video_file
        self.labels = speed_file
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(transformed sample, label)`` for ``index``."""
        sample, target = self.data[index], self.labels[index]
        return self.transforms(sample), target


In [None]:
## dataloader

# ToTensor is the only preprocessing step applied to each sample.
transformations = transforms.Compose([transforms.ToTensor()])

# Wrap the raw lists in Dataset objects (train_data / val_data are rebound).
train_data = imagedataset(train_data, train_labels, transforms=transformations)
val_data = imagedataset(val_data, val_labels, transforms=transformations)

# Both loaders walk their data in order, 32 samples per batch.
loader_options = dict(batch_size=32, shuffle=False)
train_loader = DataLoader(train_data, **loader_options)
val_loader = DataLoader(val_data, **loader_options)


In [None]:
# CNN architecture - End-to-End Deep Learning for Self-Driving Cars Architecture with some modifications

# Need to put drop out 
class CNNModel(nn.Module):
    """Five convolutions followed by four fully connected layers, one output.

    The first three convolutions use 5x5 kernels with stride 2, the last two
    use 3x3 kernels with stride 1; none of them pads. Dropout (p=0.5) follows
    the third convolution. ``fc1`` expects 1152 flattened features, which is
    what a 3x66x200 input produces (64 channels x 1 x 18).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor
        self.cnn1 = nn.Conv2d(3, 24, kernel_size=5, stride=2)
        self.relu1 = nn.ReLU()

        self.cnn2 = nn.Conv2d(24, 36, kernel_size=5, stride=2)
        self.relu2 = nn.ReLU()

        self.cnn3 = nn.Conv2d(36, 48, kernel_size=5, stride=2)
        self.relu3 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.5)

        self.cnn4 = nn.Conv2d(48, 64, kernel_size=3, stride=1)
        self.relu4 = nn.ReLU()

        self.cnn5 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.relu5 = nn.ReLU()

        # Regression head: 1152 -> 100 -> 50 -> 10 -> 1
        self.fc1 = nn.Linear(1152, 100)
        self.relu_fc1 = nn.ReLU()

        self.fc2 = nn.Linear(100, 50)
        self.relu_fc2 = nn.ReLU()

        self.fc3 = nn.Linear(50, 10)
        self.relu_fc3 = nn.ReLU()

        self.fc4 = nn.Linear(10, 1)

    def forward(self, x):
        """Map a batch of images to a ``(batch, 1)`` tensor of predictions."""
        out = self.relu1(self.cnn1(x))
        out = self.relu2(self.cnn2(out))
        out = self.dropout1(self.relu3(self.cnn3(out)))
        out = self.relu4(self.cnn4(out))

        # Flatten the last feature map per sample, then apply its ReLU.
        out = self.cnn5(out)
        out = self.relu5(out.reshape(out.size(0), -1))

        out = self.relu_fc1(self.fc1(out))
        out = self.relu_fc2(self.fc2(out))
        out = self.relu_fc3(self.fc3(out))

        # The final layer is linear: no activation on the regression output.
        return self.fc4(out)

In [None]:
# Model, criterion, learning rate, optimiser
model = CNNModel()
# The cells below place input batches on the GPU whenever one is available,
# so the weights must live there too; otherwise the forward pass fails with
# a device-mismatch error.
if torch.cuda.is_available():
    model = model.cuda()
criterion = nn.MSELoss()
learning_rate = .0001
# Created after the move so the optimiser tracks the parameters on their final device.
optimiser = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [None]:
# train Model: optimise on train_loader, validate after every epoch and save
# a checkpoint per epoch; stop early once the validation MSE drops below
# `threshold`.

import os
num_epochs =160

# IMP - Please provide path on which the model will be saved
path= " "

# Imp - Please select a threshold of MSE you want to stop the training at , else train for high number of epochs and select the best ones
threshold = 5

# Batches follow the model's own device, so inputs and weights can never end
# up on different devices.
device = next(model.parameters()).device

for epoch in range(num_epochs):

    # Training mode: dropout is active while optimising.
    model.train()
    total_train_loss = 0
    for images, labels in train_loader:
        images = images.float().to(device)
        # The model outputs (batch, 1). Targets must have the same shape,
        # otherwise MSELoss broadcasts (batch, 1) against (batch,) into a
        # batch x batch matrix and the loss is no longer the per-sample MSE.
        labels = labels.float().to(device).view(-1, 1)

        # Clear gradients w.r.t. parameters
        optimiser.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        total_train_loss += loss.item()

    print("Epoch number : {} Training Loss : {} ".format( epoch+1,total_train_loss/len(train_loader)))

    # after every epoch, testing on validation set
    # Evaluation mode disables dropout; no_grad skips building the autograd graph.
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.float().to(device)
            labels = labels.float().to(device).view(-1, 1)

            val_outputs = model(images)
            val_loss_size = criterion(val_outputs, labels)
            total_val_loss += val_loss_size.item()
    Validation_Loss = total_val_loss/len(val_loader)
    print("Epoch number : {} Validation Loss : {} ".format( epoch+1, Validation_Loss))
    print(" ")
    print(" ")

    # Saving model for each epoch (file names use the 0-based epoch index)
    torch.save(model.state_dict(), os.path.join(path, 'epoch-{}.pth'.format(epoch)))

    if Validation_Loss<threshold:
        print("Finished training")
        break


In [None]:
# Testing on the validation set again, this time with batch size = 1

import os

criterion = nn.MSELoss()

# Recreate the validation split (the last 20% of the samples) from the raw lists.
image_cut = int(0.8 * len(optical_flow_images))
label_cut = int(0.8 * len(speed_Data_final))
val_data = optical_flow_images[image_cut:]
val_labels = speed_Data_final[label_cut:]

transformations = transforms.Compose([transforms.ToTensor()])
val_data = imagedataset(val_data, val_labels, transforms=transformations)

# One sample per batch, in order.
val_loader = DataLoader(val_data, batch_size=1, shuffle=False)

def MSE_val(model,val_loader):
    """Return the mean squared error of ``model`` over ``val_loader``.

    The error is averaged over individual samples, so the result does not
    depend on the batch size. Batches are moved to the device the model's
    parameters live on (CPU for a model without parameters).

    Args:
        model: Callable module mapping an image batch to one value per sample.
        val_loader: Iterable of ``(images, labels)`` tensor batches.

    Returns:
        The mean squared error as a Python float.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    squared_error_sum = 0.0
    sample_count = 0
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.float().to(device)
            labels = labels.float().to(device).reshape(-1)
            # Flatten (batch, 1) outputs so they line up element-wise with
            # the (batch,) labels instead of broadcasting against them.
            predictions = model(images).reshape(-1)
            squared_error_sum += torch.sum((predictions - labels) ** 2).item()
            sample_count += labels.numel()

    if sample_count == 0:
        raise ValueError("val_loader yielded no samples")
    return squared_error_sum / sample_count

# Load a saved checkpoint and report its MSE on the validation split.
the_model = CNNModel()

# IMP - `epoch` selects the checkpoint file 'epoch-{epoch}.pth' that is loaded.
# It defaults to the value left behind by the training loop (the last epoch
# that ran, 0-based). To evaluate another checkpoint, e.g. the one with the
# minimum validation MSE, uncomment the next line and set the index:
# epoch = 0

# map_location lets a checkpoint saved from a GPU model load on any machine.
the_model.load_state_dict(torch.load(os.path.join(path, 'epoch-{}.pth'.format(epoch)), map_location="cpu"))
the_model.eval()
# Stored under its own name so the MSE_val function is not overwritten and
# this cell can be run more than once.
validation_mse = MSE_val(the_model,val_loader)

print(validation_mse)


Result and conclusion: The final MSE is around 5 on the validation set (20% of train.mp4). The next task is to generate speed values for test.mp4.

# Generate test.txt on test.mp4 data


In [None]:
# make sure you have test.mp4 in the same folder, or put the correct folder path here
# Test_Data is the list of all frames of the test video returned by get_frames
Test_Data= get_frames("test.mp4")

In [None]:
# Optical-flow image (resized to 200x66, divided by 255) for every pair of
# consecutive test frames.
optical_flow_images = [
    cv2.resize(opticalFlowDense(current, following), (200, 66)) / 255
    for current, following in zip(Test_Data, Test_Data[1:])
]
# The test video has no speed labels; a constant 1 is stored as a placeholder
# so the same dataset class can be reused.
speed_Data_final = [1] * len(optical_flow_images)
    

In [None]:
# Test inputs and their labels are the two lists under test-specific names.
test_data, test_labels = optical_flow_images, speed_Data_final

In [None]:
# Wrap the test samples in a Dataset and iterate them one at a time, in order.
transformations = transforms.Compose([transforms.ToTensor()])
test_data = imagedataset(test_data, test_labels, transforms=transformations)

# NOTE: the loader keeps the name val_loader although it now serves test data.
val_loader = DataLoader(test_data, batch_size=1, shuffle=False)



In [None]:
# Imported before its first use so this cell does not rely on an earlier cell.
import os

# Rebuild the network and load the checkpoint selected by `epoch`.
# map_location lets a checkpoint saved from a GPU model load on any machine.
the_model = CNNModel()
the_model.load_state_dict(torch.load(os.path.join(path, 'epoch-{}.pth'.format(epoch)), map_location="cpu"))
the_model.eval()

# predicting on the test.mp4 data, batch size = 1
criterion = nn.MSELoss()

def predict(model,val_loader):
    """Run ``model`` over every batch of ``val_loader`` and collect its outputs.

    Batches are moved to the device the model's parameters live on (CPU for
    a model without parameters). Labels yielded by the loader are ignored.

    Args:
        model: Callable module mapping an image batch to an output tensor.
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        A list with one NumPy array per batch holding the model output for
        that batch.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predictions = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for images, _labels in val_loader:
            outputs = model(images.float().to(device))
            # .cpu() is required before .numpy() when the model runs on a GPU.
            predictions.append(outputs.detach().cpu().numpy())
    return predictions

# predict using the loaded model
result = predict(the_model,val_loader)

print(len(test_data))
# Each entry of `result` is expected to be a (batch, 1) array; flatten them
# into one value per sample. This works for any batch size, not only 1.
final_result = [value for batch in result for value in batch[:, 0]]

# There is one prediction per pair of consecutive frames, i.e. one fewer than
# the number of frames. Repeat the last prediction for the final frame.
if final_result:
    final_result.append(final_result[-1])

with open('result_final.txt', 'w') as f:
    f.writelines("%s\n" % item for item in final_result)