In [1]:
import cv2
import numpy as np
from dataset_maker import datasetmaker,data_browser
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Fetch the road patch points

In [2]:
datamaker  = datasetmaker("../sp_data/train.mp4","../sp_data/train.txt")
points = datamaker.get_points_video()
cv2.destroyAllWindows()

Point 1 selected: (11, 302)
Point 2 selected: (634, 305)
Point 3 selected: (478, 257)
Point 4 selected: (186, 251)


In [3]:
#these would be final as whole model depends upon it. As width and height calculated depends on these points
points
width =  int(np.sum(np.sqrt( (points[0]-points[1]) **2) )) 
height = int(np.sum(np.sqrt( (points[2]-points[1]) **2) ))
tensor_shape = 1,3,height,width
tensor_shape

(1, 3, 204, 626)

## Model training

In [4]:
class SpeedNet(nn.Module):
    def __init__(self, input_shape):
        super(SpeedNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=input_shape[1], out_channels=30, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=30, out_channels=5, kernel_size=5,stride=1)
        self.conv3 = nn.Conv2d(in_channels=5, out_channels=1, kernel_size=3,stride=1)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=3)
        self.flatten = nn.Flatten() 
        
        # Calculate the size of the flattened features after the convolution and pooling layers
        # This depends on the input image size and the layers' configurations
        conv_output_size = self._get_conv_output(input_shape)
        
        self.fc1 = nn.Linear(conv_output_size, 32)
        self.fc2 = nn.Linear(32, 1)

    def conv_forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        print("after pool1",x.shape,torch.sum(x))
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        print("after pool2",x.shape,torch.sum(x))
        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        print("after pool3",x.shape,torch.sum(x))
        return x
    
    def _get_conv_output(self, shapes):
        x = torch.rand(shapes[1:],dtype=torch.float32)
        print(x.shape)
        x = self.conv_forward(x)
        print(x.shape)
        sizee = int(torch.prod(torch.tensor(x.size())))
        print(sizee)
        return sizee

    def forward(self, x):

        y = self.conv_forward(x)
        #print('after ',torch.sum(y))
        z = self.flatten(y)
        d = F.relu(self.fc1(z))
        r = self.fc2(d)
        print('final out',r)
        return r

In [5]:
class Mini_SpeedNet(nn.Module):
    def __init__(self, input_shape):
        super(Mini_SpeedNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=input_shape[1], out_channels=1, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=20, stride=10)
        self.flatten = nn.Flatten() 
        
        # Calculate the size of the flattened features after the convolution and pooling layers
        # This depends on the input image size and the layers' configurations
        conv_output_size = self._get_conv_output(input_shape)
        
        self.fc1 = nn.Linear(conv_output_size, 32)
        self.fc2 = nn.Linear(32, 1)

    def conv_forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        #print("after pool1",x.shape,torch.sum(x))
        return x
    
    def _get_conv_output(self, shapes):
        x = torch.rand(shapes[1:],dtype=torch.float32)
        print(x.shape)
        x = self.conv_forward(x)
        print(x.shape)
        sizee = int(torch.prod(torch.tensor(x.size())))
        print(sizee)
        return sizee

    def forward(self, x):

        y = self.conv_forward(x)
        #print('after ',torch.sum(y))
        z = self.flatten(y)
        d = F.relu(self.fc1(z))
        r = self.fc2(d)
        #print('final out',r)
        return r

In [6]:
model = Mini_SpeedNet(tensor_shape)

torch.Size([3, 204, 626])
torch.Size([1, 19, 61])
1159


In [9]:
criterion = nn.MSELoss()
optimizer = optim.Adagrad(model.parameters(),lr = 0.001)

In [10]:
model.train()

Mini_SpeedNet(
  (conv1): Conv2d(3, 1, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=20, stride=10, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=1159, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [11]:
for input, label in datamaker.generate_tensor_data(points):
    print(input.shape)

torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])
torch.Size([10, 3, 204, 626])


In [13]:
epoch = 0
for input, label in datamaker.generate_tensor_data(points,batchsize=100,epochs=100):
    # Zero the parameter gradients
    optimizer.zero_grad()
    
    # Forward pass
    outputs = model(input)
    loss = criterion(outputs.squeeze(), label)
    
    # Backward pass and optimize
    loss.backward()
    optimizer.step()
    

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    epoch+=1

print('Finished Training')

Epoch 1, Loss: 0.36155226826667786
Epoch 2, Loss: 0.38207533955574036
Epoch 3, Loss: 0.36637449264526367
Epoch 4, Loss: 0.43566522002220154
Epoch 5, Loss: 0.5719499588012695
Epoch 6, Loss: 0.44131535291671753
Epoch 7, Loss: 0.32337111234664917
Epoch 8, Loss: 0.42546921968460083
Epoch 9, Loss: 0.37832510471343994
Epoch 10, Loss: 0.31713026762008667
Finished Training


In [127]:
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'model.pth')

In [128]:
np.save("road_patch.npy",points)

## Reload the Model

In [129]:
#Reload the dataloader,model and road_patch
datamaker  = datasetmaker("../sp_data/train.mp4","../sp_data/train.txt")
points = np.load("road_patch.npy")
width =  int(np.sum(np.sqrt( (points[0]-points[1]) **2) )) 
height = int(np.sum(np.sqrt( (points[2]-points[1]) **2) ))
tensor_shape = 1,3,height,width
model = Mini_SpeedNet(tensor_shape)
# Load the model and optimizer state_dict
checkpoint = torch.load('model.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


torch.Size([3, 249, 638])
torch.Size([1, 23, 62])
1426


In [130]:
frame_count = 0
for input, label in datamaker.generate_tensor_data(points):
        
        # Forward pass
        outputs = model(input)
        loss = criterion(outputs.squeeze(), label)

        frame_count+=1
        print(outputs.squeeze(),label,loss.item())
        if(frame_count%10==0):
            break

tensor(0.2179, grad_fn=<SqueezeBackward0>) tensor(0.9983) 0.608972430229187
tensor(0.2146, grad_fn=<SqueezeBackward0>) tensor(1.) 0.6168603301048279
tensor(0.2254, grad_fn=<SqueezeBackward0>) tensor(0.9985) 0.5977466106414795
tensor(0.2235, grad_fn=<SqueezeBackward0>) tensor(0.9970) 0.598353385925293
tensor(0.2202, grad_fn=<SqueezeBackward0>) tensor(0.9932) 0.5974103212356567
tensor(0.2194, grad_fn=<SqueezeBackward0>) tensor(0.9920) 0.5969988703727722
tensor(0.2180, grad_fn=<SqueezeBackward0>) tensor(0.9898) 0.5956991314888
tensor(0.2167, grad_fn=<SqueezeBackward0>) tensor(0.9919) 0.6008511185646057
tensor(0.2166, grad_fn=<SqueezeBackward0>) tensor(0.9894) 0.597251832485199
tensor(0.2179, grad_fn=<SqueezeBackward0>) tensor(0.9892) 0.5949372053146362


In [119]:
label

tensor(0.9892)

In [131]:
np.random.random_integers(2,10,size=10)

  np.random.random_integers(2,10,size=10)


array([ 7,  4,  5,  5,  2,  2,  8,  4,  9, 10])

In [134]:
j =np.random.randint(2,10,size=10)

In [135]:
for i in j:
    print(i)

2
2
9
9
2
4
8
3
7
2
