In [1]:
import cv2
import numpy as np
from dataset_maker import datasetmaker,data_browser
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
datamaker  = datasetmaker("../sp_data/train.mp4","../sp_data/train.txt")
points = datamaker.get_points_video()
cv2.destroyAllWindows()

Point 1 selected: (5, 291)
Point 2 selected: (619, 296)
Point 3 selected: (516, 267)
Point 4 selected: (138, 258)


In [3]:
#these would be final as whole model depends upon it. As width and height calculated depends on these points
points
width =  int(np.sum(np.sqrt( (points[0]-points[1]) **2) )) 
height = int(np.sum(np.sqrt( (points[2]-points[1]) **2) ))
tensor_shape = 1,3,height,width
tensor_shape

(1, 3, 132, 619)

In [4]:
class SpeedNet(nn.Module):
    def __init__(self, input_shape):
        super(SpeedNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=input_shape[1], out_channels=30, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=30, out_channels=1, kernel_size=10)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        
        # Calculate the size of the flattened features after the convolution and pooling layers
        # This depends on the input image size and the layers' configurations
        conv_output_size = self._get_conv_output(input_shape)
        
        self.fc1 = nn.Linear(conv_output_size, 128)
        self.fc2 = nn.Linear(128, 1)
        
    def _get_conv_output(self, shapes):
        x = torch.rand(shapes[1:],dtype=torch.float32)
        print(x.shape)
        x = self.pool(self.conv2(self.conv1(x)))
        sizee = int(torch.prod(torch.tensor(x.size())))
        print(sizee)
        return sizee

    def forward(self, x):

        #print("1",x[0,0,0,0])
        x = self.conv1(x)
     
        x = F.relu(x)
        x = self.conv2(x)
        #print("2",x[0,0,0,0])
        x = F.relu(x)

        x = self.pool(x)
        #print("3",x[0,0,0,0])
        x = self.flatten(x)
        #print(x[0,0])
        x = F.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return x

In [5]:
model = SpeedNet(tensor_shape)

torch.Size([3, 132, 619])
18240


In [6]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr = 0.01)



In [7]:
model.train()

SpeedNet(
  (conv1): Conv2d(3, 30, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(30, 1, kernel_size=(10, 10), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=18240, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

In [11]:
for epoch in range(10):
    running_loss = 0.0
    frame_count = 0
    for input, label in datamaker.generate_tensor_data(points):
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(input)
        loss = criterion(outputs.squeeze(), label)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        frame_count+=1
        if(frame_count%100==0):
            print(f'Epoch {epoch+1}, Loss: {loss.item()/datamaker.frame_count}')
    print(f'Epoch {epoch+1}, Loss: {running_loss/datamaker.frame_count}')

print('Finished Training')

Epoch 1, Loss: 0.034446001239851406
Epoch 1, Loss: 0.028248922310623467
Epoch 1, Loss: 0.01836553835401348
Epoch 1, Loss: 0.012824621761546416
Epoch 1, Loss: 0.013389220892214308
Epoch 1, Loss: 0.01480935489430147
Epoch 1, Loss: 0.0196737356746898
Epoch 1, Loss: 0.024007014854281555
Epoch 1, Loss: 0.019445253259995403
Epoch 1, Loss: 0.017851966409122243
Epoch 1, Loss: 0.01562648847991345
Epoch 1, Loss: 0.012200661453546263
Epoch 1, Loss: 0.00971560983096852
Epoch 1, Loss: 0.010342975691253064
Epoch 1, Loss: 0.012320078681497013
Epoch 1, Loss: 0.014884791654698989
Epoch 1, Loss: 0.017748119877833948
Epoch 1, Loss: 0.018561987783394608
Epoch 1, Loss: 0.029261061724494487
Epoch 1, Loss: 0.026741692038143382
Epoch 1, Loss: 0.02539392209520527
Epoch 1, Loss: 0.028346078910079658
Epoch 1, Loss: 0.029862997017654717
Epoch 1, Loss: 0.0286373841528799
Epoch 1, Loss: 0.03121762743183211
Epoch 1, Loss: 0.03106094958735447
Epoch 1, Loss: 0.03171743953929228
Epoch 1, Loss: 0.03245514215207567
Epoch

KeyboardInterrupt: 

In [12]:
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'model.pth')

In [None]:
np.save("road_patch.npy",points)