In [5]:
import os
if not os.path.exists("calib_challenge"):
    ! git clone https://github.com/romater0/calib_challenge.git
    ! mv calib_challenge/labeled /content 

In [6]:
import torch
import torchvision
import cv2 
import numpy as np

In [7]:
# Extracts the frames of a video
temp_video_file = os.path.join('labeled', "0.hevc")
REBUILD_DATA = os.path.exists('data')

def extract_frames(video_file):
    if not os.path.exists("data"):
        os.mkdir("data")
    cap = cv2.VideoCapture(video_file)   
    frames = []

    while(cap.isOpened  ()):
        frameId = cap.get(1) 
        ret, frame = cap.read()

        if (ret != True):
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (50, 50))
        frames.append(gray)

    np.save(os.path.join('data', os.path.split(video_file)[1].split('.')[0] + ".npy"), frames)
    
    cap.release()

if REBUILD_DATA:
    extract_frames(temp_video_file)

In [8]:
frame_data = np.load(os.path.join("data", "0.npy"))
labels = np.loadtxt(os.path.join("labeled", "0.txt"), dtype='float16')
input_shape = frame_data.shape

In [9]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self, input_shape):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(16, 32, kernel_size=(3,3))

        x = torch.randn(input_shape).view(-1,1,input_shape[1],input_shape[2])
        self._to_linear = None
        self.convs(x)
        
        self.fcl1 = nn.Linear(self._to_linear, 512)
        self.fcl2 = nn.Linear(512, 2)

        

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fcl1(x))
        x = self.fcl2(x) 
        return F.softmax(x, dim=1)

conv_net = ConvNet(input_shape = input_shape).to(device)

In [11]:
X = torch.Tensor(frame_data).view(-1,1,input_shape[1],input_shape[2])
X = X/255.0
y = torch.Tensor(labels)


In [13]:
from tqdm import tqdm
import torch.optim as optim
EPOCHS = 1
BATCH_SIZE = 10

def train(x, net):
    optimizer = optim.Adam(conv_net.parameters(), lr = 0.001)
    loss_function = nn.MSELoss()
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(X), BATCH_SIZE)):
            batch_X = X[i: i + BATCH_SIZE]
            batch_y = y[i: i+ BATCH_SIZE]

            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            net.zero_grad()

            outputs = conv_net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()

        print(f"Epoch: {epoch}. Loss: {loss}")

train(X, conv_net)

100%|██████████| 120/120 [00:00<00:00, 211.71it/s]

Epoch: 0. Loss: 0.2179906815290451





In [17]:
# Generate validata dataset
if not os.path.exists('data/1.npy'):
    extract_frames('labeled/1.hevc')

In [18]:
# Validation
test_X = np.load(os.path.join("data", "1.npy"))
test_y = np.loadtxt(os.path.join("labeled", "1.txt"), dtype='float16')
test_X = torch.Tensor(test_X).view(-1,1,input_shape[1],input_shape[2])
test_X = X/255.0
test_y = torch.Tensor(test_y)

In [20]:
def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = test_y[i].to(device)
            net_out = net(test_X[i].view(-1, 1, 50, 50).to(device))[0] 
            predicted_class = torch.argmax(net_out)

            if predicted_class[0] == real_class[0]:
                correct += 1
            total += 1

    print("Accuracy: ", round(correct/total, 3))

test(conv_net)

# yeah I also think well need 

  0%|          | 0/1200 [00:00<?, ?it/s]


IndexError: ignored