TJ Wiegman  
ASM 591 AI  
Final Project  
2024-12-04

In [1]:
# Enable GPU acceleration
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
# Create NN
import torch.nn as nn
import torch.nn.functional as F
BATCH = 20

class VisOdoNet(nn.Module):
    def __init__(self):
        super().__init__()
        # Convolutional layers to learn spatial features
        self.conv1 = nn.Conv2d(4, 5, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(5, 3, kernel_size=3, padding=3, dilation=2)
        self.conv3 = nn.Conv2d(3, 1, kernel_size=5, padding=2)
        
        # LSTM network to learn temporal features
        self.rnn1 = nn.LSTM(input_size=256,
                            hidden_size=8, num_layers=4,
                            bidirectional=False, # should be monotonic
                            batch_first=True)
        
        # And a few FC layers to tie it all together
        self.fc1 = nn.Linear(64, 16)
        self.fc2 = nn.Linear(16, BATCH)
        
    def forward(self, x):         # input [B x 4 x 256 x 256]
        x = F.relu(self.conv1(x)) # shape [B x 5 x 256 x 256]
        x = F.avg_pool2d(x, 2)    # shape [B x 5 x 128 x 128]
        x = F.relu(self.conv2(x)) # shape [B x 3 x 128 x 128]
        x = F.max_pool2d(x, 4)    # shape [B x 3 x  32 x  32]
        x = F.relu(self.conv3(x)) # shape [B x 1 x  32 x  32]
        x = F.avg_pool2d(x, 2)    # shape [B x 1 x  16 x  16]
        x = x.reshape(-1, 256)    # shape [B x 256]
        
        _, (x, c) = self.rnn1(x)  # pull both hidden and cell states
        x = x.reshape(-1, 32) # B x 32 hidden state
        c = c.reshape(-1, 32) # B x 32 cell state
        x = torch.concat((x,c))
        x = F.relu(x.reshape(-1, 64))   # [B x 64]
        x = F.relu(self.fc1(x))   # shape [B x 16]
        x = F.relu(self.fc2(x))   # shape [BATCH]
        return x.reshape(-1)
        

In [4]:
import datetime
from dateutil import parser

def stamp(dt):
    '''Converts a datetime into a string suitable for indexing'''
    # minute = dt.minute - (dt.minute % 10)
    # return f"{dt.date()}_{dt.hour:02}:{minute:02}"
    return f"{dt.date()}_{dt.hour:02}:{dt.minute:02}"

def unstamp(st):
    '''Converts a `stamp` string back into a datetime'''
    date, time = st.split("_")
    yy,mm,dd = date.split("-")
    hh,mn = time.split(":")
    #hh,mn = time, 0
    return datetime.datetime(
        year=int(yy), month=int(mm), day=int(dd),
        hour=int(hh), minute=int(mn)
    )

def roll_avg(frame, new):
    N = frame[1] + 1
    output = frame[0]*(1 - 1/N) + new/N
    return output, N


In [5]:
# Import depth feature extraction model
import cv2
import torch

# Import DAv2 code
import sys
sys.path.append("./")
from Depth_Anything_V2.depth_anything_v2.dpt import DepthAnythingV2

# From https://github.com/DepthAnything/Depth-Anything-V2#use-our-models
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

encoder = 'vitl' # or 'vits', 'vitb', 'vitg'

depth_model = DepthAnythingV2(**model_configs[encoder])
depth_model.load_state_dict(torch.load(f"Depth_Anything_V2/checkpoints/depth_anything_v2_{encoder}.pth", weights_only=True, map_location='cpu'))
depth_model = depth_model.to(device).eval()

xFormers not available
xFormers not available


In [6]:
import cv2, time
from threading import Thread
from queue import Queue

class FileVideoStream:
    # queue up frames in the background for faster playback
    # adapted from https://pyimagesearch.com/2017/02/06/faster-video-file-fps-with-cv2-videocapture-and-opencv/
    def __init__(self, path, queueSize=60):
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.Q = Queue(maxsize=queueSize)
    
    def more(self): return self.Q.qsize() > 0
    def read(self): return self.Q.get()
    def stop(self): self.stopped = True

    def update(self):
        while True:
            if self.stopped: return
            if not self.Q.full():
                (grabbed,frame) = self.stream.read()
                if not grabbed:
                    self.stop()
                    return
                self.Q.put(frame)

    def start(self):
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        time.sleep(1.0) # give the queue a second to fill a bit
        return self

In [7]:
# Create dataset
from torch.utils.data import Dataset, DataLoader
# from pynviread import NvidiaReader  # fast but unstable :(
import numpy as np
import cv2, json

class VideoSet(Dataset):
    def __init__(self, video_paths):
        self.jsons = []
        self.sources = []
        self.n_frames = 0
        self.videos = {}
        
        for path in video_paths:
            # Get metadata from JSON
            name, ext = path.split(".")
            assert ext.upper() == "MP4"
            jfile = name + ".json"
            with open(jfile) as file:
                jdata = json.load(file)
            
            # Calculate frame numbers
            startFrame = self.n_frames
            self.n_frames += jdata[-1]["frame"] + 1 # because zero indexed
            endFrame = self.n_frames
            
            # Save data to self
            self.sources.append((startFrame, endFrame, path))
            self.jsons.append(jdata)
            
    def source_lookup(self, idx):
        i = 0
        for start, end, _ in self.sources:
            if idx >= start and idx < end:
                return i
            i += 1
        raise IndexError(f"Index {idx} not found in {self.sources}")
    
    def load_video(self, path):
        self.videos[path] = FileVideoStream(path).start()
    
    def __len__(self):
        return self.n_frames
    
    def __getitem__(self, idx):
        # Lookup correct source for idx
        i = self.source_lookup(idx)
        start, _, path = self.sources[i]
        fidx = idx - start
        if path not in self.videos:
            self.load_video(path) # lazy loading
        
        # Get visual data
        frame = self.videos[path].read() # shape H x W x 3
        depth = np.expand_dims(depth_model.infer_image(frame), -1) # shape H x W x 1
        frame = np.concat([x for x in [frame, depth]], axis = 2)
        frame = cv2.resize(src=frame, dsize=(256,256), interpolation=cv2.INTER_AREA)
        frame = frame.transpose(2,0,1) # because cv2 and pytorch don't agree on axis order
        frame = torch.tensor(frame, dtype=torch.float)
        
        # Get timestamp
        time = stamp(parser.isoparse(self.jsons[i][fidx]["gps_time"]))
        return frame, time


In [8]:
# test_videos = ["/mnt/nas/2024/Tractor02/2024-09-17_175407.MP4", "/mnt/nas/2024/Tractor04/2024-09-10_164521.MP4"]
# test_set = VideoSet(test_videos)

In [8]:
# Ground-Truth Data Import
canbus = {
    "apalis1": "/home/tjw/Documents/apalis24/2024_apalis1.sql",
    "apalis2": "/home/tjw/Documents/apalis24/2024_apalis2.sql",
    "apalis3": "/home/tjw/Documents/apalis24/2024_apalis3.sql",
    "apalis4": "/home/tjw/Documents/apalis24/2024_apalis4.sql"
}

isoblue = {
    # really Tractor02
    "Tractor01": {
        "apalis1": {"start": datetime.datetime(2024,1,1),
                    "end": datetime.datetime(2024,6,17)},
        "apalis3": {"start": datetime.datetime(2024,6,17),
                    "end": datetime.datetime(2024,10,31)}
    },

    # really Tractor03
    "Tractor02": {
        "apalis2": {"start": datetime.datetime(2024,1,1),
                    "end": datetime.datetime(2024,8,22)},
        "apalis4": {"start": datetime.datetime(2024,1,1),
                    "end": datetime.datetime(2024,10,31)}
    },

    # really Tractor04
    "Tractor03": {
        "apalis3": {"start": datetime.datetime(2024,1,1),
                    "end": datetime.datetime(2024,6,17)},
        "apalis1": {"start": datetime.datetime(2024,6,17),
                    "end": datetime.datetime(2024,8,22)},
        "apalis2": {"start": datetime.datetime(2024,8,22),
                    "end": datetime.datetime(2024,10,31)}
    }
}


In [None]:
# Get line totals for DB dumps
import subprocess

line_total = {}

for db in canbus:
    line_total[db] = int(subprocess.check_output(["wc", "-l", canbus[db]]).decode().split()[0])

# line_total = {'apalis1': 274850703, 'apalis2': 350961387, 'apalis3': 99362332, 'apalis4': 33419054}


In [None]:
# Read Isoblue CAN data
import time
from dateutil import parser
MAGIC = 1/900 # empirical scaling factor for CAN bus ground speed to get m/s
BAR_W = 49

gt_speed = {}

for tractor in isoblue:
    gt_speed[tractor] = {}

    for db in isoblue[tractor]:
        stime = time.time()
        start = isoblue[tractor][db]["start"]
        end = isoblue[tractor][db]["end"]

        with open(canbus[db]) as file:
            i = 0
            for line in file:
                if "can0" in line:
                    l = line.split("\t")
                    arb = l[2]
                    if arb[2:6] == "FEF1": # vehicle ground speed PGN
                        
                        # Get datetime of line
                        timestamp = l[0].split(".")[0]
                        if "+" in timestamp:
                            timestamp = timestamp.split("+")[0]
                        dt = parser.isoparse(timestamp)
                        
                        # Check that line is within parameters of tractor
                        if dt >= start and dt < end:
                            hex_data = l[3]
                            speed = MAGIC * (256*int(hex_data[4:6], 16) + 
                                             int(hex_data[6:8], 16))
                            
                            # Save data to output
                            st = stamp(dt)
                            if st not in gt_speed[tractor]:
                                gt_speed[tractor][st] = (speed, 1)
                            else:
                                gt_speed[tractor][st] = roll_avg(
                                    gt_speed[tractor][st], speed
                                )
            
                # Print progress
                i += 1
                if i % 250_000 == 0:
                    y = time.time() - stime
                    x = i/line_total[db]
                    pct = round(BAR_W*x)
                    bar = "="*pct
                    space = " "*(BAR_W-pct)
                    print(f"[{tractor}/{db}]: |{bar}>{space}| ~{round((y/x)-y)}s remain...   ", end="\r")
        print(f"[{tractor}/{db}] Finished with {db} in {round(time.time() - stime)}s {' '*BAR_W}")
    print(f"**[{tractor}] Found a total of {len(gt_speed[tractor])} speeds")

# Estimated total runtime: 10-15 mins


[Tractor01/apalis1] Finished with apalis1 in 128s                                                  
[Tractor01/apalis3] Finished with apalis3 in 46s                                                  
**[Tractor01] Found a total of 6267 speeds
[Tractor02/apalis2] Finished with apalis2 in 162s                                                  
[Tractor02/apalis4] Finished with apalis4 in 15s                                                  
**[Tractor02] Found a total of 1007 speeds
[Tractor03/apalis3] Finished with apalis3 in 45s                                                  
[Tractor03/apalis1] Finished with apalis1 in 122s                                                  
[Tractor03/apalis2] Finished with apalis2 in 167s                                                  
**[Tractor03] Found a total of 11693 speeds


In [8]:
import json

# with open("gt_data.json", "w") as file:
#     json.dump(gt_speed, file)

with open("gt_data.json", "r") as file:
    gt_speed = json.load(file)

In [9]:
training_videos = [
    '/mnt/nas/2024/Tractor02/2024-06-18_083656.MP4',
    #'/mnt/nas/2024/Tractor02/2024-06-18_120818.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-19_083249.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-19_095128.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-19_154328.MP4',
    # '/mnt/nas/2024/Tractor02/2024-09-10_073820.MP4',
    # '/mnt/nas/2024/Tractor02/2024-09-11_082744.MP4'
]

testing_videos = [
    # '/mnt/nas/2024/Tractor02/2024-06-18_083656.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-18_120818.MP4',
    '/mnt/nas/2024/Tractor02/2024-06-19_083249.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-19_095128.MP4',
    # '/mnt/nas/2024/Tractor02/2024-06-19_154328.MP4',
    # '/mnt/nas/2024/Tractor02/2024-09-10_073820.MP4',
    # '/mnt/nas/2024/Tractor02/2024-09-11_082744.MP4'
]

In [11]:
# Create data loader
train_set = VideoSet(training_videos)
test_set = VideoSet(testing_videos)

train_loader = DataLoader(
    dataset = train_set,
    batch_size = BATCH,
    shuffle = False # loads videos sequentially == faster reads
)

test_loader = DataLoader(
    dataset = test_set,
    batch_size = BATCH,
    shuffle = False
)

In [None]:
# Create training function
def train(epoch, model, device, optimizer, data_loader, loss_function, gt_data):
    # Prepare model
    model = model.to(device)
    model = model.train()
    
    try:
        for batch_idx, (frame, time) in enumerate(data_loader):
            optimizer.zero_grad()
            frame = frame.to(device)

            # Get ground truth for comparison
            preds = []
            for st in time:
                if st in gt_data:
                    preds.append(gt_data[st][0])
                else:
                    preds.append(np.nan)
            y = np.array(preds).reshape(-1)
            y = torch.tensor(y, dtype=torch.float).to(device)

            # Calculate and record output & loss
            output = model(frame)

            # Ensure dimensions match:
            if output.shape == y.shape:
                loss = loss_function(output, y)
                loss.backward()
                optimizer.step()
            else:
                print(f"Dimension mismatch between prediction {output} and ground truth {y}")
                print(f" └─> Skipping loss calculation for {time}")

            # Periodically report on training progress
            print(f"\rEpoch {epoch}: Training {batch_idx*BATCH}/{len(data_loader.dataset)} " + 
                  f"(Loss: {loss.item():02.4})", end=" "*10)

        print(f"\rEpoch {epoch}: Trained {len(data_loader.dataset)}/{len(data_loader.dataset)} " + 
                  f"(Loss: {loss.item():02.4})" + " "*10)
        return (loss, None)
    except KeyboardInterrupt:
        return (loss, KeyboardInterrupt)

In [None]:
# Create testing function
def test(epoch, model, device, data_loader, loss_function, gt_data):
    # Prepare model and data
    model = model.to(device)
    model = model.eval()
    test_loss = []
    map = []
    
    with torch.no_grad():
        for batch_idx, (frame, time) in enumerate(data_loader):
            # Load data into `device`
            frame = frame.to(device)
            
            # Get ground truth for comparison
            preds = []
            for st in time:
                if st in gt_data:
                    preds.append(gt_data[st][0])
                else:
                    preds.append(np.nan)
            y = np.array(preds).reshape(-1)
            y = torch.tensor(y, dtype=torch.float).to(device)
            
            # Calculate loss and accuracy
            output = model(frame)
            
            # Ensure dimensions match
            if output.shape == y.shape:
                test_loss.append(loss_function(output, y).item())
                map.append(torch.mean(torch.abs((output - y) / y)) * 100)
            else:
                print(f"Dimension mismatch between prediction {output} and ground truth {y}")
                print(f" --> Skipping loss calculation for {time}")
            
            # Periodically report on testing progress
            print(f"\rEpoch {epoch}: Testing {batch_idx*BATCH}/{len(data_loader.dataset)}, estimated MAPE {torch.mean(torch.tensor(map)):02.4}%", end=" "*10)
        print(f"\rEpoch {epoch}: Testing {len(data_loader.dataset)}/{len(data_loader.dataset)}" + " "*10)
    
    # Report results
    test_loss = torch.mean(torch.tensor(test_loss))
    accuracy = torch.tensor(map)
    print(f"Test Result, epoch {epoch}: Avg loss {test_loss:04.4}, MAPE {torch.mean(accuracy):02.4}%")
    
    return accuracy

In [None]:
# Set up training/testing loop
import os

MAX_EPOCHS = 10
model = VisOdoNet()
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters())

model_checkpoint = "/home/tjw/Downloads/asm591ai_model.tar"
if os.path.exists(model_checkpoint):
    checkpoint = torch.load(model_checkpoint, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    min_epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    print(f"Loaded model from {model_checkpoint}, trained to epoch {min_epoch} with loss {loss.item():0.4f}")
else:
    min_epoch = 1


Loaded model from /home/tjw/Downloads/asm591ai_model.tar, trained to epoch 1 with loss 0.6839


In [None]:
# Train & Test
for epoch in range(min_epoch, MAX_EPOCHS+1):
    loss = train(
        epoch=epoch,
        model=model,
        device=device,
        optimizer=optimizer,
        data_loader=train_loader,
        loss_function=F.mse_loss,
        gt_data=gt_speed["Tractor01"]
    )
    
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss[0],
        },
        model_checkpoint)
    print(f"Saved model to {model_checkpoint}")
    
    if loss[1] == KeyboardInterrupt:
        raise KeyboardInterrupt
    
    accuracy = test(
        epoch=epoch,
        model=model,
        device=device,
        data_loader=test_loader,
        loss_function=F.mse_loss,
        gt_data=gt_speed["Tractor01"]
    )

In [15]:
accuracy = test(
    epoch=1,
    model=model,
    device=device,
    data_loader=test_loader,
    loss_function=F.mse_loss,
    gt_data=gt_speed["Tractor01"]
)

Epoch 1: Testing 4180/4202, estimated MAPE 37.25%          

  test_loss.append(loss_function(output, y).item())


RuntimeError: The size of tensor a (20) must match the size of tensor b (2) at non-singleton dimension 0

In [16]:
accuracy

NameError: name 'accuracy' is not defined