In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import time

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# pretrained models
import torchvision
from torchvision import models, transforms

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# What's in this dataset?
import os
os.listdir('drive/MyDrive/DeepLearningProject/PotholeDataset')

['test', 'train']

In [4]:
!find . -name "*.DS_Store" -type f -delete

In [5]:
# create a dataframe for our data
data_path = 'drive/MyDrive/DeepLearningProject/PotholeDataset'

rows = []
for dataset in ['test','train']:
    for label in os.listdir(data_path + f'/{dataset}'):
        for image in os.listdir(data_path + f'/{dataset}' + f'/{label}'):
            row = dict()
            row['image_file'] = image
            row['label'] = label
            row['dataset'] = dataset
        
            # a bit redudant, could build from other data in __getitem__ if wanted
            row['image_path'] = data_path + f'/{dataset}' + f'/{label}'
            rows.append(row)
        
df = pd.DataFrame(rows)
print(len(df))
df.head()

738


Unnamed: 0,image_file,label,dataset,image_path
0,7.jpg,Plain,test,drive/MyDrive/DeepLearningProject/PotholeDatas...
1,5.jpg,Plain,test,drive/MyDrive/DeepLearningProject/PotholeDatas...
2,4.jpg,Plain,test,drive/MyDrive/DeepLearningProject/PotholeDatas...
3,6.jpg,Plain,test,drive/MyDrive/DeepLearningProject/PotholeDatas...
4,8.jpg,Plain,test,drive/MyDrive/DeepLearningProject/PotholeDatas...


In [6]:
# training and validation data
df_train = df[df['dataset'] == 'train'].reset_index(drop=True)
df_val = df[df['dataset'] == 'test'].reset_index(drop=True)
len(df_train), len(df_val)

(722, 16)

In [7]:
%%capture

!pip install opencv-python

In [8]:
import cv2

def resize_img(path, size):
    img = cv2.imread(path)
    
    start = time.time()
    try:
        img = cv2.resize(img, size)
        cv2.imwrite(path, img)
    except:
        # look at the image
        print(path)
    end = time.time()
    return end - start

# resize all of the images to 512x512
total_time_resize = 0.0
for idx in tqdm(range(len(df_train))):
    row = df_train.iloc[idx]
    image_path = row['image_path']
    fname = row['image_file']
    path = image_path+'/'+fname
    
    total_time_resize += resize_img(path, (512, 512))
    
for idx in tqdm(range(len(df_val))):
    row = df_val.iloc[idx]
    image_path = row['image_path']
    fname = row['image_file']
    path = image_path+'/'+fname
    
    total_time_resize += resize_img(path, (512, 512))

  0%|          | 0/722 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

## Baseline Vanilla CNN

In [9]:
device = torch.device('cuda')

def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [10]:
class PotholeDataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        # label dictionary
        self.label_dict = {'Plain':0, 'Pothole':1}
    
    def __len__(self):
        return len(self.df)

    def __iter__(self):
      for b in self.dl:
        yield to_device(b, self.device)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # get ingredients for retrieving image
        image_path = row['image_path']
        fname = row['image_file']
        path = image_path+'/'+fname
        
        # read the img
        img = cv2.imread(path)
        
        # convert to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        # move color channels to correct spot
        img = np.transpose(img, (2, 0, 1))
        
        # convert to [0,1] scale
        img = torch.tensor(img / 255.).float()
        
        label = torch.tensor(self.label_dict[row['label']])
        
        return img.to(device), label.to(device)

In [11]:
ds_train = PotholeDataset(df_train)
dl_train = DataLoader(ds_train, batch_size = 8, shuffle=True)

ds_val = PotholeDataset(df_val)
dl_val = DataLoader(ds_val, batch_size = 8, shuffle=True)

In [36]:
for img, label in tqdm(dl_train):
    None

  0%|          | 0/91 [00:00<?, ?it/s]

In [31]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        # same padding!
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        
        # doing this to shrink size enough!
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=3, padding=1)
        
        self.linear1 = nn.Linear(4096, 100)
        
        # read documentation for CrossEntropyLoss!
        self.linear2 = nn.Linear(100, 2)
        
        # pooling
        self.pool = nn.MaxPool2d(kernel_size=2)
        
        # activation
        self.relu = nn.ReLU()
        
        # for unrolling into FC layer
        self.unroll = nn.Flatten()
        
    def forward(self, x):
        # helpful to do this along the way sometimes!
        #print(x.shape)
        
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.conv4(x)
        x = self.relu(x)
        
        # unroll x for FC layer
        x = self.linear1(self.unroll(x))
        x = self.relu(x)
        x = self.linear2(x)
        
        return x
    
model = CNN()

In [12]:
def one_pass(model, dataloader, optimizer, lossFun, backwards=True, print_loss=False):
    
    if backwards == True:
        model.train()
    else:
        model.eval()
    
    total_loss = 0.0
    for x, y in tqdm(dataloader):
        
        y_pred = model(x)
        loss = lossFun(y_pred, y)
        total_loss += loss.item()
        
        if backwards == True:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    avg_loss = total_loss / len(dataloader)
    
    if print_loss == True:
        print(avg_loss)
    
    return avg_loss

def one_pass_acc(model, dataloader, num_points):
    model.eval()
    total_incorrect = 0
    
    softmax = nn.LogSoftmax(dim=1)
    
    for x, y in dataloader:
        y_pred = softmax(model(x))
        y_pred = torch.argmax(y_pred, dim=1)
        
        total_incorrect += torch.count_nonzero(y - y_pred).item()
        
    percent_wrong = total_incorrect / num_points
    return 1 - percent_wrong

In [37]:
lossFun = nn.CrossEntropyLoss()
model = CNN()
model.to(device)
optimizer = optim.Adam(model.parameters(),lr=0.001)

num_epochs = 5
train_losses = []
valid_losses = []

for epoch in tqdm(range(num_epochs)):
    print('Epoch: ', epoch)
    
    train_loss = one_pass(model, dl_train, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    
    valid_loss = one_pass(model, dl_val, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    
    train_acc = one_pass_acc(model, dl_train, len(ds_train))
    valid_acc = one_pass_acc(model, dl_val, len(ds_val))
    print('Train Acc: ', train_acc)
    print('Valid Acc: ', valid_acc)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6956364224245261


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6942651271820068
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  1


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6941014750973209


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6935341954231262
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  2


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6935013643987886


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6933093667030334
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  3


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6934756453220661


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6931976675987244
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  4


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6933933791223463


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.693179577589035
Train Acc:  0.5055401662049861
Valid Acc:  0.5


## Baseline Pretrained: VGG

### Partially Frozen

In [42]:
vgg16 = models.vgg16(pretrained=True)
vgg16.classifier[6] = nn.Linear(in_features=4096, out_features=2)

# pass the appropriate parameters to the optimizer (freeze)
params_to_update = []

for i, param in enumerate(vgg16.parameters()):
    if i % 2 == 0:
        params_to_update.append(param)

vgg16.to(device)

optimizer = optim.Adam(params_to_update, lr=0.01)

In [43]:
num_epochs = 5
train_losses = []
valid_losses = []

for epoch in tqdm(range(num_epochs)):
    print('Epoch: ', epoch)
    
    train_loss = one_pass(vgg16, dl_train, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    
    valid_loss = one_pass(vgg16, dl_val, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    
    train_acc = one_pass_acc(vgg16, dl_train, len(ds_train))
    valid_acc = one_pass_acc(vgg16, dl_val, len(ds_val))
    print('Train Acc: ', train_acc)
    print('Valid Acc: ', valid_acc)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  15449895347.030733


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7613194286823273
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  1


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  2.2402493491932587


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7375655174255371
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  2


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  1.5462526141942203


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7192056775093079
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  3


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  1.1625305165301312


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7015433311462402
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  4


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  1.0469333192149362


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7341116368770599
Train Acc:  0.5055401662049861
Valid Acc:  0.5


### Finetuned

In [14]:
vgg16 = models.vgg16(pretrained=True)
vgg16.classifier[6] = nn.Linear(in_features=4096, out_features=2)
optimizer = optim.Adam(vgg16.parameters(),lr=0.001)
lossFun = nn.CrossEntropyLoss()
vgg16.to(device)

num_epochs = 5
train_losses = []
valid_losses = []

for epoch in tqdm(range(num_epochs)):
    print('Epoch: ', epoch)
    
    train_loss = one_pass(vgg16, dl_train, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    
    valid_loss = one_pass(vgg16, dl_val, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    
    train_acc = one_pass_acc(vgg16, dl_train, len(ds_train))
    valid_acc = one_pass_acc(vgg16, dl_val, len(ds_val))
    print('Train Acc: ', train_acc)
    print('Valid Acc: ', valid_acc)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.9979564442739381


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7056675255298615
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  1


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.7119477365043138


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7134791612625122
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  2


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.7014297377932203


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6935147643089294
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  3


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.706666961476043


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6940906047821045
Train Acc:  0.4944598337950139
Valid Acc:  0.5
Epoch:  4


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.7129230315868671


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.7068807482719421
Train Acc:  0.5055401662049861
Valid Acc:  0.5


### No Initialized Weights

In [41]:
vgg16 = models.vgg16(pretrained=False)
vgg16.classifier[6] = nn.Linear(in_features=4096, out_features=2)
optimizer = optim.Adam(vgg16.parameters(),lr=0.001)
vgg16.to(device)

num_epochs = 2
train_losses = []
valid_losses = []

for epoch in tqdm(range(num_epochs)):
    print('Epoch: ', epoch)
    
    train_loss = one_pass(vgg16, dl_train, optimizer, lossFun)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)
    
    valid_loss = one_pass(vgg16, dl_val, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)
    
    train_acc = one_pass_acc(vgg16, dl_train, len(ds_train))
    valid_acc = one_pass_acc(vgg16, dl_val, len(ds_val))
    print('Train Acc: ', train_acc)
    print('Valid Acc: ', valid_acc)

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  3.445114868027823


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6937824487686157
Train Acc:  0.5055401662049861
Valid Acc:  0.5
Epoch:  1


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss:  0.6935081246134999


  0%|          | 0/2 [00:00<?, ?it/s]

Valid loss:  0.6933415532112122
Train Acc:  0.5055401662049861
Valid Acc:  0.5
