In [36]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
from itertools import count
import natsort
import datetime
import numpy as np

In [23]:
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import glob
import numpy
import random
import pandas as pd
import tqdm

In [63]:
# Report the CUDA setup and pick the device used by the rest of the notebook.
# The device queries below raise on a machine without a usable GPU, so they
# are only issued when CUDA is actually available.
cuda_available = torch.cuda.is_available()
print(f"Is CUDA supported by this system? {cuda_available}")
print(f"CUDA version: {torch.version.cuda}")

if cuda_available:
    # Storing ID of current CUDA device
    cuda_id = torch.cuda.current_device()
    print(f"ID of current CUDA device: {cuda_id}")
    print(f"Name of current CUDA device: {torch.cuda.get_device_name(cuda_id)}")
else:
    # Keep the name defined so later cells can still reference it.
    cuda_id = None

device = torch.device('cuda:0' if cuda_available else 'cpu')
print(device)

Is CUDA supported by this system? True
CUDA version: 11.3
ID of current CUDA device: 0
Name of current CUDA device: NVIDIA GeForce RTX 2070 Super
cuda:0


In [13]:
# Use the first GPU when one is present, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [43]:
# Preprocessing applied to every frame: resize to 224x224, normalise each
# channel with mean 0.5 / std 0.5, then convert to a torch tensor.
train_transforms = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])

In [44]:
# Collect every file in the frame folder. The result is a list holding one
# list of paths; it is flattened in a later cell.
train_data_path = r"C:\Users\panji\EECS6691_Advanced_DL\Assignment2\training_data_images"
train_image_paths = [glob.glob(train_data_path + '/*')]

In [45]:
# Flatten the list of lists into a single flat list of file paths.
train_image_paths1 = []
for sublist in train_image_paths:
    train_image_paths1.extend(sublist)

In [46]:
# Natural ordering, so that e.g. "frame10" sorts after "frame9".
train_image_paths1 = sorted(train_image_paths1, key=natsort.natsort_keygen())

In [47]:
# Number of frame images found on disk.
num_train_images = len(train_image_paths1)
print(num_train_images)

1967


In [48]:
# Load the per-frame labels and keep the "Phases" column as a NumPy array.
df = pd.read_csv("Processed_data.csv")
df1 = df["Phases"].to_numpy()

In [49]:
# Plain Python list of labels; its length is printed for comparison with the
# number of images.
df2 = df1.tolist()
num_labels = len(df2)
print(num_labels)

1967


In [50]:
# Pair every image path ("Link") with its label ("Label") row by row.
dataset_train = pd.DataFrame(dict(Link=train_image_paths1, Label=df2))

In [51]:
# Shuffle all rows; the fixed random_state keeps the order reproducible.
dataset_train1 = dataset_train.sample(random_state=1, frac=1)

In [52]:
# Shuffled image paths as a plain Python list.
train_image_paths = dataset_train1["Link"].tolist()

In [53]:
# Shuffled labels as a plain Python list, in the same row order as the paths.
labels = dataset_train1["Label"].tolist()

In [54]:
# 80/20 train/validation split of the shuffled image paths.
# The split is taken from the shuffled frame rather than from
# `train_image_paths` itself: the previous form overwrote its own input, so
# re-running the cell shrank the training set a little more each time.
shuffled_image_paths = dataset_train1.loc[:, "Link"].to_numpy().tolist()
path_split_idx = int(0.8 * len(shuffled_image_paths))
train_image_paths = shuffled_image_paths[:path_split_idx]
valid_image_paths = shuffled_image_paths[path_split_idx:]

In [55]:
# 80/20 train/validation split of the shuffled labels (same cut point as the
# image paths, since both come from the same shuffled frame).
# Read from the frame instead of the global `labels`, which the training loop
# rebinds to a batch tensor; re-running this cell would then split that tensor.
shuffled_labels = dataset_train1.loc[:, "Label"].to_numpy().tolist()
label_split_idx = int(0.8 * len(shuffled_labels))
train_labels = shuffled_labels[:label_split_idx]
valid_labels = shuffled_labels[label_split_idx:]

In [59]:
class SurgicalDataset(Dataset):
    """Map-style dataset that pairs image files with their labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-by-index with ``image_paths``.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``"image"`` entry (the albumentations
            convention). ``False`` (the default) or ``None`` disables it.
    """

    def __init__(self, image_paths, labels, transform=False):
        super().__init__()
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, label)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_filepath = self.image_paths[idx]
        # OpenCV loads images in BGR channel order; no conversion is applied.
        image = cv2.imread(image_filepath)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_filepath}")

        label = self.labels[idx]
        # The default ``transform=False`` is not None, so an ``is not None``
        # check would try to call False; only apply real callables.
        if callable(self.transform):
            image = self.transform(image=image)["image"]

        return image, label


In [60]:
# Both splits share the same deterministic preprocessing pipeline.
train_dataset = SurgicalDataset(
    image_paths=train_image_paths,
    labels=train_labels,
    transform=train_transforms,
)
val_dataset = SurgicalDataset(
    image_paths=valid_image_paths,
    labels=valid_labels,
    transform=train_transforms,
)

In [61]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True
)

# Validation only accumulates per-class counts, so shuffling adds nothing;
# keeping a fixed order makes evaluation runs repeatable.
valid_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False
)

In [64]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    Args:
        num_classes: Width of the output layer. Defaults to 32, the previously
            hard-coded value. NOTE(review): the evaluation cell counts 11
            classes, so ``Net(num_classes=11)`` is probably what is intended
            — confirm before changing the default.
        input_size: Height/width of the square input images. Defaults to 224,
            which gives the previously hard-coded 16 * 53 * 53 flatten size.
    """

    def __init__(self, num_classes=32, input_size=224):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Each stage is a 5x5 convolution without padding (size - 4)
        # followed by 2x2 max pooling (floor division by 2).
        feature_size = input_size
        for _stage in range(2):
            feature_size = (feature_size - 4) // 2
        if feature_size <= 0:
            raise ValueError(f"input_size={input_size} is too small for this network")

        self.fc1 = nn.Linear(16 * feature_size * feature_size, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Build the model and move it to the selected device; the bare name on the
# last line shows the layer summary as the cell output.
net = Net().to(device)
net

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=44944, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=32, bias=True)
)

In [65]:
# Adam over all model parameters, paired with a cross-entropy loss.
optimizer = optim.Adam(net.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [67]:
# Train for two passes over the training set, timing each epoch.
net.train()
for epoch in range(2):  # loop over the dataset multiple times
    t = time.time()
    running_loss = 0.0
    loop = tqdm.tqdm(train_loader, total = len(train_loader), leave = True)
    # enumerate() supplies the batch counter; it used to be read from a stale
    # `i` left behind by another cell (NameError on a fresh kernel).
    for batch_idx, (img, label) in enumerate(loop):
        # Move the batch to the device. `targets` is used instead of `labels`
        # so the notebook-level `labels` list is not overwritten.
        inputs, targets = img.to(device), label.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if batch_idx % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
    d = time.time()-t
    print(d)
print('Finished Training')

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.38it/s]


21.019352436065674


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:10<00:00,  4.88it/s]

10.257498979568481
Finished Training





In [70]:
# Per-class evaluation on the validation set.
classes = list(range(11))
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# Inference mode for layers that behave differently while training.
net.eval()

# again no gradients needed
with torch.no_grad():
    # `targets` is used instead of `labels` so the notebook-level `labels`
    # list is not overwritten by the last batch.
    for images, targets in valid_loader:
        images, targets = images.to(device), targets.to(device)
        outputs = net(images)
        predictions = outputs.argmax(dim=1)
        # collect the correct predictions for each class
        for target, prediction in zip(targets.tolist(), predictions.tolist()):
            if target == prediction:
                correct_pred[classes[target]] += 1
            total_pred[classes[target]] += 1


# print raw counts, then accuracy for each class
print(correct_pred)
print(total_pred)
for classname in classes:
    if total_pred[classname] == 0:
        # No validation samples for this class: accuracy is undefined.
        continue
    accuracy = 100 * correct_pred[classname] / total_pred[classname]
    print(f'Accuracy for class {classname}: {accuracy:.1f} %')

{0: 15, 1: 26, 2: 19, 3: 15, 4: 30, 5: 111, 6: 0, 7: 8, 8: 34, 9: 0, 10: 80}
{0: 15, 1: 43, 2: 24, 3: 16, 4: 35, 5: 114, 6: 6, 7: 11, 8: 41, 9: 2, 10: 87}


# Data Preprocessing

In [54]:
# Cut frames from videos
for i in range(1,4):
    vidcap = cv2.VideoCapture("RALIHR_surgeon01_fps01_000%d.mp4"% i)
    #print("RALIHR_surgeon01_fps01_000%d.mp4"% i)
    success, image = vidcap.read()
    #print(vidcap)
    #print(image)
    count = 0
    while success:
        cv2.imwrite(r'\training_data_images\Video%dframe%d.jpg'% (i, count), image)
        success,image = vidcap.read()
        count += 1


In [29]:
# Extract the frames of the first video only, printing the read status.
vidcap = cv2.VideoCapture("RALIHR_surgeon01_fps01_0001.mp4")
print(vidcap)
try:
    success, image = vidcap.read()
    print(success)
    # NOTE(review): the result of the first read is discarded by this second
    # read, so a successfully read first frame is never saved — confirm this
    # is intended.
    success, image = vidcap.read()
    print(success)

    # `frame_idx` replaces the former `count`, which shadowed the
    # `itertools.count` import at the top of the notebook.
    frame_idx = 0
    while success:
        frame_path = r'C:\Users\panji\EECS6691_Advanced_DL\Assignment2\training_data_images\Video%dframe%d.jpg' % (1, frame_idx)
        # cv2.imwrite signals failure only through its return value.
        if not cv2.imwrite(frame_path, image):
            raise OSError("Could not write frame: %s" % frame_path)
        success, image = vidcap.read()
        frame_idx += 1
finally:
    # Always free the decoder/file handle, even if a write fails.
    vidcap.release()

<VideoCapture 000001DB02AEC730>
False
True


In [30]:
#Get labels
a =  pd.read_csv("video.phase.trainingData.clean.StudentVersion.csv")
print(a.head)
a1 = a[a.PhaseName !="Access"]
a2 = a1

for i in range(a2.shape[0]):
    if any([c.isdigit() for c in a2.iat[i,1]]):
        a2.iat[i,1] = a2.iat[i,1][:-1]
    a2.iat[i,1] = a2.iat[i,1].lower()
b1 = a2["PhaseName"].unique()

<bound method NDFrame.head of                         videoName             PhaseName  Start    End
0     RALIHR_surgeon01_fps01_0001                Access    ---    ---
1     RALIHR_surgeon01_fps01_0001      Stationary Idle1  00:00  00:16
2     RALIHR_surgeon01_fps01_0001   Transitionary Idle1  00:16  00:35
3     RALIHR_surgeon01_fps01_0001           Out of body  00:35  01:05
4     RALIHR_surgeon01_fps01_0001   Transitionary Idle2  01:05  01:59
...                           ...                   ...    ...    ...
2020  RALIHR_surgeon01_fps01_0070   Peritoneal closure8  51:24  52:23
2021  RALIHR_surgeon01_fps01_0070  Positioning suture11  52:23  54:08
2022  RALIHR_surgeon01_fps01_0070   Peritoneal closure9  54:08  57:27
2023  RALIHR_surgeon01_fps01_0070  Positioning suture12  57:27  57:55
2024  RALIHR_surgeon01_fps01_0070  Transitionary Idle11  57:55  58:18

[2025 rows x 4 columns]>


In [31]:
def TimeChange(a):
    """Convert a clock string into a whole number of seconds.

    Accepts "MM:SS" and "HH:MM:SS" (also with single-digit fields such as
    "1:02:03"). The fields are parsed by position instead of dispatching on
    the string length, so minute or hour counts of any size are handled.

    Args:
        a: Time string with colon-separated numeric fields.

    Returns:
        int: Total number of seconds.

    Raises:
        ValueError: If ``a`` does not have two or three numeric fields.
    """
    parts = a.strip().split(":")
    if len(parts) not in (2, 3) or not all(part.isdigit() for part in parts):
        raise ValueError(f"Unrecognised time value: {a!r}")

    # Left to right, every field is worth 60 times the one after it.
    x1 = 0
    for part in parts:
        x1 = x1 * 60 + int(part)
    return x1

In [41]:
# Expand each annotated phase interval into one (video, phase) row per second
# and save the result as the label file.
# NOTE(review): only the first 23 annotation rows are used — presumably the
# rows of the first video; confirm before processing more videos.
num_phase_rows = 23

# Phase name -> integer id (its position in `b1`), built once. This replaces
# a per-row np.where scan whose 1-element array was converted with int(),
# which recent NumPy versions deprecate.
phase_ids = {phase_name: phase_id for phase_id, phase_name in enumerate(b1)}

indices = []
phases = []
for row in range(num_phase_rows):
    # The video number is the last two characters of the video name.
    index = int(a2.iloc[row, 0][-2:])
    start_time = int(TimeChange(a2.iloc[row, 2]))
    end_time = int(TimeChange(a2.iloc[row, 3]))
    duration = end_time - start_time

    # One entry per second of the interval.
    indices.extend([index] * duration)
    phases.extend([phase_ids[a2.iloc[row, 1]]] * duration)

x2 = {"Video": indices, "Phases": phases}
df = pd.DataFrame(x2, columns = ["Video", "Phases"])
df.to_csv("Processed_data.csv")

In [39]:
# Number of annotation rows, then 215044 divided by that number.
num_annotation_rows = len(a2)
print(num_annotation_rows)
print(215044 / num_annotation_rows)

2022
106.35212660731949
