In [1]:
import os
from os import listdir
from os.path import join, isfile
from PIL import Image

import numpy as np
import pandas as pd
# import tensorflow as tf
import matplotlib.pyplot as plt
import cv2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn, optim
from efficientnet_pytorch import EfficientNet
from torchvision.transforms import transforms, Lambda
from torch.utils.data import random_split
import random
from torch.utils.data import DataLoader
import glob

In [2]:
original_address = '/scratch/student/shokoofa/'

In [3]:
train_path = set(i.split('/')[-2] for i in glob.glob(os.path.join(original_address + '/dataset/Train/*', '*')))
valid_path = set(i.split('/')[-2] for i in glob.glob(os.path.join(original_address + '/dataset/Valid/*', '*')) )
test_path = set( i.split('/')[-2] for i in glob.glob(os.path.join(original_address + '/dataset/Test/*', '*')) )

In [4]:
info = pd.read_csv(os.path.join(original_address , 'labels_file.csv'))

In [5]:
info['sample_path'] = info['sample_path'].astype(str)
info['path'] = info['sample_path'].apply(lambda x: '/'.join(x.split(' ')[:2]))
label_path = info['path'].values
data_labels = info['label'].values

In [6]:
image_train_path = []
image_test_path = []
image_val_path = []

label_train_path = []
label_test_path = []
label_val_path = []

for i,sub_folder in enumerate(label_path):
    if sub_folder.split('/')[0] in train_path:
        label_train_path.append(data_labels[i])
        image_train_path.append('Train/' + sub_folder)
    elif sub_folder.split('/')[0] in valid_path:
        label_val_path.append(data_labels[i])
        image_val_path.append('Valid/' + sub_folder)
    else:
        label_test_path.append(data_labels[i])
        image_test_path.append('Test/' + sub_folder)

In [10]:
train_df = pd.DataFrame({'path': image_train_path, 'label': label_train_path})
val_df = pd.DataFrame({'path': image_val_path, 'label': label_val_path})
test_df = pd.DataFrame({'path': image_test_path, 'label': label_test_path})

In [11]:
train_df

Unnamed: 0,path,label
0,Train/6fwt32TiT_KD-QU8t64_eg/65,Apical-3ch
1,Train/6fwt32TiT_KD-QU8t64_eg/46,Apical-4ch
2,Train/6fwt32TiT_KD-QU8t64_eg/47,Apical-4ch
3,Train/6fwt32TiT_KD-QU8t64_eg/83,Subcostal
4,Train/6fwt32TiT_KD-QU8t64_eg/62,Apical-2ch
...,...,...
2517,Train/0oMWy6kqR0edXPJd8BbxYg/8,PLAX
2518,Train/0oMWy6kqR0edXPJd8BbxYg/57,Apical-3ch
2519,Train/0oMWy6kqR0edXPJd8BbxYg/33,Apical-4ch
2520,Train/0oMWy6kqR0edXPJd8BbxYg/74,Subcostal


In [12]:
test_df

Unnamed: 0,path,label
0,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical
1,Test/2TmBIz3oRY6M5Iicg6qIrQ/11,PSAX-ves
2,Test/2TmBIz3oRY6M5Iicg6qIrQ/53,Subcostal
3,Test/2TmBIz3oRY6M5Iicg6qIrQ/2,PLAX
4,Test/2TmBIz3oRY6M5Iicg6qIrQ/18,PSAX-base
...,...,...
209,Test/3nRPNSozRYSJy2tmnGdyzg/37,Apical-4ch
210,Test/3nRPNSozRYSJy2tmnGdyzg/81,Subcostal
211,Test/3nRPNSozRYSJy2tmnGdyzg/5,PLAX
212,Test/3nRPNSozRYSJy2tmnGdyzg/19,PSAX-ves


## Window Sliding

In [67]:
def new_WS_df(image_list, label_list, dict, window_size = 10, window_slide = 5):
    for idx in range(len(image_list)):
        frames_per_sample = []
        for item in glob.glob(os.path.join(original_address + '/dataset/' + image_list[idx] + '/', '*')):
            frames_per_sample.append(int(item.split('_')[-1].replace('.jpeg', '')))
        frames_per_sample = sorted(frames_per_sample)
        length = frames_per_sample[-1] - frames_per_sample[0]
        i = 0
        ls = []
        while (i+ window_size <= length):
            ls.append([*range(frames_per_sample[i], frames_per_sample[i + window_size])])
            i += window_slide
        for num_sub_videos in range(len(ls)):
            dict['path'].append('/'.join(item.split('/')[-4:-1])) 
            dict['label'].append(label_list[idx])
            dict['start'].append(ls[num_sub_videos][0])
            dict['end'].append(ls[num_sub_videos][-1])
    new_df = pd.DataFrame(dict)
    return(new_df)

In [73]:
dict = {'path':[], 'label':[], 'start': [], 'end': []}
window_size = 10
window_slide = 5
new_train_df = new_WS_df(image_train_path, label_train_path, dict )
dict = {'path':[], 'label':[], 'start': [], 'end': []}
new_val_df = new_WS_df(image_val_path, label_val_path, dict )
dict = {'path':[], 'label':[], 'start': [], 'end': []}
new_test_df = new_WS_df(image_test_path, label_test_path, dict )

In [76]:
new_test_df

Unnamed: 0,path,label,start,end
0,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical,4,13
1,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical,9,18
2,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical,14,23
3,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical,19,28
4,Test/2TmBIz3oRY6M5Iicg6qIrQ/21,PSAX-apical,24,33
...,...,...,...,...
1381,Test/3nRPNSozRYSJy2tmnGdyzg/29,PSAX-apical,6,15
1382,Test/3nRPNSozRYSJy2tmnGdyzg/29,PSAX-apical,11,20
1383,Test/3nRPNSozRYSJy2tmnGdyzg/29,PSAX-apical,16,25
1384,Test/3nRPNSozRYSJy2tmnGdyzg/29,PSAX-apical,21,30


In [77]:
new_train_df.to_csv('train_labels.csv', index=False)
new_val_df.to_csv('val_labels.csv', index=False)
new_test_df.to_csv('test_labels.csv', index=False)