In [1]:
from pathlib import Path
import cv2
from PIL import Image
import random

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler
import tensorflow as tf
import tensorflow.keras.layers as tfl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def collate_fn(batch):
    """Regroup a batch of samples into one tuple per sample field.

    ``batch`` is an iterable of samples such as ``[(a1, b1), (a2, b2)]``;
    the result is ``((a1, a2), (b1, b2))``. Fields are only grouped, never
    stacked or converted. An empty batch gives an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [34]:
class TrafficData(Dataset):
    """Dataset that serves one image per unique ``Path`` value of a DataFrame.

    Every sample is a tuple ``(image, target, image_path)``:

    * ``image`` - the picture as an RGB float array, resized and divided by 255.
    * ``target`` - array with the ``ClassId`` values recorded for that path.
    * ``image_path`` - the path exactly as it is stored in the DataFrame.
    """

    def __init__(self, df, image_dir, transforms=None):
        """Store the annotation frame and the image location.

        Args:
            df: DataFrame with at least the columns ``Path`` and ``ClassId``.
            image_dir: Directory that the ``Path`` values are relative to.
            transforms: Optional callable invoked as ``transforms(image=image)``
                that returns a mapping holding its result under ``'image'``.
        """
        self.image_ids = df['Path'].unique()
        self.df = df
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index, size=(100, 100)):
        """Load, resize and scale the image at position ``index``.

        Args:
            index: Position inside the array of unique image paths.
            size: Target size handed to ``cv2.resize`` (width, height).

        Returns:
            Tuple ``(image, target, image_path)``.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        image_path = self.image_ids[index]
        records = self.df[self.df['Path'] == image_path]

        image_file = f'{self.image_dir}/{image_path}'
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Image is missing or unreadable: {image_file}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, tuple(size))
        image = image.astype(float) / 255.0

        target = records['ClassId'].values

        if self.transforms:
            # The image is passed by keyword and read back from the returned
            # mapping. NOTE(review): assumes a keyword-style transform
            # pipeline - confirm against the transforms actually used.
            image = self.transforms(image=image)['image']

        return image, target, image_path

    def __len__(self) -> int:
        """Return the number of unique image paths."""
        return self.image_ids.shape[0]

    @staticmethod
    def create_dataset(df, dir, transform=None):
        """Build a ``TrafficData`` for ``df`` whose images live under ``dir``.

        ``transform`` is forwarded to the dataset as its ``transforms`` callable.
        """
        return TrafficData(df, dir, transform)

    @staticmethod
    def loader(dataset, batch_size, num_workers=0, shuffle=True):
        """Wrap ``dataset`` in a ``DataLoader`` that batches with ``collate_fn``.

        Args:
            dataset: Dataset to draw samples from.
            batch_size: Number of samples per batch.
            num_workers: Worker processes used for loading (0 = main process).
            shuffle: Whether to reshuffle every epoch; pass ``False`` for a
                fixed sample order, e.g. during evaluation.
        """
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn,
        )

In [5]:
# Root directory of the image data; the image paths built in later cells are
# stored relative to it.
path = Path("Data_images")

In [6]:
# Collect one (ClassId, Path) record per training image. The rows are gathered
# in a plain list and turned into a DataFrame once: growing the frame with
# pd.concat inside the loop copies every existing row again for each image.
train_path = path / 'Train'
train_records = []
for folder in train_path.glob("*"):
    if not folder.is_dir():
        continue  # only folders are read as classes; stray files are skipped
    class_id = int(folder.name)  # the folder name is parsed as the class id
    for im in folder.glob("*"):
        # Path is stored relative to `path`, using the OS path separator.
        train_records.append({'ClassId': class_id, 'Path': str(im.relative_to(path))})

df_train = pd.DataFrame(train_records, columns=['ClassId', 'Path'])

df_train.to_csv(path / "Train_data.csv")
df_train.head()

Unnamed: 0,ClassId,Path
0,0,Train\0\00000_00000_00000.png
1,0,Train\0\00000_00000_00001.png
2,0,Train\0\00000_00000_00002.png
3,0,Train\0\00000_00000_00003.png
4,0,Train\0\00000_00000_00004.png


In [25]:
# Keep only the rows of the test CSV whose image file is present in the Test
# folder. The CSV is read through `path` so the directory name is spelled the
# same way as everywhere else in the notebook.
df_test_messy = pd.read_csv(path / 'Test_data.csv')

test_path = path / 'Test'
# Directory position of every image, keyed by the 'Test/<file>' form that the
# CSV uses in its Path column.
file_order = {
    f'Test/{pic.name}': position
    for position, pic in enumerate(test_path.glob("*"))
}

# One vectorised membership test replaces a full scan of the frame per file.
# The stable sort then lists the rows in directory order, and rows sharing a
# path keep their CSV order.
df_test = (
    df_test_messy[df_test_messy['Path'].isin(list(file_order))]
    .sort_values('Path', key=lambda paths: paths.map(file_order), kind='stable')
)

df_test.to_csv("Test_data_cleaned.csv")
df_test.head()

Unnamed: 0,ClassId,Path
25765,26,Test/10502.png
36323,2,Test/10503.png
27651,9,Test/10504.png
12439,3,Test/10505.png
13974,39,Test/10506.png


In [26]:
# Row counts of the raw test CSV and of the subset that has an image file.
print(len(df_test_messy))
print(len(df_test))

53453
41692


In [35]:
# Wrap both DataFrames in TrafficData datasets rooted at `path`, then build
# loaders that hand out 16 samples per batch. TrafficData.loader builds
# shuffling loaders, so the test batches come back in random order as well.
train_data = TrafficData.create_dataset(df=df_train, dir=path)
test_data = TrafficData.create_dataset(df=df_test, dir=path)

train_data_loader = TrafficData.loader(dataset=train_data, batch_size=16)
test_data_loader = TrafficData.loader(dataset=test_data, batch_size=16)

In [36]:
# Pull a single batch from the test loader; the loader's collate_fn yields one
# tuple per sample field, unpacked here as images, targets and image paths.
images, targets, image_ids = next(iter(test_data_loader))