In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import StratifiedKFold

#### Create train.csv - Mapping between every train image and class

In [3]:
import os

inputFolder = '../../updated_data/train'


def list_class_images(folder):
    """Collect every file under ``folder`` with the name of its directory.

    Args:
        folder: Root directory to walk recursively.

    Returns:
        A ``(file_names, dir_names)`` pair of equally long lists. Entry ``k``
        of ``dir_names`` is the name of the directory that directly contains
        file ``k``; the notebook later uses it as the class ``Id``.
        A missing ``folder`` yields two empty lists (``os.walk`` ignores
        the error by default).
    """
    file_names = []
    dir_names = []
    for root, directories, filenames in os.walk(folder):
        # Sorting in place makes os.walk descend in a fixed order, so the
        # listing does not depend on the filesystem's enumeration order.
        directories.sort()
        # .name rather than .stem: .stem would drop everything after the
        # last dot of a directory name such as "whale.v2".
        dir_name = Path(root).name
        for fname in sorted(filenames):
            file_names.append(fname)
            dir_names.append(dir_name)
    return file_names, dir_names


imgs, dirs = list_class_images(inputFolder)

In [4]:
# One row per file: column 0 is the file name, column 1 its directory name.
image_dir_pairs = list(zip(imgs, dirs))
df = pd.DataFrame(image_dir_pairs)

In [5]:
# Shuffle the rows once. The seed is fixed so that train.csv, label.csv and
# the fold files, which all depend on this row order, come out identical on
# every run (the StratifiedKFold further down is seeded as well).
df = df.sample(frac=1, random_state=1).reset_index(drop=True)

In [6]:
# Name the two positional columns: image file name and class Id.
column_names = ['Image', 'Id']
df.columns = column_names

In [10]:
# Rewrite every file name as "<stem>.jpg", whatever extension it had on disk.
df['Image'] = df['Image'].map(lambda fname: f'{Path(fname).stem}.jpg')

In [11]:
# Persist the image -> class table; the row index is not written.
df.to_csv(path_or_buf='train.csv', index=False)

In [12]:
# Display (rows, columns) of the image -> class table as a sanity check.
df.shape

(4539, 2)

#### Create bboxs.csv - Bounding box for every image (train + test)

In [13]:
# Both bounding-box files are read with the same five columns:
# the image name and the two box corners (x0, y0) and (x1, y1).
bbox_columns = ["img", "x0", "y0", "x1", "y1"]
bb_test = pd.read_csv('source/bb_test.csv', usecols=bbox_columns)
bb_train = pd.read_csv('source/bb_train.csv', usecols=bbox_columns)

In [14]:
# Stack the test boxes and then the train boxes into one table.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; row order and index values are the same as before.
bboxs = pd.concat([bb_test, bb_train])

In [15]:
# Call the file-name column "Image", the same name train.csv uses.
bboxs = bboxs.rename({"img": "Image"}, axis="columns")

In [17]:
# Rewrite every image name as "<stem>.jpg", matching the names in train.csv.
bboxs['Image'] = bboxs['Image'].map(lambda fname: f'{Path(fname).stem}.jpg')

In [18]:
# Persist the combined bounding boxes; the row index is not written.
bboxs.to_csv(path_or_buf='bboxs.csv', index=False)

#### Create label.csv - Mapping of index to classes

In [19]:
# Turn the row index into a regular column next to each row's class Id.
# NOTE(review): this yields one row per image, not one per distinct class;
# confirm that is what the consumers of label.csv expect.
label = df['Id'].reset_index()

In [20]:
# Rename the (index, Id) pair to the header written to label.csv.
label_columns = ['id', 'name']
label.columns = label_columns

In [23]:
# Persist the index -> class name table; the row index is not written.
label.to_csv(path_or_buf='label.csv', index=False)

In [24]:
# Inputs for the fold splitter: image names as samples, class Ids as the
# labels to stratify on.
X = df['Image']
y = df['Id']

In [26]:
# Write five stratified train/validation splits of df as
# train_split_<k>.csv and valid_split_<k>.csv, with k running from 1 to 5.
kf = StratifiedKFold(n_splits=5, random_state=1, shuffle=True)
for fold, (train_index, val_index) in enumerate(kf.split(X, y), start=1):
    # split() yields positional row numbers, so select with .iloc. Label-based
    # .loc only gives the same rows while df has a default 0..n-1 index.
    df.iloc[train_index].to_csv(f'train_split_{fold}.csv', index=False)
    df.iloc[val_index].to_csv(f'valid_split_{fold}.csv', index=False)



In [33]:
# Distinct file extensions among the training rows of the last fold
# (train_index is whatever the split loop left behind).
df.loc[train_index, 'Image'].map(lambda fname: Path(fname).suffix).unique()

array(['.jpg'], dtype=object)

In [30]:
# Image names of the training rows of the last fold left by the split loop.
df.loc[train_index, 'Image']

0       PM-WWA-20140813-067.jpg
1       PM-WWA-20140617-024.jpg
2       PM-WWA-20140412-171.jpg
3       PM-WWA-20110707-096.jpg
4       PM-WWA-20160804-043.jpg
                 ...           
4532    PM-WWA-20060911-009.jpg
4533    PM-WWA-20101020-045.jpg
4534    PM-WWA-20100727-024.jpg
4536    PM-WWA-20080807-010.jpg
4537    PM-WWA-20150621-381.jpg
Name: Image, Length: 3632, dtype: object