In [2]:
train_path = '../../chimp_zoo/Chimpanzees'

In [3]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

In [4]:
pd.options.display.max_columns = None
pd.options.display.max_rows = 20

In [5]:
image_paths = []
image_files = []
image_annotations = []

for path, subdirs, files in os.walk(train_path):
    for name in files:
        if(name.endswith('.jpg')):
            data_point = {}
            data_point['file_name'] = name
            data_point['file_path'] = (os.path.join(path, name))
            data_point['class'] = name.split('-')[0]
            image_paths.append(data_point)
            
        elif(name.endswith('.png')):
            data_point = {}
            data_point['file_name'] = name
            data_point['file_path'] = (os.path.join(path, name))
            data_point['class'] = name.split('-')[0]
            image_paths.append(data_point)
            
        elif(name.endswith('.txt')):
            data_point = {}
            with open(os.path.join(path, name),"r") as label:
                data_point['annotation_file'] = name
                data_point['annotation_path'] = (os.path.join(path, name))
                annotation = label.readlines()
                annotation = [x.rstrip('\n') for x in annotation]
                for i in range(len(annotation)):
                    data_point['annotation_' + str(i)] = annotation[i]
            image_annotations.append(data_point)
        else:
            pass

In [6]:
img_path_series = pd.DataFrame(image_paths)
train_data = pd.DataFrame(image_annotations)

In [7]:
train_data.insert(0,'file_name',pd.Series(img_path_series['file_name']))
train_data.insert(2,'image_path',pd.Series(img_path_series['file_path']))

In [8]:
len(train_data)

2060

In [9]:
train_data.head()

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation_0,annotation_1,annotation_2,annotation_3,annotation_4
0,Alex_25-06-10_T00_02_09.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458,,,,
1,Alex_25-06-10_T00_02_26.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999,,,,
2,Alex_25-06-10_T00_02_27.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001,,,,
3,Alex_30-06-10_1_T00_00_00_Jahaga.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/Alex_30-06-10_1_T0...,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125,,,,
4,Alex_30-06-10_1_T00_00_28_Jahaga.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/Alex_30-06-10_1_T0...,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854,,,,


**Amend file_names**

In [10]:
x0=train_data['annotation_file'].values

In [11]:
def replace_ext(file_name):
    return file_name.strip('txt') + 'png'

In [12]:
f0 = [replace_ext(x) for x in x0]

In [13]:
train_data['file_name']=f0

In [14]:
train_data.head()

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation_0,annotation_1,annotation_2,annotation_3,annotation_4
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458,,,,
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999,,,,
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/Alex_25-06-10_T00_...,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001,,,,
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/Alex_30-06-10_1_T0...,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125,,,,
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/Alex_30-06-10_1_T0...,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854,,,,


**Amend paths**

In [15]:
x1 = train_data['annotation_path'].values

In [16]:
f1 = [replace_ext(y) for y in x1]    

In [17]:
train_data['image_path']=f1

In [18]:
train_data.head()

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation_0,annotation_1,annotation_2,annotation_3,annotation_4
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.png,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458,,,,
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.png,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999,,,,
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.png,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001,,,,
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.png,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125,,,,
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.png,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854,,,,


In [19]:
train_0_data = train_data.drop(columns=['annotation_1','annotation_2','annotation_3','annotation_4'])
train_1_data = train_data.drop(columns=['annotation_0','annotation_2','annotation_3','annotation_4'])
train_2_data = train_data.drop(columns=['annotation_0','annotation_1','annotation_3','annotation_4'])
train_3_data = train_data.drop(columns=['annotation_0','annotation_1','annotation_2','annotation_4'])
train_4_data = train_data.drop(columns=['annotation_0','annotation_1','annotation_2','annotation_3'])

train_0_data.dropna(inplace=True)
train_1_data.dropna(inplace=True)
train_2_data.dropna(inplace=True)
train_3_data.dropna(inplace=True)
train_4_data.dropna(inplace=True)

train_0_data=train_0_data.rename(columns={"annotation_0": "annotation"})
train_1_data=train_1_data.rename(columns={"annotation_1": "annotation"})
train_2_data=train_2_data.rename(columns={"annotation_2": "annotation"})
train_3_data=train_2_data.rename(columns={"annotation_3": "annotation"})
train_4_data=train_2_data.rename(columns={"annotation_4": "annotation"})

In [20]:
all_train_data = pd.concat([train_0_data,train_1_data,train_2_data,train_3_data,train_4_data])

In [21]:
all_train_data

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.png,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.png,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.png,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.png,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.png,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854
...,...,...,...,...,...
2030,V416_13_14-12-10.png,V416_13_14-12-10.txt,../../chimp_zoo/Chimpanzees/V416_13_14-12-10.png,../../chimp_zoo/Chimpanzees/V416_13_14-12-10.txt,3 0.734635 0.604167 0.069791 0.122222
2032,V416_15_14-12-10.png,V416_15_14-12-10.txt,../../chimp_zoo/Chimpanzees/V416_15_14-12-10.png,../../chimp_zoo/Chimpanzees/V416_15_14-12-10.txt,3 0.746354 0.686111 0.083854 0.125
2035,V416_18_14-12-10.png,V416_18_14-12-10.txt,../../chimp_zoo/Chimpanzees/V416_18_14-12-10.png,../../chimp_zoo/Chimpanzees/V416_18_14-12-10.txt,3 0.641667 0.475926 0.080729 0.125
2052,Video_35_30-06-10_T00_02_07.png,Video_35_30-06-10_T00_02_07.txt,../../chimp_zoo/Chimpanzees/Video_35_30-06-10_...,../../chimp_zoo/Chimpanzees/Video_35_30-06-10_...,17 0.644532 0.89622 0.091583 0.194447


In [22]:
from tqdm import tqdm
import cv2

images = []
for image_path in tqdm(all_train_data['image_path'].values):
  data_point = {}
  data_point['image']=cv2.imread(image_path)
  data_point['path']=image_path
  images.append(data_point)

100%|██████████| 2543/2543 [01:18<00:00, 32.46it/s] 


In [23]:
none_images = [x for x in images if x['image'] is None]
none_paths = [x['path'] for x in none_images]

In [24]:
len(none_paths)

357

In [25]:
none_dataset=all_train_data[all_train_data['image_path'].isin(none_paths)]

In [26]:
none_dataset

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.png,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.png,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.png,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.png,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.png,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854
...,...,...,...,...,...
65,CB_to_09_06.png,CB_to_09_06.txt,../../chimp_zoo/Chimpanzees/CB_to_09_06.png,../../chimp_zoo/Chimpanzees/CB_to_09_06.txt,10 0.37207 0.739583 0.063281 0.127604
67,CB_to_09_08.png,CB_to_09_08.txt,../../chimp_zoo/Chimpanzees/CB_to_09_08.png,../../chimp_zoo/Chimpanzees/CB_to_09_08.txt,6 0.822691 0.083334 0.103533 0.091667
88,C_Frodo_09_01.png,C_Frodo_09_01.txt,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.png,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.txt,5 0.241536 0.548394 0.114909 0.151042
219,C_to_10_01.png,C_to_10_01.txt,../../chimp_zoo/Chimpanzees/C_to_10_01.png,../../chimp_zoo/Chimpanzees/C_to_10_01.txt,11 0.925312 0.626045 0.025831 0.045016


In [27]:
none_dataset['image_path'] = none_dataset['image_path'].apply(lambda x: x.strip('png')+'JPG').values

In [28]:
from tqdm import tqdm

images = []
for image_path in tqdm(none_dataset['image_path'].values):
  data_point = {}
  data_point['image']=cv2.imread(image_path)
  data_point['path']=image_path
  images.append(data_point)

100%|██████████| 357/357 [00:17<00:00, 20.45it/s]


In [29]:
none_images = [x for x in images if x['image'] is None]
none_paths = [x['path'] for x in none_images]

In [30]:
len(none_paths)

5

In [31]:
none_dataset=none_dataset[~none_dataset['image_path'].isin(none_paths)]

In [32]:
none_dataset

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854
...,...,...,...,...,...
65,CB_to_09_06.png,CB_to_09_06.txt,../../chimp_zoo/Chimpanzees/CB_to_09_06.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_06.txt,10 0.37207 0.739583 0.063281 0.127604
67,CB_to_09_08.png,CB_to_09_08.txt,../../chimp_zoo/Chimpanzees/CB_to_09_08.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_08.txt,6 0.822691 0.083334 0.103533 0.091667
88,C_Frodo_09_01.png,C_Frodo_09_01.txt,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.JPG,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.txt,5 0.241536 0.548394 0.114909 0.151042
219,C_to_10_01.png,C_to_10_01.txt,../../chimp_zoo/Chimpanzees/C_to_10_01.JPG,../../chimp_zoo/Chimpanzees/C_to_10_01.txt,11 0.925312 0.626045 0.025831 0.045016


In [33]:
none_dataset['file_name']=none_dataset['file_name'].apply(lambda x: x.strip('png') + 'JPG')

In [34]:
none_dataset

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation
0,C_Alex_09_01.JPG,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458
1,C_Alex_09_02.JPG,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999
2,C_Alex_09_03.JPG,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001
3,C_Alex_09_04.JPG,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125
4,C_Alex_09_05.JPG,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.JPG,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854
...,...,...,...,...,...
65,CB_to_09_06.JPG,CB_to_09_06.txt,../../chimp_zoo/Chimpanzees/CB_to_09_06.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_06.txt,10 0.37207 0.739583 0.063281 0.127604
67,CB_to_09_08.JPG,CB_to_09_08.txt,../../chimp_zoo/Chimpanzees/CB_to_09_08.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_08.txt,6 0.822691 0.083334 0.103533 0.091667
88,C_Frodo_09_01.JPG,C_Frodo_09_01.txt,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.JPG,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.txt,5 0.241536 0.548394 0.114909 0.151042
219,C_to_10_01.JPG,C_to_10_01.txt,../../chimp_zoo/Chimpanzees/C_to_10_01.JPG,../../chimp_zoo/Chimpanzees/C_to_10_01.txt,11 0.925312 0.626045 0.025831 0.045016


In [35]:
all_train_data = pd.concat([all_train_data,none_dataset])

In [36]:
all_train_data

Unnamed: 0,file_name,annotation_file,image_path,annotation_path,annotation
0,C_Alex_09_01.png,C_Alex_09_01.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_01.png,../../chimp_zoo/Chimpanzees/C_Alex_09_01.txt,0 0.48125 0.413802 0.158204 0.186458
1,C_Alex_09_02.png,C_Alex_09_02.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_02.png,../../chimp_zoo/Chimpanzees/C_Alex_09_02.txt,0 0.439053 0.237187 0.20347 0.209999
2,C_Alex_09_03.png,C_Alex_09_03.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_03.png,../../chimp_zoo/Chimpanzees/C_Alex_09_03.txt,0 0.540788 0.336562 0.184714 0.205001
3,C_Alex_09_04.png,C_Alex_09_04.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_04.png,../../chimp_zoo/Chimpanzees/C_Alex_09_04.txt,0 0.517346 0.387813 0.272855 0.30125
4,C_Alex_09_05.png,C_Alex_09_05.txt,../../chimp_zoo/Chimpanzees/C_Alex_09_05.png,../../chimp_zoo/Chimpanzees/C_Alex_09_05.txt,0 0.377953 0.396099 0.322272 0.417854
...,...,...,...,...,...
65,CB_to_09_06.JPG,CB_to_09_06.txt,../../chimp_zoo/Chimpanzees/CB_to_09_06.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_06.txt,10 0.37207 0.739583 0.063281 0.127604
67,CB_to_09_08.JPG,CB_to_09_08.txt,../../chimp_zoo/Chimpanzees/CB_to_09_08.JPG,../../chimp_zoo/Chimpanzees/CB_to_09_08.txt,6 0.822691 0.083334 0.103533 0.091667
88,C_Frodo_09_01.JPG,C_Frodo_09_01.txt,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.JPG,../../chimp_zoo/Chimpanzees/C_Frodo_09_01.txt,5 0.241536 0.548394 0.114909 0.151042
219,C_to_10_01.JPG,C_to_10_01.txt,../../chimp_zoo/Chimpanzees/C_to_10_01.JPG,../../chimp_zoo/Chimpanzees/C_to_10_01.txt,11 0.925312 0.626045 0.025831 0.045016


In [37]:
from tqdm import tqdm

images = []
for image_path in tqdm(all_train_data['image_path'].values):
  data_point = {}
  data_point['image']=cv2.imread(image_path)
  data_point['path']=image_path
  images.append(data_point)

100%|██████████| 2895/2895 [01:37<00:00, 29.64it/s] 


In [38]:
none_images = [x for x in images if x['image'] is None]
none_paths = [x['path'] for x in none_images]

In [39]:
len(none_images)

357

In [40]:
latest_df = all_train_data[~all_train_data['image_path'].isin(none_paths)]

In [41]:
from tqdm import tqdm

images = []
for image_path in tqdm(latest_df['image_path'].values):
  data_point = {}
  data_point['image']=cv2.imread(image_path)
  data_point['path']=image_path
  images.append(data_point)

100%|██████████| 2538/2538 [03:05<00:00, 13.71it/s]


In [45]:
len(images)

2538

In [42]:
none_images = [x for x in images if x['image'] is None]
none_paths = [x['path'] for x in none_images]

In [44]:
latest_df.to_csv('../data/chimp_zoo_chimpanzees.csv')