In [None]:
# Machine Learning and Data Science Imports
import tensorflow_probability as tfp
import tensorflow_datasets as tfds
import tensorflow_addons as tfa
import tensorflow_hub as hub
from skimage import exposure
import pandas as pd; pd.options.mode.chained_assignment = None
import numpy as np
import scipy

# Built In Imports
from datetime import datetime
from glob import glob
import warnings
import IPython
import urllib
import json
import zipfile
import pickle
import shutil
import string
import math
import tqdm
import time
import os
import gc
import re

# Visualization Imports
from matplotlib.colors import ListedColormap
import matplotlib.patches as patches
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from PIL import Image
import matplotlib
import plotly
import PIL
import cv2

# PRESETS
# Font settings (family, size, colour) bundled as a dict for reuse in figure layouts.
FIG_FONT = dict(family="Helvetica, Arial", size=14, color="#7f7f7f")
# 15 colours from seaborn's "Spectral" palette, each passed through Plotly's
# convert_to_RGB_255 and label_rgb helpers.
LABEL_COLORS = [px.colors.label_rgb(px.colors.convert_to_RGB_255(x)) for x in sns.color_palette("Spectral", 15)]
# Same list with the entry at index 8 removed.
# NOTE(review): the "NO_FINDING" name looks inherited from a different project, and no
# cell visible in this notebook reads either colour list — confirm they are still needed.
LABEL_COLORS_WOUT_NO_FINDING = LABEL_COLORS[:8]+LABEL_COLORS[9:]

# Other Imports
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm.notebook import tqdm
import pydicom

In [None]:
# Load every metadata JSON file and expose each top-level section as a DataFrame
# named "<part1>_<part2>_<section>_df" (e.g. megadetector_results_images_df).
json_folder_path = "/kaggle/input/iwildcam2021-fgvc8/metadata"
# Only JSON files are parsed; any other entry in the folder would make json.load fail.
list_of_files = [name for name in sorted(os.listdir(json_folder_path)) if name.endswith(".json")]

for file_name in list_of_files:
    json_path = os.path.join(json_folder_path, file_name)
    print(f"Current json processed: {file_name}")

    # The dataframe prefix depends only on the file name, so build it once per file:
    # "<prefix>_<a>_<b>.json" -> "<a>_<b>".
    file_name_split = file_name.split(".")[0].split("_")
    file_name_str = file_name_split[1] + "_" + file_name_split[2]

    # Read the file and release the handle before doing any DataFrame work.
    with open(json_path) as json_file:
        json_data = json.load(json_file)

    for section_name, section_data in json_data.items():
        print(f"\tCurrent json item processed: {section_name} length: {len(section_data)}")
        data_frame_name = f"{file_name_str}_{section_name}_df"
        print(f"\tDynamic dataframe created: {data_frame_name}")
        section_df = pd.json_normalize(section_data)
        # Later cells refer to these frames by name, so publish them as notebook globals.
        globals()[data_frame_name] = section_df
        # The output file name intentionally matches the variable name (no extension added).
        section_df.to_csv(f"{data_frame_name}", index=False)

In [None]:
# Lift the column-width limit first so the preview below shows full cell contents.
pd.set_option('display.max_colwidth', None)
print(megadetector_results_images_df.shape)
megadetector_results_images_df.head()


In [None]:
bbox_train = megadetector_results_images_df[megadetector_results_images_df.max_detection_conf > .7]

In [None]:
bbox_train.head(117)

In [None]:
#Just testing we have access to the images
import matplotlib.image as img
import matplotlib.pyplot as plt

sample_input_image_path = '/kaggle/input/iwildcam2021-fgvc8/train/905cd794-21bc-11ea-a13a-137349068a90.jpg'
image = img.imread(sample_input_image_path)
plt.imshow(image)
plt.show()

In [None]:
#Function to break the multiple Json detections to individual rows

def explode(df, lst_cols, fill_value='', preserve_index=False):
    """Expand list-valued columns so that every list element gets its own row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    lst_cols : str or list-like of str
        Column(s) whose cells hold lists. When several columns are given, the
        lists in one row must have the same length; the first column decides
        how many times each row is repeated.
    fill_value : scalar, default ''
        Value written into missing cells of rows whose list was empty (or
        missing). As in the original helper, it is applied with ``fillna`` to
        the whole result whenever at least one such row exists.
    preserve_index : bool, default False
        If True, keep the (repeated) original index labels; otherwise return a
        fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        Non-list columns repeated per element, plus the flattened list columns,
        ordered by the original index.
    """
    # make sure `lst_cols` is list-alike
    if (lst_cols is not None
        and len(lst_cols) > 0
        and not isinstance(lst_cols, (list, tuple, np.ndarray, pd.Series))):
        lst_cols = [lst_cols]
    # all columns except `lst_cols`
    idx_cols = df.columns.difference(lst_cols)
    # List length per row. A missing cell yields NaN, which np.repeat cannot
    # consume, so it is treated like an empty list.
    lens = df[lst_cols[0]].str.len().fillna(0).astype(int)
    has_items = lens > 0
    # original index label repeated once per list element
    idx = np.repeat(df.index.values, lens)

    # Flatten each list column. np.concatenate raises on an empty sequence, so
    # the "every list is empty" case gets an explicit empty array instead.
    flattened = {}
    for col in lst_cols:
        cells = df.loc[has_items, col].values
        flattened[col] = np.concatenate(cells) if len(cells) else np.array([], dtype=object)

    res = (pd.DataFrame({col: np.repeat(df[col].values, lens) for col in idx_cols},
                        index=idx)
             .assign(**flattened))

    # Re-attach the rows that had nothing to explode. pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    if (~has_items).any():
        res = (pd.concat([res, df.loc[~has_items, idx_cols]], sort=False)
                 .fillna(fill_value))

    # A stable sort keeps the elements of one list in their original order.
    res = res.sort_index(kind='mergesort')
    if not preserve_index:
        res = res.reset_index(drop=True)
    return res

In [None]:
#Execute function
bbox_train = explode(bbox_train, ['detections'], fill_value='')

In [None]:
bbox_train.head()

In [None]:
#break out detections to components
bbox_3 = bbox_train['detections'].values.tolist()
xyz = pd.DataFrame(bbox_3,columns =['category', 'bbox','conf'])

In [None]:
#Rejoin results from previous cell
Train_DF=pd.concat([bbox_train, xyz], axis=1)
Train_DF.head()

In [None]:
#Remove unneeded fields
Train_DF.drop(['max_detection_conf','detections'], axis=1, inplace=True)
Train_DF.head()

In [None]:
#Filter out poor detection
Train_DF = Train_DF[Train_DF.conf > .69]
Train_DF.head()

In [None]:
#Add image path to DF
# Path = training image folder + image id + ".jpg"
raw_train_image_dir = '/kaggle/input/iwildcam2021-fgvc8/train/'
Train_DF['image_path'] = raw_train_image_dir + Train_DF['id'] + '.jpg'
Train_DF.head()

In [None]:
#Split the list for BBox into mins/maxes
# Each `bbox` list is spread, in list order, over four new columns aligned on Train_DF's index.
# NOTE(review): MegaDetector documents its bbox as [x_min, y_min, width, height]
# (normalised, origin at the top-left), so the second value is presumably a y-minimum
# despite the 'Y_max' column name — confirm before using it in label maths.
Train_DF[['X_min','Y_max', 'W', 'H']] = pd.DataFrame(Train_DF.bbox.tolist(), index= Train_DF.index)
Train_DF.head()

In [None]:
#Randomize dataframe rows (because I'm going to split the DF to Train Test)
# A fixed seed makes the shuffle — and therefore the train/validation selection that
# takes the first/last rows per category — identical on every run.
Train_DF = Train_DF.sample(frac=1, random_state=42)
Train_DF.head()

In [None]:
Train_DF = Train_DF.rename(columns={'id': 'image_id'})

In [None]:
train_annotations_annotations_df

In [None]:
Train_Merge = pd.merge(Train_DF, 
                       train_annotations_annotations_df, 
                       on='image_id', how='left')
Train_Merge.head()

In [None]:
train_annotations_categories_df['category_id'] = train_annotations_categories_df['id']

In [None]:
#Merge the Animal types associated to the image detection
Train_Merge = pd.merge(Train_Merge, 
                       train_annotations_categories_df, 
                       on='category_id', how='left')
Train_Merge.head()

In [None]:
#drop NA values
# Row counts are printed before and after so the number of dropped rows is visible.
print(len(Train_Merge))
Train_No_Null = Train_Merge.dropna(subset=['category_id'])
print(len(Train_No_Null))

In [None]:
result = pd.merge(train_annotations_categories_df, Train_No_Null, how="inner", on=["category_id"])
dictionary = pd.DataFrame(result, columns=['name_x', 'category_id'])
dictionary=dictionary.drop_duplicates(subset=['category_id'])
dictionary['new_category_id'] = range(len(dictionary))
dictionary

In [None]:
Train_No_Null=pd.merge(dictionary,Train_No_Null, how="inner", on=["category_id"])
Train_No_Null.head()

In [None]:
# Category ids are compared as strings from here on.
Train_No_Null['category_id'] = Train_No_Null['category_id'].astype(str)
Unique_Category_List = Train_No_Null['category_id'].unique().tolist()
print(Unique_Category_List)

In [None]:
Train_DF_Final = Train_No_Null.copy()
Train_DF_Final = Train_DF_Final[0:0]
Test_DF_Final = Train_DF_Final[0:0]


In [None]:
for value in Unique_Category_List:
    temp_df=Train_No_Null[Train_No_Null.category_id == value].head(5)
    Train_DF_Final=Train_DF_Final.append(temp_df)
    del temp_df
Train_DF_Final

In [None]:
for value in Unique_Category_List:
    temp_df=Train_No_Null[Train_No_Null.category_id == value].tail(2)
    Test_DF_Final=Test_DF_Final.append(temp_df)
    del temp_df
Test_DF_Final

In [None]:
#used to create the yolo label values
# Disabled: everything below is a bare string literal, so none of it executes.
# It would add x_mid = X_min + W/2 and y_mid = (1 - Y_max) - H/2 to Train_No_Null.
# NOTE(review): if bbox is [x_min, y_min, w, h] measured from the top-left corner, the
# YOLO centre would be y_min + H/2 rather than (1 - Y_max) - H/2 — confirm against the
# label files actually used for training.
'''
Train_No_Null['x_mid'] = Train_No_Null.apply(lambda row: (row.X_min+((row.W)/2)), axis =1)
Train_No_Null['y_mid'] = Train_No_Null.apply(lambda row: ((1-(row.Y_max))-((row.H)/2)), axis =1)


Train_No_Null.head()'''

In [None]:
#os.mkdir("/kaggle/working/text_folder/")

In [None]:
#used to create the yolo label values
#Train_No_Null=pd.merge(dictionary,Train_No_Null, how="inner", on=["category_id"])
#Yolo_Label_df=Train_No_Null[["image_id", "new_category_id", "x_mid", "y_mid", "W", "H"]]
#Train_No_Null
#Yolo_Label_df

In [None]:
#used to create the yolo label values
'''for index, row in tqdm(Yolo_Label_df.iterrows()):
    file_name = "/kaggle/working/text_folder/" + row['image_id']+".txt"
    pd.DataFrame(row).T.to_csv(file_name, columns=["new_category_id", "x_mid", "y_mid", "W", "H"], header=None, index=None, sep=' ', mode='a')'''

In [None]:
# Distinct image paths per split (an image with several detections appears once).
train_files = list(Train_DF_Final.image_path.unique())
val_files = list(Test_DF_Final.image_path.unique())
len(train_files), len(val_files)

In [None]:
label_dir = '/kaggle/input/animal-detect-yolo-labels/text_folder'
dataset_root = '/kaggle/working/animals'


def copy_split(files, split):
    """Copy images and their label files into the YOLO folder layout for one split.

    Parameters
    ----------
    files : iterable of str
        Image paths to copy.
    split : str
        Sub-folder name under images/ and labels/ ('train' or 'val').

    Returns
    -------
    list of (str, OSError)
        The image paths whose image or label copy failed, with the error raised.
        Copying stays best-effort: a failure is recorded and the loop continues.
    """
    image_dest = os.path.join(dataset_root, 'images', split)
    label_dest = os.path.join(dataset_root, 'labels', split)
    os.makedirs(image_dest, exist_ok=True)
    os.makedirs(label_dest, exist_ok=True)

    failures = []
    for file in tqdm(files):
        # Label files share the image's base name, with a .txt extension.
        filename = os.path.splitext(os.path.basename(file))[0]
        try:
            shutil.copy(file, image_dest)
            shutil.copy(os.path.join(label_dir, filename + '.txt'), label_dest)
        except OSError as err:
            # Only file-system errors are tolerated; the bare `except: pass` also hid
            # programming errors and gave no hint that anything had been skipped.
            failures.append((file, err))

    if failures:
        print(f"{split}: {len(failures)} of {len(files)} files could not be copied completely")
    return failures


train_copy_failures = copy_split(train_files, 'train')
val_copy_failures = copy_split(val_files, 'val')

In [None]:
#!pip install zip_files
#!zip-folder --auto-root --outfile /kaggle/working/text_folder.zip /kaggle/working/text_folder

In [None]:
'''from IPython.display import FileLink
FileLink(r'./text_folder.zip')'''

In [None]:
 '''import os
 arr = os.listdir('./animals/images/')
 print(arr)'''

In [None]:
'''os.chdir('/kaggle/working/')
import os
os.remove("./animals.yaml")'''

In [None]:
'''from shutil import rmtree

rmtree("./animals")'''

In [None]:
import os

# Count the regular files sitting directly in the YOLO training image folder.
yolo_train_image_dir = "/kaggle/working/animals/images/train"
path, dirs, files = next(os.walk(yolo_train_image_dir))
file_count = len(files)
print(file_count)

In [None]:
# Class names ordered by new_category_id, one entry per distinct (id, name) pair.
unique_pairs = set(zip(Train_DF_Final.new_category_id, Train_DF_Final.name))
class_ids, class_names = list(zip(*unique_pairs))
name_array = np.array(class_names)
classes = [str(name) for name in name_array[np.argsort(class_ids)]]
classes

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# One image path per line: the list files referenced from the dataset YAML.
with open(join( cwd , 'train.txt'), 'w') as f:
    for path in glob('/kaggle/working/animals/images/train/*'):
        f.write(path+'\n')

with open(join( cwd , 'val.txt'), 'w') as f:
    for path in glob('/kaggle/working/animals/images/val/*'):
        f.write(path+'\n')

data = dict(
    train =  join( cwd , 'train.txt') ,
    val   =  join( cwd , 'val.txt' ),
    # Derived from the class list so the class count can never disagree with `names`
    # (the previous hard-coded 198 only matched one particular data selection).
    nc    = len(classes),
    names = classes
    )

with open(join( cwd , 'animals.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Read the file back for a visual check; `with` closes the handle afterwards.
with open(join( cwd , 'animals.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
yolov5_source_dir = '/kaggle/input/yolov5-official-v31-dataset/yolov5'
yolov5_work_dir = '/kaggle/working/yolov5'

# shutil.copytree raises FileExistsError when the destination already exists, so the
# copy is skipped on a re-run instead of aborting the cell.
if not os.path.exists(yolov5_work_dir):
    shutil.copytree(yolov5_source_dir, yolov5_work_dir)
# train.py / detect.py are invoked with relative paths in the following cells.
os.chdir(yolov5_work_dir)


import torch
from IPython.display import Image, clear_output  # to display images

clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Runs YOLOv5's train.py from the current directory with the dataset YAML written earlier,
# starting from yolov5x.pt, for 100 epochs at batch size 40, with WANDB_MODE=dryrun set
# for the command.
# NOTE(review): --img 400 is not a multiple of 32; YOLOv5 presumably rounds it up to the
# model stride — confirm the effective training size.
!WANDB_MODE="dryrun" python train.py --img 400 --batch 40 --epochs 100 --data /kaggle/working/animals.yaml --weights yolov5x.pt --cache

In [None]:
#Detect Me On It

!python detect.py --weights 'runs/train/exp/weights/best.pt'\
--img 500\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/vinbigdata/images/valy\
--save-conf\
--save-txt

In [None]:
#WORKING QUERIES BELOW /// IGNORE

In [None]:
os.chdir('/kaggle/working')

In [None]:
arr = os.listdir('/kaggle/working/animals/labels/train')
print(arr)

In [None]:
#Just testing we have access to the images
import matplotlib.image as img
import matplotlib.pyplot as plt

copied_image_path = '/kaggle/working/animals/images/train/90006c84-21bc-11ea-a13a-137349068a90.jpg'
image = img.imread(copied_image_path)
plt.imshow(image)
plt.show()

In [None]:
image_path_list = Train_DF_Final['image_path'].tolist()

In [None]:
counter=0
for file in image_path_list:
    if os.path.exists(file)==True:
        print('true')
    else:
        print('fail')'''

os.path.exists('../input/iwildcam2021-fgvc8/train/86760c00-21bc-11ea-a13a-137349068a90.jpg')

../input/iwildcam2021-fgvc8/train

In [None]:
#Just testing we have access to the images
import matplotlib.image as img
import matplotlib.pyplot as plt

checked_image_path = '/kaggle/input/iwildcam2021-fgvc8/train/86760c00-21bc-11ea-a13a-137349068a90.jpg'
image = img.imread(checked_image_path)
plt.imshow(image)
