## Data Combining and Preparing for YOLOv4

#### We will combine all the annotation files generated from multiple sources and prepare them for training our model (YOLOv4).
#### Along with that, we will also place the training images and labels into the relevant folders inside "darknet" so that the model can be trained.

In [76]:
# Importing necessary libraries

import pandas as pd
import os


In [63]:
# Load the three per-source annotation tables, then display their shapes.
annotation_1, annotation_2, annotation_3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../processed_files/annotation_1.csv',
        '../processed_files/annotation_2.csv',
        '../processed_files/annotation_3.csv',
    )
)
annotation_1.shape, annotation_2.shape, annotation_3.shape

((3914, 6), (1132, 6), (632, 6))

In [64]:
# Stack the three annotation tables row-wise into one frame.
# reset_index() keeps each table's old row labels in an extra 'index' column.
annotations = pd.concat([annotation_1, annotation_2, annotation_3]).reset_index()
annotations.shape


(5678, 7)

In [65]:
# Peek at the last five rows of the combined table.
annotations.tail(5)

Unnamed: 0,index,filename,class_name,center-x,center-y,normalised_width,normalised_height
5673,627,young-medical-woman-making-thumbs-600w-1320570...,Good,0.4925,0.267857,0.245,0.37381
5674,628,young-surgeon-indian-woman-blue-260nw-16798741...,Bad,0.547436,0.276786,0.223077,0.346429
5675,629,young-woman-demonstrates-right-wrong-260nw-171...,Good,0.248547,0.408929,0.215116,0.682143
5676,630,young-woman-demonstrates-right-wrong-260nw-171...,Bad,0.792878,0.4375,0.207849,0.632143
5677,631,young-woman-wears-home-made-600w-1704465010.jpg,Bad,0.385,0.40172,0.476667,0.759214


In [66]:
# Count the non-null entries per column for every distinct class_name.
annotations.groupby(by='class_name').count().reset_index()

Unnamed: 0,class_name,index,filename,center-x,center-y,normalised_width,normalised_height
0,Bad,202,202,201,201,201,201
1,Good,311,311,310,310,310,310
2,,119,119,119,119,119,119
3,bad,667,667,667,667,667,667
4,good,3121,3121,3121,3121,3121,3121
5,none,1258,1258,1258,1258,1258,1258


In [67]:
# Lower-case every class name, then fold the "none" class into "bad",
# and show the per-class counts again.
annotations['class_name'] = (
    annotations['class_name'].str.lower().str.replace('none', 'bad')
)
annotations.groupby(by='class_name').count().reset_index()

Unnamed: 0,class_name,index,filename,center-x,center-y,normalised_width,normalised_height
0,bad,2246,2246,2245,2245,2245,2245
1,good,3432,3432,3431,3431,3431,3431


In [68]:
# Label Encoding for class_name
# Import label encoder 
from sklearn import preprocessing 
  
# LabelEncoder turns the class_name strings into integer ids
# (scikit-learn numbers the classes in sorted label order).
label_encoder = preprocessing.LabelEncoder() 
  
# Encode the labels in column 'class_name' into a new 'class_id' column.
annotations['class_id']= label_encoder.fit_transform(annotations['class_name']) 
# Keep only the columns listed below, in this order; 'index' and 'class_name' are dropped.
annotations=annotations[['filename','class_id','center-x','center-y','normalised_width','normalised_height']]

In [69]:
# Persist the final annotation table; the row index is written too, as the
# first (unnamed) CSV column.
annotations.to_csv(
    '/data/logo_detection/Hackathon/processed_files/annotations_yolo.csv',
    index=True,
)

### Placing the training labels and images into the respective folders in "darknet"


In [96]:
def create_text_files(annotation_path, training_path):
    """Write one label ``.txt`` file per annotated image.

    Reads the annotation CSV at ``annotation_path`` and, for every distinct
    value in its ``filename`` column, writes a space-separated text file into
    ``training_path``. Each line of that file holds ``class_id``,
    ``center-x``, ``center-y``, ``normalised_width`` and
    ``normalised_height`` for one annotation of the image.

    The label file is named after the image with its extension swapped for
    ``.txt`` (any extension, any letter case).

    Args:
        annotation_path: Path of the annotation CSV. It must contain the
            ``filename`` column and the five label columns named above.
        training_path: Existing directory that receives the label files.
    """
    label_columns = [
        'class_id',
        'center-x',
        'center-y',
        'normalised_width',
        'normalised_height',
    ]
    df = pd.read_csv(annotation_path)
    # Same progress output as before: the number of annotation rows.
    print(len(df['filename']))

    # Group once instead of re-filtering the whole table (and re-writing the
    # same file) for every annotation row of an image.
    for image_name, rows in df.groupby('filename', sort=False):
        stem, _extension = os.path.splitext(image_name)
        label_path = os.path.join(training_path, stem + '.txt')
        # Rows with a missing value would produce malformed label lines, so
        # they are dropped; an image left with no rows gets an empty file.
        rows[label_columns].dropna().to_csv(
            label_path, header=False, index=False, sep=' '
        )
        
def transfer_training_images(image_path, annotation_path, training_path):
    """Copy every annotated image into the training folder.

    Reads the annotation CSV at ``annotation_path`` and copies each distinct
    file named in its ``filename`` column from ``image_path`` into
    ``training_path``. When the CSV has a ``training_filename`` column the
    copy is stored under that name; otherwise the image keeps its own name.

    Args:
        image_path: Directory holding the source images.
        annotation_path: Path of the annotation CSV (needs a ``filename``
            column; ``training_filename`` is optional).
        training_path: Existing directory that receives the copies.

    Raises:
        FileNotFoundError: If a listed image is missing from ``image_path``.
    """
    # Imported here because the notebook's import cell does not import shutil.
    import shutil

    df = pd.read_csv(annotation_path)
    if 'training_filename' in df.columns:
        # As before, the last row wins if a filename maps to several names.
        rename_dict = dict(zip(df['filename'], df['training_filename']))
    else:
        # No rename column in the CSV: keep each image's own base name.
        rename_dict = {name: os.path.basename(name) for name in df['filename']}

    # The dict holds each filename once, so an image with several annotation
    # rows is copied a single time, straight to its final name.
    for original_name, training_name in rename_dict.items():
        source = os.path.join(image_path, original_name)
        destination = os.path.join(training_path, training_name)
        shutil.copy(source, destination)
        
        
def create_train_text_file(train_text_path, training_path,
                           image_dir='/data/logo_detection/Hackathon/raw_data/images_3/'):
    """Append the training image paths to ``train.txt``.

    For every entry found in ``image_dir`` one line is appended to
    ``train.txt`` inside ``train_text_path``. The line is that entry's name
    joined onto ``training_path``.

    The file is opened in append mode, so existing lines are kept and calling
    this twice lists every image twice.

    Args:
        train_text_path: Directory that holds (or will hold) ``train.txt``.
        training_path: Directory prefix written in front of every file name.
        image_dir: Directory whose entries are listed. Defaults to the raw
            image folder this function previously had hard-coded.
    """
    list_file = os.path.join(train_text_path, 'train.txt')
    # Open once and close reliably, instead of leaking one handle per image.
    with open(list_file, 'a') as f:
        # Sorted so the output does not depend on os.listdir()'s order.
        for jpgfile in sorted(os.listdir(image_dir)):
            f.write(os.path.join(training_path, jpgfile) + '\n')


In [98]:
if __name__ == '__main__':
    # Inputs: the combined annotation table and the raw images.
    annotation_path = '/data/logo_detection/Hackathon/processed_files/annotations_yolo.csv'
    image_path = '/data/logo_detection/Hackathon/raw_data/images_3/'

    # Outputs: the darknet folders for labels/images and for train.txt.
    training_path = '/data/logo_detection/Hackathon/darknet/data/obj/'
    train_text_path = '/data/logo_detection/Hackathon/darknet/build/darknet/x64/data/'

    # 1) label files, 2) image copies, 3) the list of training images.
    create_text_files(annotation_path, training_path)
    transfer_training_images(image_path, annotation_path, training_path)
    create_train_text_file(train_text_path, training_path)
    

### The data has now been placed in the relevant folders for training YOLOv4