# Processing CSV Files

In [1]:
import pandas as pd
import numpy as np
import csv
import os

## Read CSV files

In [2]:
# Read every "ver3" annotation CSV found in the current directory.
# The directory listing is sorted because os.listdir returns entries in
# arbitrary order; a fixed load order keeps the row order (and therefore the
# jpg_id assigned later from the row position) reproducible between runs.
ver3_files = sorted(name for name in os.listdir(".") if name.endswith("ver3.csv"))
if not ver3_files:
    # Fail with a clear message instead of an unrelated KeyError further down.
    raise FileNotFoundError("No files ending in 'ver3.csv' found in the current directory")

# Concatenate once: growing a DataFrame inside the loop re-copies all
# previously loaded rows on every iteration.
df = pd.concat([pd.read_csv(name) for name in ver3_files], ignore_index=True)

# Drop the annotation-date and monkey-count columns (CAGE is kept)
df = df.drop(columns=['DATE_ANNOTATED', 'TOTAL_NUM_MK', 'IDABLE_NUM_MK'])
print(df.shape)

# Sort by FILE_NAME and rebuild a clean 0..n-1 index
df = df.sort_values(by=['FILE_NAME'])
df = df.reset_index(drop=True)
df.head(10)

(6751, 7)


Unnamed: 0,FILE_NAME,MKGROUP,MKIDS,TOYS,UNKNOWN_GUESS,COMMENTS,CAGE
0,20220308_SD02_2923.JPG,Stranger Things,[DF2I],,,,
1,20220308_SD02_2924.JPG,Stranger Things,[DF2I],,,,
2,20220308_SD02_2946.JPG,Zombies,"[7124, 110E, 151J]",,,,
3,20220308_SD02_2954.JPG,Zombies,"[7124, 110E, 151J]",,,,
4,20220308_SD02_2955.JPG,Zombies,"[7124, 110E, 151J]",,,,
5,20220308_SD02_2956.JPG,Zombies,"[7124, 110E, 151J]",,,,
6,20220308_SD02_2958.JPG,Zombies,[67G],,,,
7,20220308_SD02_2959.JPG,Zombies,[67G],,,,
8,20220308_SD02_2960.JPG,Zombies,[67G],,,,
9,20220308_SD02_2961.JPG,Zombies,[67G],,,,


In [3]:
# Lower-case every column name, give the monkey columns descriptive names,
# and expose the current row index as the jpg_id column.
column_renames = {'mkids': 'monkey_name', 'mkgroup': 'monkey_group'}
df = (
    df.rename(columns=str.lower)
      .rename(columns=column_renames)
      .reset_index()
      .rename(columns={'index': 'jpg_id'})
)
df.head(5)

Unnamed: 0,jpg_id,file_name,monkey_group,monkey_name,toys,unknown_guess,comments,cage
0,0,20220308_SD02_2923.JPG,Stranger Things,[DF2I],,,,
1,1,20220308_SD02_2924.JPG,Stranger Things,[DF2I],,,,
2,2,20220308_SD02_2946.JPG,Zombies,"[7124, 110E, 151J]",,,,
3,3,20220308_SD02_2954.JPG,Zombies,"[7124, 110E, 151J]",,,,
4,4,20220308_SD02_2955.JPG,Zombies,"[7124, 110E, 151J]",,,,


## Create PHOTO Table

In [4]:
# Build the PHOTO table: one row per image, with the SD card label and the
# capture date parsed out of the file name (e.g. 20220308_SD02_2923.JPG).
photo_table = df[['jpg_id', 'file_name', 'monkey_group']].copy()

# SD card label is the "SD<digits>" token between underscores.
photo_table['sd_card'] = photo_table['file_name'].str.extract(r'_(SD\d+)_', expand=False)

# Capture date is the first run of 8 digits (YYYYMMDD). Parsing it with an
# explicit format is vectorized and turns a non-matching file name into NaT,
# whereas joining the regex groups row by row raises a TypeError on NaN.
date_digits = photo_table['file_name'].str.extract(r'(\d{8})', expand=False)
photo_table['date_taken'] = pd.to_datetime(date_digits, format='%Y%m%d')

# Re-organize the order of columns
new_columns = ['jpg_id', 'file_name', 'sd_card', 'date_taken', 'monkey_group']
photo_table = photo_table[new_columns]

photo_table.tail(8)

Unnamed: 0,jpg_id,file_name,sd_card,date_taken,monkey_group
6743,6743,20220825_SD05_6949.JPG,SD05,2022-08-25,Instigators
6744,6744,20220825_SD05_6952.JPG,SD05,2022-08-25,Instigators
6745,6745,20220825_SD05_6954.JPG,SD05,2022-08-25,Instigators
6746,6746,20220825_SD05_6957.JPG,SD05,2022-08-25,Instigators
6747,6747,20220825_SD05_6958.JPG,SD05,2022-08-25,Instigators
6748,6748,20220825_SD05_6960.JPG,SD05,2022-08-25,Instigators
6749,6749,20220825_SD05_6963.JPG,SD05,2022-08-25,Instigators
6750,6750,20220825_SD05_6967.JPG,SD05,2022-08-25,Instigators


In [5]:
# Write the photo table to disk (index column omitted) and confirm
photo_csv_path = '../fromcsv_photo.csv'
photo_table.to_csv(photo_csv_path, index=False)
print("PHOTO TABLE SAVED!")

PHOTO TABLE SAVED!


## Create TOY Table

In [6]:
# Build the TOY table: one row per (photo, toy) pair.
# Photos without a toy annotation are dropped; the comma-separated toy string
# is split and exploded, then spaces are removed and the plural
# 'BlueFrisbees' is folded into 'BlueFrisbee'.
toy_table = (
    df[['jpg_id', 'toys']]
    .dropna()
    .sort_values(by=['jpg_id'])
    .assign(toys=lambda t: t['toys'].str.split(','))
    .explode('toys', ignore_index=True)
    .assign(
        toys=lambda t: t['toys']
        .str.replace(' ', '')
        .str.replace('BlueFrisbees', 'BlueFrisbee')
    )
)

# Cage structures (triangle / swing, any letter case) are not toys: filter them out
is_cage_structure = toy_table['toys'].str.contains('triangle|swing', case=False)
toy_table = toy_table[~is_cage_structure].rename(columns={'toys': 'toy'})

# Show a preview, then the distinct toy names
display(toy_table.head())
toy_table['toy'].unique()

Unnamed: 0,jpg_id,toy
0,38,Unknown
1,60,DentalStar
2,79,Dumbbell
3,163,Grenade
4,163,Grenade


array(['Unknown', 'DentalStar', 'Dumbbell', 'Grenade', 'Kong',
       'LuckyCloverChew', 'MegalastBall', 'Ziggs', 'BlueFrisbee',
       'DentalKong', 'Zyro', 'Hex', 'S-shapedToy', 'ChallengerBall'],
      dtype=object)

In [7]:
# Write the toy table to disk (index column omitted) and confirm
toy_csv_path = '../fromcsv_toy.csv'
toy_table.to_csv(toy_csv_path, index=False)
print("TOY TABLE SAVED!")

TOY TABLE SAVED!


## Create MONKEY Table

In [8]:
# Build the MONKEY table: one row per (photo, monkey) pair.
# sort_values returns a new frame, so the selection from df is never mutated.
monkey_table = df[['jpg_id', 'monkey_name', 'unknown_guess']].sort_values(by=['jpg_id'])

# The id list is stored as text like "[7124, 110E, 151J]": remove the
# brackets, split on commas, and give every id its own row.
id_lists = monkey_table['monkey_name'].str.strip('[]').str.split(',')
monkey_table = monkey_table.assign(monkey_name=id_lists).explode('monkey_name')

# Remove the spaces left over from the ", " separators
monkey_table['monkey_name'] = monkey_table['monkey_name'].str.replace(' ', '')

# Show the last rows, then the distinct monkey ids
display(monkey_table.tail(10))
monkey_table['monkey_name'].unique()

Unnamed: 0,jpg_id,monkey_name,unknown_guess
6742,6742,Unknown1,K
6743,6743,G942,
6743,6743,114J,
6744,6744,G942,
6745,6745,G942,
6746,6746,G942,
6747,6747,G942,
6748,6748,G942,
6749,6749,G942,
6750,6750,G942,


array(['DF2I', '7124', '110E', '151J', '67G', '94B', '72X', '143H', '69X',
       '87J', '14F', '19J', '101G', 'G701', '68F', '49Y', 'Unknown1',
       '79G', '68E', '167I', '86I', '59E', '134J', '35Y', 'Unknown2',
       'G942', '144H', '58I', '46J', '37I', '26J', '48Z', '70G', '114J',
       '42Z', '42K', '0EX', '36J', '0FL', '68Y', '81G', '40J', '09X',
       '58K', '59K', '67K', '98K', '120K', '126K', '139K', '122K', '129K',
       '170K'], dtype=object)

In [9]:
# SAVE: write the monkey table to ../fromcsv_monkey.csv, omitting the DataFrame index
monkey_table.to_csv('../fromcsv_monkey.csv', index=False)