#Initial Setup

###Import main libraries

In [0]:
# import main ML libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
# for image processing, we import openCV
import cv2 as cv
# for interfacing with filesystem
import os
# for using randomness
from random import shuffle
# for listing progress bars where we need them
from tqdm import tqdm
# for converting png to jpg
from PIL import Image
%matplotlib inline

###Set initial variables

In [0]:
batch_path = '/content/gdrive/My Drive/batchx'
false_path = '/content/gdrive/My Drive/batchx/0'
true_path =  '/content/gdrive/My Drive/batchx/1'

### **Authenticate with google**

In [39]:
from google.colab import drive
# Mount google drive filesystem to /content/gdrive/My Drive/*
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


###Init drive service (v3)

In [40]:
print('Hearing horses (0)')
!ls '/content/gdrive/My Drive/batchx/0'

Hearing horses (0)
abigail-verberne-1179842-unsplash.jpg
adam-jang-314067-unsplash.jpg
alexandra-zota-1242250-unsplash.jpg
alex-blajan-119661-unsplash.jpg
alice-donovan-rouse-115131-unsplash.jpg
annie-spratt-207738-unsplash.jpg
annie-spratt-587023-unsplash.jpg
carlee-dittemore-1226247-unsplash.jpg
courtney-miller-1230955-unsplash.jpg
crystal-huff-757104-unsplash.jpg
danny-gallegos-355559-unsplash.jpg
darius-soodmand-118384-unsplash.jpg
florian-van-duyn-387776-unsplash.jpg
gene-devine-476058-unsplash.jpg
ghost-presenter-425367-unsplash.jpg
james-coleman-1210076-unsplash.jpg
joanne-o-keefe-270785-unsplash.jpg
liam-martens-716280-unsplash.jpg
lily-banse-460652-unsplash.jpg
lukas-l-452028-unsplash.jpg
melanie-hartshorn-348846-unsplash.jpg
michael-mroczek-115985-unsplash.jpg
nicolas-j-leclercq-1141089-unsplash.jpg
paolo-nicolello-1171866-unsplash.jpg
peter-kisteman-329830-unsplash.jpg
randy-fath-667618-unsplash.jpg
randy-fath-736175-unsplash.jpg
revolt-386471-unsplash.jpg
samantha-scholl-15

In [41]:
print('Deaf horses (1)')
!ls '/content/gdrive/My Drive/batchx/1'from PIL import Image

Deaf horses (1)
'1655930_10152876159715960_1332774283920715435_n (1).jpg'
 1655930_10152876159715960_1332774283920715435_n.jpg
 16962_1201287838262_1410553281_30489563_4324513_n-300x225.jpg
 19139_1309272405249_1033766008_30956817_4480404_n-225x300.jpg
 25272_1344971737710_1033766008_31050554_2864086_n-300x238.jpg
'37852155_10156550080655960_5003683379976077312_o (1).jpg'
 37852155_10156550080655960_5003683379976077312_o.jpg
'37853970_10156550080000960_4414984096887865344_n (1).png'
 37853970_10156550080000960_4414984096887865344_n.png
 41706122_10156677046735960_5167040244876312576_n.jpg
 42297474_10156687915410960_1788544808642412544_n.png
 42749174_10156704324275960_6505795329294598144_n.png
'44032542_10156743316045960_6073517715744620544_n (1).jpg'
 44032542_10156743316045960_6073517715744620544_n.jpg
 44395302_10156758646395960_5481495541013544960_n.jpg
'45742689_10156808706615960_8187578173060284416_o (1).jpg'
 45742689_10156808706615960_8187578173060284416_o.jpg
 45754048_101568

#Handle Data

###def one-hot encoder function

In [46]:
# (1) indicates deafness, (0) indicates hearing; encoded in onehot
def ohl(e):
  if e == 1:
    ohl = np.array([1,0])
  elif e == 0:
    ohl = np.array([0,1])
  return ohl
# checking function behavior
print("deaf:")
print(ohl(1))

print("hearing:")
print(ohl(0))

deaf:
[1 0]
hearing:
[0 1]


###def image filetype homogenizer (to .jpg)

In [51]:
# homogenize hearing horses (0)
for filename in tqdm(os.listdir(false_path)):
  # set iterator count for refactored filenames
  i = 0
  # convert .jpeg to .jpg for consistency and shorter filename length
  if filename.endswith(".jpeg"):
    # split filename by base and extention (base.ext -> 'base', 'ext')
    # send filename components to respective variables
    pre, ext = os.path.splitext(filename)
    # rename file as iterator.jpg
    # I am now realizing that the pre, ext filename split was never used
    os.rename(os.path.join(false_path, filename), str(i) + ".jpg")
    # convert .png to .jpg
  if filename.endswith(".png"):
    # use the Pillow library to open image for editing
    image = Image.open(filename)
    # convert .png data to .jpg data
    rgb_image = image.convert('RGB')
    # replace original file with iterator.jpg
    rgb_image.save(str(i) + '.jpg')
  # increment iterator
  i += 1
# print refactored filelist to confirm refactoring success
print(os.listdir(false_path))

100%|██████████| 31/31 [00:00<00:00, 11101.73it/s]

['gene-devine-476058-unsplash.jpg', 'ghost-presenter-425367-unsplash.jpg', 'melanie-hartshorn-348846-unsplash.jpg', 'liam-martens-716280-unsplash.jpg', 'lukas-l-452028-unsplash.jpg', 'peter-kisteman-329830-unsplash.jpg', 'paolo-nicolello-1171866-unsplash.jpg', 'revolt-386471-unsplash.jpg', 'alexandra-zota-1242250-unsplash.jpg', 'crystal-huff-757104-unsplash.jpg', 'alex-blajan-119661-unsplash.jpg', 'annie-spratt-587023-unsplash.jpg', 'abigail-verberne-1179842-unsplash.jpg', 'nicolas-j-leclercq-1141089-unsplash.jpg', 'adam-jang-314067-unsplash.jpg', 'carlee-dittemore-1226247-unsplash.jpg', 'vincent-botta-276146-unsplash.jpg', 'samantha-scholl-157435-unsplash.jpg', 'joanne-o-keefe-270785-unsplash.jpg', 'alice-donovan-rouse-115131-unsplash.jpg', 'lily-banse-460652-unsplash.jpg', 'randy-fath-667618-unsplash.jpg', 'james-coleman-1210076-unsplash.jpg', 'florian-van-duyn-387776-unsplash.jpg', 'darius-soodmand-118384-unsplash.jpg', 'danny-gallegos-355559-unsplash.jpg', 'randy-fath-736175-unspla




In [56]:
# homogenize deaf horses (1)
for filename in tqdm(os.listdir(true_path)):
  # set iterator count for refactored filenames
  i = 0
  # convert .jpeg to .jpg for consistency and shorter filename length
  if filename.endswith(".jpeg"):
    # split filename by base and extention (base.ext -> 'base', 'ext')
    # send filename components to respective variables
    pre, ext = os.path.splitext(filename)
    # rename file as iterator.jpg
    # I am now realizing that the pre, ext filename split was never used
    os.rename(os.path.join(true_path, filename), str(i) + ".jpg")
    # convert .png to .jpg
  if filename.endswith(".png"):
    # use the Pillow library to open image for editing
    image = Image.open(filename)
    # convert .png data to .jpg data
    rgb_image = image.convert('RGB')
    # replace original file with iterator.jpg
    rgb_image.save(str(i) + '.jpg')
  # increment iterator
  i += 1
# print refactored filelist to confirm refactoring success
print(os.listdir(true_path))

  0%|          | 0/67 [00:00<?, ?it/s]


OSError: ignored

###Refactor Images

In [47]:
# check paths
print("hearing path:")
print(os.listdir(false_path))

print("deaf path:")
print(os.listdir(true_path))

hearing path:
['gene-devine-476058-unsplash.jpg', 'ghost-presenter-425367-unsplash.jpg', 'melanie-hartshorn-348846-unsplash.jpg', 'liam-martens-716280-unsplash.jpg', 'lukas-l-452028-unsplash.jpg', 'peter-kisteman-329830-unsplash.jpg', 'paolo-nicolello-1171866-unsplash.jpg', 'revolt-386471-unsplash.jpg', 'alexandra-zota-1242250-unsplash.jpg', 'crystal-huff-757104-unsplash.jpg', 'alex-blajan-119661-unsplash.jpg', 'annie-spratt-587023-unsplash.jpg', 'abigail-verberne-1179842-unsplash.jpg', 'nicolas-j-leclercq-1141089-unsplash.jpg', 'adam-jang-314067-unsplash.jpg', 'carlee-dittemore-1226247-unsplash.jpg', 'vincent-botta-276146-unsplash.jpg', 'samantha-scholl-157435-unsplash.jpg', 'joanne-o-keefe-270785-unsplash.jpg', 'alice-donovan-rouse-115131-unsplash.jpg', 'lily-banse-460652-unsplash.jpg', 'randy-fath-667618-unsplash.jpg', 'james-coleman-1210076-unsplash.jpg', 'florian-van-duyn-387776-unsplash.jpg', 'darius-soodmand-118384-unsplash.jpg', 'danny-gallegos-355559-unsplash.jpg', 'randy-fath

In [49]:
#sequence and label images of hearing horses (0)
for filename in tqdm(os.listdir(false_path)):
  i = 0
  i += 1

SyntaxError: ignored

###Load Images