<a href="https://colab.research.google.com/github/katieprice14/computer-vision-system/blob/master/Deep_Learning_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installs Libraries

In [1]:
! pip install opencv-python
! pip install os
! pip install xlsxwriter
! pip install openpyxl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement os (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for os[0m[31m
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xlsxwriter
  Downloading XlsxWriter-3.1.0-py3-none-any.whl (152 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.7/152.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.1.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Imports and mounts to google drive

In [2]:
#all imports
import tensorflow as tf
import cv2
import os
import random
import xlsxwriter
from openpyxl import load_workbook
import array
import numpy as np
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used in later cells
# all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


## Creates a custom data generator

The inputs to the data generator are the path to the binary data file, the path to its CSV index file, the batch size, and the input shape (batch size, height, width, channels).

Includes 2 methods:

```
__getitem__
__len__
```

The role of the getitem method is to generate one batch of data

This getitem method does several things:

*   Reads the input and output (estimated) data
*   Converts the data type from buffer to float
*   Resizes data
*   Returns both the input and output data

The role of the len method is to return the length of the data




In [3]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves pre-batched arrays from a binary file.

    Every non-blank row of the CSV index file describes one batch with four
    comma-separated integers: input byte offset, input byte length, output
    byte offset and output byte length, all relative to the binary file.

    Args:
        binary: Path to the binary file holding the float32 batch data.
        track: Path to the CSV index file described above.
        batch_size: Number of samples per batch; the output array is shaped
            ``(batch_size, 4)``.
        input_size: Shape the input array is resized to.
    """

    def __init__(self, binary, track,
                 batch_size=32,
                 input_size=(32, 224, 396, 1)):
        super().__init__()

        # The index is loaded fully into memory, so its file handle is closed
        # right away instead of being kept open for the generator's lifetime.
        # Blank lines (e.g. a trailing newline) are skipped so they do not
        # turn into malformed rows.
        with open(track, "r") as track_file:
            self.data = [line.split(",") for line in track_file if line.strip()]
        # Kept for backward compatibility: still the index file object, now closed.
        self.track = track_file

        # The binary file must stay open: __getitem__ seeks into it on every call.
        self.binary = open(binary, "rb")
        self.batch_size = batch_size
        self.input_size = input_size

    def _read_floats(self, offset, length):
        """Read ``length`` bytes starting at ``offset`` and view them as float32."""
        self.binary.seek(int(offset))
        raw = self.binary.read(int(length))
        return np.frombuffer(raw, dtype=np.float32)

    def __getitem__(self, index):
        """Return the ``(input_data, output_data)`` arrays of batch ``index``.

        Raises:
            IndexError: If ``index`` is outside the index file's rows.
        """
        row = self.data[index]

        # NOTE: np.resize never fails on a size mismatch - it repeats the data
        # when the buffer is smaller than the target shape and truncates it
        # when larger - so a short batch is padded with repeated values.
        input_data = np.resize(self._read_floats(row[0], row[1]), self.input_size)
        output_data = np.resize(self._read_floats(row[2], row[3]),
                                (self.batch_size, 4))
        return input_data, output_data

    def __len__(self):
        """Return the number of batches listed in the index file."""
        return len(self.data)

    def close(self):
        """Close the underlying binary file; the generator is unusable afterwards."""
        self.binary.close()

## Creates the VGG16 Class

This class defines 3 dense layers, 13 convolution layers, and 6 2D max-pooling layers.

The `call` method passes the input through the layers in order and returns the output of the last layer.

The information of the layers can be found at: https://www.tensorflow.org/api_docs/python/tf/keras/layers

In [4]:
class VGGimplementation(tf.keras.Model):
  """VGG-style convolutional network that outputs 4 values per sample.

  The constructor instantiates 13 convolution layers, 6 max-pooling layers,
  a flatten layer and a 3-layer dense head. ``call`` currently routes data
  through only the first three convolutions and two of the pooling layers
  before the dense head; the remaining layers are created but unused.
  """

  def __init__(self):
    super().__init__()
    layers = tf.keras.layers

    def conv(filters):
      # 3x3 convolution with ReLU and the Keras default ('valid') padding.
      return layers.Conv2D(filters=filters, kernel_size=3, activation='relu')

    def pool():
      # 2x2 window moved with stride 1, so each pooling step shrinks the
      # height/width by one pixel instead of halving them.
      # NOTE(review): VGG normally pools with stride 2 - confirm stride 1 is intended.
      return layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1),
                                 padding='valid')

    # Dense head: 16 -> 8 -> 4 units.
    # NOTE(review): the final layer also uses ReLU, so predictions can never
    # be negative - confirm that suits the regression targets.
    self.dense1 = layers.Dense(units=16, activation=tf.nn.relu)
    self.dense2 = layers.Dense(units=8, activation=tf.nn.relu)
    self.dense3 = layers.Dense(units=4, activation=tf.nn.relu)

    # Convolution stack, grouped by filter count (64, 128, 256, 512).
    self.convolution1 = conv(64)
    self.convolution2 = conv(64)
    self.convolution3 = conv(128)
    self.convolution4 = conv(128)
    self.convolution5 = conv(256)
    self.convolution6 = conv(256)
    self.convolution7 = conv(256)
    self.convolution8 = conv(512)
    self.convolution9 = conv(512)
    self.convolution10 = conv(512)
    self.convolution11 = conv(512)
    self.convolution12 = conv(512)
    self.convolution13 = conv(512)

    self.max_pool_2d_1 = pool()
    self.max_pool_2d_2 = pool()
    self.max_pool_2d_3 = pool()
    self.max_pool_2d_4 = pool()
    self.max_pool_2d_5 = pool()
    self.max_pool_2d_6 = pool()
    self.flatten = layers.Flatten()

  def call(self, inputs):
    """Run ``inputs`` through the active layers and return the last dense output."""
    # Only this reduced pipeline is active; convolution4-13 and
    # max_pool_2d_2-5 are intentionally left out of the forward pass.
    active_layers = (
        self.convolution1,
        self.max_pool_2d_6,
        self.convolution2,
        self.max_pool_2d_1,
        self.convolution3,
        self.flatten,
        self.dense1,
        self.dense2,
        self.dense3,
    )
    x = inputs
    for layer in active_layers:
      x = layer(x)
    return x


## Data Split

Creates binary files and CSV files for testing, training, and validation

The binary file contains batches of the input data, output data, and batch number.

The CSV file contains, for each batch, the start byte and length in bytes of the input data, followed by the start byte and length in bytes of the output data.

Splits the data into the corresponding training, validation, and testing CSV and binary files using a random number generator, giving an approximate 80/10/10 split respectively.

In [5]:
directory = r"/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin"


def _copy_batch(source_bin, cell, dest_bin, dest_csv, dest_start):
  """Copy one batch from ``source_bin`` into a split and record it in the split's index.

  Args:
    source_bin: Binary file to read the batch from (opened in "rb" mode).
    cell: Index row of the batch: input offset, input length, output offset,
      output length (strings or ints).
    dest_bin: Binary file of the destination split; receives the input bytes
      followed by the output bytes.
    dest_csv: Index file of the destination split; receives one row with the
      new input offset, input length, output offset and output length.
    dest_start: Current write offset inside ``dest_bin``.

  Returns:
    The write offset inside ``dest_bin`` after this batch.
  """
  source_bin.seek(int(cell[0]))
  read_input = source_bin.read(int(cell[1]))
  source_bin.seek(int(cell[2]))
  read_output = source_bin.read(int(cell[3]))

  # Input and output must go to the SAME destination file, back to back,
  # because the offsets written to the index assume exactly that layout.
  wroteI = dest_bin.write(read_input)
  wroteO = dest_bin.write(read_output)

  output_start = dest_start + wroteI
  dest_csv.write(str(dest_start) + "," + str(wroteI) + ","
                 + str(output_start) + "," + str(wroteO) + "\n")
  return output_start + wroteO


# Running write offsets inside each split's binary file.
training_start = 0
validation_start = 0
testing_start = 0

# NOTE(review): the split is unseeded, so every run yields a different partition.
with open(directory + "/allcows.csv", "r") as allcowscsv, \
     open(directory + "/allcows.bin", "rb") as allcowsbin, \
     open(directory + "/training.csv", "w") as trainingcsv, \
     open(directory + "/training.bin", "wb") as trainingbin, \
     open(directory + "/validation.csv", "w") as validationcsv, \
     open(directory + "/validation.bin", "wb") as validationbin, \
     open(directory + "/test.csv", "w") as testcsv, \
     open(directory + "/test.bin", "wb") as testbin:
  for row in allcowscsv:
    if not row.strip():
      continue  # ignore blank lines such as a trailing newline
    cell = row.strip().split(',')
    i = random.random()
    if i < 0.8:
      # ~80% of the batches: training split.
      training_start = _copy_batch(allcowsbin, cell, trainingbin,
                                   trainingcsv, training_start)
    elif i < 0.9:
      # ~10%: validation split. The input bytes previously went to
      # training.bin by mistake, which corrupted both splits.
      validation_start = _copy_batch(allcowsbin, cell, validationbin,
                                     validationcsv, validation_start)
    else:
      # Remaining ~10%: test split.
      testing_start = _copy_batch(allcowsbin, cell, testbin,
                                  testcsv, testing_start)

## Compiles and fits the model

Uses the Keras `fit` function, passing a data generator as the model's training input. The input and output data are read from the .bin and .csv files.

We have 3 generators, the first for training, then validation, then testing.

In [None]:
# Build the network and configure it for regression: Adam optimizer, MSE loss.
model = VGGimplementation()
model.compile(optimizer="adam", loss="MSE")

# NOTE(review): the training generator reads allcows.*, the unsplit source that
# the data-split cell partitions into training/validation/test. Validation and
# test batches are therefore also trained on. Consider training.bin/training.csv
# once those files have been verified.
generator = CustomDataGen(
    binary="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/allcows.bin",
    track="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/allcows.csv",
)
validation_generator = CustomDataGen(
    binary="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/validation.bin",
    track="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/validation.csv",
)
# Not used for fitting; consumed by the evaluation cell.
test_generator = CustomDataGen(
    binary="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/test.bin",
    track="/content/drive/MyDrive/VT Animal Science: Computer vision system for beef cattle/Deep Learning Scripts and Resources/data/bin/test.csv",
)

model.fit(x=generator, epochs=15, validation_data=validation_generator)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15

## Tests a given model on a dataset

Args:
* model (tf.keras.Model): machine learning model used for training, validation, and testing
* testSet (tf.keras.sequence): testing dataset

Returns:
* predicted: the results of testing the model on the testing dataset
* MSE: Mean Squared Error Used to estimate the prediction accuracy


In [None]:
def test(model, testSet):
  for x in testSet:
    parameters = x[0]
    output = x[1]
    predicted = model.predict(parameters)
    subtract = np.subtract(output,predicted)
    squared = np.square(subtract)
    absolute = np.abs(subtract)
    summed_squared = np.sum(squared)
    summed_absolute = np.sum(absolute)
    percentage_div = np.divide(subtract,output)
    summed_percentage = np.sum(percentage_div)
    MSE = summed_squared / (32*4)
    MAE = summed_absolute / (32*4)
    MAPE = summed_percentage / (32*4)
    print(MSE)
    print(MAE)
    print(MAPE)
# Evaluate the fitted model on the batches served by the test generator.
test(model, test_generator)