# Convolutional Neural Network Attempt 001
Basic attempt at a CNN for the Algae Bloom Dataset

# Step 0: Obtain the Data

In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline

In [67]:
lake_dataset = pd.read_csv('./data/cleaned/utah_lake_vineyard.csv')

In [68]:
lake_dataset.head()

Unnamed: 0,Date,Time,Temperature,Sp_Cond,pH_mV,pH,Turbidity,Chlorophyll,Chlorophyll_RFU,ODOSat,ODO,BGA_Phycocyanin_RFU
0,5-May,0:00,15.02,1848,-100.1,8.36,16.84,4.4,1.3,90.2,9.04,0.4
1,5-May,0:15,14.99,1847,-100.1,8.36,16.76,4.2,1.2,90.2,9.04,0.4
2,5-May,0:30,14.96,1847,-100.1,8.36,16.82,4.3,1.3,90.1,9.04,0.4
3,5-May,0:45,14.95,1848,-100.1,8.36,17.19,4.5,1.3,90.0,9.03,0.4
4,5-May,1:00,14.92,1848,-100.0,8.36,16.85,4.5,1.3,89.8,9.02,0.4


In [69]:
lake_dataset['Temperature'].max()

28.57

## Define Helper Functions

In [70]:
def feature_normalize(dataset):
    cols = dataset.columns.values
    dataset[cols] = dataset[cols].apply(lambda x: (x - x.min()) / (x.max() - x.min() ) )
    return dataset

def set_max_BGA_RFU(dataset, min_val):
    dataset['BGA_Phycocyanin_RFU'] = dataset['BGA_Phycocyanin_RFU'].apply(lambda x: 1 if x >= min_val else 0)
    return dataset

## Set BGA RFU Threshold and Update Data

In [71]:
rfu_threshold = 1.2
lake_dataset = set_max_BGA_RFU(lake_dataset, rfu_threshold)

In [72]:
lake_dataset.head()

Unnamed: 0,Date,Time,Temperature,Sp_Cond,pH_mV,pH,Turbidity,Chlorophyll,Chlorophyll_RFU,ODOSat,ODO,BGA_Phycocyanin_RFU
0,5-May,0:00,15.02,1848,-100.1,8.36,16.84,4.4,1.3,90.2,9.04,0
1,5-May,0:15,14.99,1847,-100.1,8.36,16.76,4.2,1.2,90.2,9.04,0
2,5-May,0:30,14.96,1847,-100.1,8.36,16.82,4.3,1.3,90.1,9.04,0
3,5-May,0:45,14.95,1848,-100.1,8.36,17.19,4.5,1.3,90.0,9.03,0
4,5-May,1:00,14.92,1848,-100.0,8.36,16.85,4.5,1.3,89.8,9.02,0


## Drop Undesired Columns

In [73]:
cols_to_keep = []
# cols_to_keep.append('Date')
# cols_to_keep.append('Time')
cols_to_keep.append('Temperature')
cols_to_keep.append('Sp_Cond')
# cols_to_keep.append('pH_mV')
cols_to_keep.append('pH')
cols_to_keep.append('Turbidity')
# cols_to_keep.append('Chlorophyll')
# cols_to_keep.append('Chlorophyll_RFU')
# cols_to_keep.append('ODOSat')
cols_to_keep.append('ODO')
cols_to_keep.append('BGA_Phycocyanin_RFU')

cols = list(lake_dataset.columns.values)

for col in cols:
    if col not in cols_to_keep:
        lake_dataset = lake_dataset.drop(col, axis=1)

In [74]:
lake_dataset.head()

Unnamed: 0,Temperature,Sp_Cond,pH,Turbidity,ODO,BGA_Phycocyanin_RFU
0,15.02,1848,8.36,16.84,9.04,0
1,14.99,1847,8.36,16.76,9.04,0
2,14.96,1847,8.36,16.82,9.04,0
3,14.95,1848,8.36,17.19,9.03,0
4,14.92,1848,8.36,16.85,9.02,0


## Normalize Data

In [75]:
lake_dataset = feature_normalize(lake_dataset)

AttributeError: 'float' object has no attribute 'min'

In [57]:
lake_dataset.head()

Unnamed: 0,Temperature,Sp_Cond,pH,Turbidity,ODO,BGA_Phycocyanin_RFU
0,0.409329,0.853771,0.330769,0.025439,0.351818,0.0
1,0.408021,0.853309,0.330769,0.025313,0.351818,0.0
2,0.406713,0.853309,0.330769,0.025407,0.351818,0.0
3,0.406277,0.853771,0.330769,0.025989,0.350909,0.0
4,0.404969,0.853771,0.330769,0.025454,0.35,0.0


1.0