In [23]:
import keras
from keras import models
from keras import layers
from keras import optimizers
import os, shutil
import utils
import pandas as pd

## Image data

In [None]:
# Directory roots of the cactus image data: the first is used to build the
# training generator, the second the testing/validation generator.
train_dir, test_dir = (
    "data_files/Cactus_Image/training_set",
    "data_files/Cactus_Image/testing_set",
)

In [None]:
def keras_cnn(n_neurons=32, n_layers=3, filter_size=(3, 3), activation="relu",
              input_shape=(64, 64, 3), max_pooling=(2, 2), dense_layer=128,
              loss="binary_crossentropy", optimizer="adam", metrics="acc"):
    """Build and compile a sequential CNN with a single sigmoid output unit.

    The network is: one Conv2D + MaxPooling2D block that declares the input
    shape, followed by ``n_layers - 2`` further Conv2D + MaxPooling2D blocks,
    then Flatten, one hidden Dense layer and a one-unit sigmoid Dense layer.
    With the default ``n_layers=3`` this gives two convolutional blocks;
    values of 2 or less add no extra blocks.

    Args:
        n_neurons: Number of filters in every Conv2D layer.
        n_layers: Controls the number of convolutional blocks (see above).
        filter_size: Kernel size passed to every Conv2D layer.
        activation: Activation used by the Conv2D layers and the hidden
            Dense layer.
        input_shape: Shape of one input sample. Always set this to the
            input shape of the specific problem.
        max_pooling: Pool size passed to every MaxPooling2D layer.
        dense_layer: Number of units in the hidden Dense layer.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.
        metrics: A single metric, or a list/tuple of metrics, passed to
            ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # A single metric is wrapped in a list (the original behaviour); a list or
    # tuple is passed on flat instead of being nested inside another list.
    if isinstance(metrics, (list, tuple)):
        metric_list = list(metrics)
    else:
        metric_list = [metrics]

    model = models.Sequential()

    # First convolutional block: the only layer that declares the input shape.
    model.add(layers.Conv2D(n_neurons, filter_size, activation=activation,
                            input_shape=input_shape))
    model.add(layers.MaxPooling2D(max_pooling))

    # Remaining convolutional blocks.
    for _ in range(n_layers - 2):
        model.add(layers.Conv2D(n_neurons, filter_size, activation=activation))
        model.add(layers.MaxPooling2D(max_pooling))

    # Classification head.
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_layer, activation=activation))
    model.add(layers.Dense(1, activation="sigmoid"))

    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metric_list)

    return model


In [None]:
# Build the CNN with all default hyper-parameters of keras_cnn and print its
# layer-by-layer summary.
model = keras_cnn() 
model.summary()

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Both generators only multiply the pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and testing directory iterators:
# images are resized to 64x64, served in batches of 20, with binary labels.
_flow_settings = dict(
    target_size=(64, 64),
    batch_size=20,
    class_mode="binary",
)

train_generator = train_datagen.flow_from_directory(train_dir, **_flow_settings)
test_generator = test_datagen.flow_from_directory(test_dir, **_flow_settings)

In [None]:
# Train the model from the training generator for 5 epochs, using the test
# generator as validation data; the value returned by the call is kept in
# `history`.
# NOTE(review): steps_per_epoch=250 and validation_steps=50 are hard-coded;
# with the generators' batch_size=20 that is 5000 / 1000 images per pass --
# confirm these match the actual dataset sizes.
# NOTE(review): fit_generator is deprecated in newer Keras/TensorFlow releases
# in favour of model.fit -- confirm against the installed version.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=250,
    epochs=5,
    validation_data=test_generator,
    validation_steps=50)

## Timeseries Data

In [58]:
# Load the time-series dataset through the project's `utils` helper and print
# the per-column overview (non-null counts and dtypes).
df = utils.get_timeseries_dataset()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142193 entries, 0 to 142192
Data columns (total 24 columns):
Date             142193 non-null object
Location         142193 non-null object
MinTemp          141556 non-null float64
MaxTemp          141871 non-null float64
Rainfall         140787 non-null float64
Evaporation      81350 non-null float64
Sunshine         74377 non-null float64
WindGustDir      132863 non-null object
WindGustSpeed    132923 non-null float64
WindDir9am       132180 non-null object
WindDir3pm       138415 non-null object
WindSpeed9am     140845 non-null float64
WindSpeed3pm     139563 non-null float64
Humidity9am      140419 non-null float64
Humidity3pm      138583 non-null float64
Pressure9am      128179 non-null float64
Pressure3pm      128212 non-null float64
Cloud9am         88536 non-null float64
Cloud3pm         85099 non-null float64
Temp9am          141289 non-null float64
Temp3pm          139467 non-null float64
RainToday        142193 non-null int6

In [59]:
# Remove the location column, the wind-direction columns and the two
# sparsely populated measurement columns (Evaporation, Sunshine).
df = df.drop(
    columns=[
        "Location",
        "WindGustDir",
        "WindDir9am",
        "WindDir3pm",
        "Evaporation",
        "Sunshine",
    ]
)

In [24]:
# Parse the "Date" column into pandas datetime values (the info() listing
# shows it is loaded with dtype object).
df["Date"] = pd.to_datetime(df["Date"])

In [37]:
def fill_missing(values):
    """Fill missing entries of a 2-D array in place with the next row's value.

    Each missing cell ``values[row, col]`` is replaced by the value found
    ``one_day`` rows further down in the same column. Rows are processed from
    the bottom up, so a run of consecutive missing cells is filled from the
    first non-missing value below it. Cells in the last ``one_day`` rows have
    no row below them and are left unchanged.

    Args:
        values: Array supporting ``values[row, col]`` indexing and ``.shape``
            with two dimensions (e.g. a 2-D NumPy array). Modified in place.

    Returns:
        None.
    """
    one_day = 1  # offset, in rows, of the observation used as replacement
    n_rows, n_cols = values.shape[0], values.shape[1]

    # Start at the last row that still has a donor row below it; going
    # upwards guarantees the donor has already been filled when it is read.
    for row in range(n_rows - one_day - 1, -1, -1):
        for col in range(n_cols):
            # Array scalars have no .isnull() method; pd.isnull handles
            # NaN / None / NaT scalars.
            if pd.isnull(values[row, col]):
                values[row, col] = values[row + one_day, col]

In [61]:
# Discard every row that still contains at least one missing value.
# NOTE(review): fill_missing is defined in this notebook but is not called in
# the visible cells -- confirm that dropping rows is the intended strategy.
df = df.dropna()

In [66]:
from sklearn.model_selection import train_test_split

In [67]:
# Separate the target column "y" from the remaining feature columns.
y = df["y"]
X = df.loc[:, df.columns != "y"]

# Keep the four splits in named variables so later cells can use them, and fix
# the seed so the 80/20 split is reproducible across runs.
# NOTE(review): train_test_split shuffles rows by default; for time-ordered
# data a chronological split may be more appropriate -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Show the same four-element result the cell displayed before.
[X_train, X_test, y_train, y_test]

[              Date  MinTemp  MaxTemp  Rainfall  WindGustSpeed  WindSpeed9am  \
 39828   2012-02-21     19.0     25.0      18.0           48.0          20.0   
 128916  2014-11-21     13.0     15.1       0.4           39.0          17.0   
 100883  2011-07-28      3.6     18.6       0.0           26.0           9.0   
 74873   2015-01-25     12.3     17.9       7.2           57.0          31.0   
 64224   2013-04-27     12.8     24.5       0.0           59.0          37.0   
 20673   2009-02-25     19.5     25.7       0.0           44.0          20.0   
 139856  2015-03-19     23.8     38.2       0.0           28.0          11.0   
 101446  2013-04-10     10.2     28.6       0.0           33.0          11.0   
 62919   2009-07-03      7.1     11.8       1.4           56.0          30.0   
 113128  2012-09-03      7.7     21.1       4.2           69.0          24.0   
 9706    2011-02-20     22.0     32.5       0.0           35.0           7.0   
 82764   2011-07-17     12.2     21.8   