# Keras functional models lab

---



In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras import models, layers, Input, Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import plot_model
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
def plot_loss(history):
    """Plot the per-epoch training loss and, when available, the 'val_loss' curve.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with a ``'loss'`` entry and,
        optionally, a ``'val_loss'`` entry (presumably the object returned
        by a Keras ``fit()`` call -- confirm against the caller).

    The first curve is labelled 'train' and the second 'test', matching the
    legend used throughout this notebook.
    """
    losses = history.history

    plt.plot(losses['loss'])
    labels = ['train']

    # The 'val_loss' entry may be absent (e.g. a fit without validation
    # data); plot only what was recorded instead of raising KeyError.
    if 'val_loss' in losses:
        plt.plot(losses['val_loss'])
        labels.append('test')

    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(labels)
    plt.show()

### Read and preprocess the data.

This is the Sonoma animal shelter data.

In [3]:
# Load the shelter records and keep only the rows whose Type is 'DOG'.
df = (
    pd.read_csv("https://raw.githubusercontent.com/grbruns/cst383/master/sonoma-shelter-12-2022.csv")
    .loc[lambda animals: animals['Type'] == 'DOG']
)

In [4]:
# Display the column labels of the filtered frame.
df.columns

Index(['Name', 'Type', 'Breed', 'Color', 'Sex', 'Size', 'Date Of Birth',
       'Impound Number', 'Kennel Number', 'Animal ID', 'Intake Date',
       'Outcome Date', 'Days in Shelter', 'Intake Type', 'Intake Subtype',
       'Outcome Type', 'Outcome Subtype', 'Intake Condition',
       'Outcome Condition', 'Intake Jurisdiction', 'Outcome Jurisdiction',
       'Outcome Zip Code', 'Location', 'Count'],
      dtype='object')

In [5]:
# preprocessing
# NOTE: this cell rebinds `df` and removes columns it reads, so it cannot be
# re-run on its own -- re-run the load cell first.

# keep only the columns used in this lab, then discard incomplete rows;
# the explicit copy makes the column assignment below act on an independent frame
df = df[['Breed', 'Intake Date', 'Date Of Birth', 'Outcome Type', 'Days in Shelter']]
df = df.dropna().copy()

# age in years at intake (day difference divided by 365)
df['Age'] = (pd.to_datetime(df['Intake Date']) - pd.to_datetime(df['Date Of Birth'])).dt.days/365

# only top 10 breeds
top_breeds = df['Breed'].value_counts().head(10).index
df = df[df['Breed'].isin(top_breeds)].copy()

# only looking at whether returned or not
df['Returned'] = (df['Outcome Type'] == 'RETURN TO OWNER').astype(int)

# reassign instead of inplace=True so the lineage of `df` stays explicit
df = df.drop(columns=['Intake Date', 'Date Of Birth', 'Outcome Type'])

# one-hot encode the remaining non-numeric column(s); the dtype is pinned so the
# indicator columns are 0/1 uint8 on every pandas version (pandas >= 2.0
# would otherwise produce bool columns)
df = pd.get_dummies(df, drop_first=True, dtype=np.uint8)

In [6]:
# Summarize the preprocessed frame: columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5281 entries, 1 to 22097
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Days in Shelter            5281 non-null   int64  
 1   Age                        5281 non-null   float64
 2   Returned                   5281 non-null   int64  
 3   Breed_CHIHUAHUA SH/MIX     5281 non-null   uint8  
 4   Breed_GERM SHEPHERD        5281 non-null   uint8  
 5   Breed_GERM SHEPHERD/MIX    5281 non-null   uint8  
 6   Breed_LABRADOR RETR        5281 non-null   uint8  
 7   Breed_LABRADOR RETR/MIX    5281 non-null   uint8  
 8   Breed_PARSON RUSS TER/MIX  5281 non-null   uint8  
 9   Breed_PIT BULL             5281 non-null   uint8  
 10  Breed_PIT BULL/MIX         5281 non-null   uint8  
 11  Breed_SIBERIAN HUSKY       5281 non-null   uint8  
dtypes: float64(1), int64(2), uint8(9)
memory usage: 211.4 KB


In [7]:
# Preview the first rows of the preprocessed frame.
df.head()

Unnamed: 0,Days in Shelter,Age,Returned,Breed_CHIHUAHUA SH/MIX,Breed_GERM SHEPHERD,Breed_GERM SHEPHERD/MIX,Breed_LABRADOR RETR,Breed_LABRADOR RETR/MIX,Breed_PARSON RUSS TER/MIX,Breed_PIT BULL,Breed_PIT BULL/MIX,Breed_SIBERIAN HUSKY
1,49,1.0,0,0,0,0,0,0,0,0,0,0
7,1,1.087671,1,0,0,0,0,1,0,0,0,0
8,0,0.734247,1,0,0,0,1,0,0,0,0,0
14,1,4.956164,1,0,0,0,0,0,0,1,0,0
21,1,3.0,1,0,0,0,0,0,0,1,0,0


### Problem 1: Using a functional model, create a classifier to predict whether returned to owner.

Use the indicated predictor variables.

Print the model summary, compile and fit the model, and observe your validation accuracy.

Use a validation split for the fit.



In [8]:
target = 'Returned'

# Predictors: every column except the label and 'Days in Shelter'.
predictors = list(df.columns.drop(['Days in Shelter', target]))

# Label vector taken straight from the frame.
y = df[target].values

# Standardize the predictor matrix; `scaler` keeps the fitted statistics.
scaler = StandardScaler()
X = scaler.fit_transform(df[predictors].values)

In [9]:
# Sanity check: predictor matrix and label vector must have the same number of rows.
print(X.shape, y.shape)

(5281, 10) (5281,)


In [10]:
# YOUR CODE HERE

### Problem 2: Using a functional model, create a multi-output model that will predict whether returned to owner, **and** number of days in shelter.

Note that returned to owner is a binary classification problem, and number of days in shelter is a regression problem.

Repeat the steps of problem 1 for this model.

Predict the output of the model on the training data.


In [11]:
# Two outputs: 'Returned' (0/1 label) and 'Days in Shelter' (numeric).
targets = ['Returned', 'Days in Shelter']

# Every column that is not a target is used as a predictor.
predictors = list(df.columns)
for target_name in targets:
    predictors.remove(target_name)

X = df[predictors].values
# Y has one column per target, in the order given by `targets`.
Y = df[targets].values

# Standardize the predictor matrix; `scaler` keeps the fitted statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [12]:
# Sanity check: Y should have one column per entry in `targets`.
print(X.shape, Y.shape)

(5281, 10) (5281, 2)


In [None]:
# YOUR CODE HERE

### Problem 3: Modify the plot_loss() function we've used so that the loss of both outputs is plotted.

In [13]:
def plot_loss(history):
    """ Draw the 'loss' and 'val_loss' series of history.history against epoch. """

    # MODIFY THE CODE BELOW

    # one curve per recorded series, in legend order
    for series_name in ('loss', 'val_loss'):
        plt.plot(history.history[series_name])

    plt.legend(['train', 'test'])
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('model loss')
    plt.show()

### Problem 4: Modify your model to try to get the best possible performance.