# Module 20 - Final Project

## Authors:          Stacey Hart, Ron Bingham, Fritz Knack

Umbrella notebook that runs all the models. To skip a model, comment out its call in the "Execute Model(s)" section at the end of the notebook.


## Dependencies

Common dependencies and those needed for each of the models contained in this notebook.

**Note**: Some models have parameters (e.g., deep layers in the Neural Networks model) that can be configured in the cells that follow.


In [1]:
# **************************************
# Common
# **************************************
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt

from utils_common import get_db_engine, encode_dataframe

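# Silences every Python warning for the session. Intended for the GPU-related
# warnings that TensorFlow produces, but note that it also hides deprecation
# warnings raised by pandas / scikit-learn.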
import warnings
warnings.filterwarnings('ignore')

# **************************************
# keras.models.Sequential()
# See Neural Net Configuration Variables below
# **************************************
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint # keras, not kera

# **************************************
# sklearn.linear_model - LinearRegression
# No configuration variables, just run it.
# **************************************
from sklearn.linear_model import LinearRegression

# **************************************
# sklearn.linear_model - Lasso
# No configuration variables, just run it.
# **************************************
from sklearn.linear_model import Lasso

# **************************************
# sklearn.linear_model - Ridge
# No configuration variables, just run it.
# **************************************
from sklearn.linear_model import Ridge

# **************************************
# sklearn.ensemble - RandomForestRegressor
# See Random Forest Configuration Variables below
# **************************************
from sklearn.ensemble import RandomForestRegressor


## Configuration Variables

### Configuration: Common

In [2]:
# **************************************
# Common Configuration Variables
# **************************************
# Column of the loaded table that the models are asked to predict.
y_column = 'TotalVets'

# Seed handed to train_test_split so the train/test partition is repeatable.
int_random_state = 0

### Configuration: Neural Network

In [3]:
# **************************************
# Neural Net Configuration Variables
# **************************************

# ******************
# Data Configuration
# tablename is read by the Database Connection cell; y_column names the target
# column that Model Preparation splits off from the features.
tablename = 'people_vet_join'
y_column = 'TotalVets'

# ******************
# First Layer Configuration
kS_nn_first_units       = 8
kS_nn_first_activation  = "relu"

# ******************
# Output Layer Configuration
# The target is a numeric total (every other model in this notebook is a
# regressor), so the output neuron must be unbounded. A sigmoid would clamp
# predictions to (0, 1).
# NOTE(review): assumes TotalVets is a count / continuous value, not a 0/1 label.
kS_nn_output_units      = 1
kS_nn_output_activation = "linear"

# ******************
# Deep Learning Configuration
kS_nn_deep_layers       = [
    # Comment out the next two lines to disable "deep learning"
    {"units": 16,
     "activation": "relu"},

    # Add more layers with
    #    {"units": <units>,
    #     "activation": <activation>},
                     ]

# ******************
# Compiler Configuration
# Regression loss/metric to match the linear output layer. binary_crossentropy
# and accuracy are only meaningful for binary classification targets.
kS_nn_compile_loss      = "mean_squared_error"
kS_nn_compile_optimizer = "adam"
kS_nn_compile_metrics   = ["mean_absolute_error"]

# ******************
# Training Configuration
kS_nn_train_epochs      = 100

# ******************
# Output File
# Path the trained network is saved to (HDF5 format).
kS_nn_file              = "kS-Enlistment_Prediction.h5"

### Configuration: Random Forest

In [4]:
# **************************************
# Random Forest Configuration Variables
# **************************************
# Number of trees. Referenced as n_estimators for RandomForestRegressor in the
# (currently commented-out) draft inside run_model_random_forest().
rf_estimators = 10

## Database Connection

In [5]:
# Connect to the database and load the modelling table into combined_df.
# Each failure is reported with a readable message and then re-raised: every
# later cell needs combined_df, so continuing after a failure would only
# produce a confusing NameError further down the notebook.
try:
    db_engine = get_db_engine()
except Exception as e:
    print("\nFailed to connect to database engine.\n", e)
    raise

try:
    combined_df = pd.read_sql_table(tablename, db_engine)
except Exception as e:
    print(f"\nFailed to read table {tablename} on db_engine {db_engine}.\n", e)
    raise

## Data Conditioning

In [6]:
# Placeholder
# Data scrubbing currently takes place before this notebook is used.
# (Temp comment: If I can get the cleanup notebooks nicely consolidated and 
# wrapped, I'll put 'em right here.)

## Model Preparation

In [7]:
# ******************
# Preserve State and County for labeling things later. Drop State and County
# from the model to be processed.
# Errors are re-raised (instead of calling exit(), which tears down the
# notebook kernel) so the failing cell is clearly marked and the run stops.
# NOTE: this cell overwrites combined_df, so re-run the Database Connection
# cell before running this one a second time.
try:
    df_labels = combined_df[['State', 'County']]
    combined_df = combined_df.drop(columns=['State', 'County'])
except Exception as e:
    print("\nImported data appears to be missing State and County label columns.\n", e)
    raise

# ******************
# Pull off FIPS, too, but it might have already been cleaned up. No reason to
# die here if it has: errors='ignore' skips the column when it is absent
# without hiding any other kind of failure.
combined_df = combined_df.drop(columns=['FIPS'], errors='ignore')

# ******************
# Split into features and target arrays
try:
    y = combined_df[y_column].values
    # Keyword form: the positional axis argument to drop() was removed in pandas 2.0.
    X = combined_df.drop(columns=[y_column]).values
except Exception as e:
    print(f"\nFailed extracting {y_column} as target array.\n", e)
    raise

# ******************
# Split preprocessed data into a training and testing dataset
try:
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=int_random_state)
except Exception as e:
    print("\nFailed splitting test/train data.\n", e)
    raise

# ******************
# Scale the data
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no statistics from the test rows leak into training.
try:
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
except Exception as e:
    print("\nFailed scaling data.\n", e)
    raise


## Models

### Model: Neural Network - Sequential

In [8]:
def run_model_sequential():
    """Build, train, evaluate and save the Keras Sequential network.

    Takes no arguments; everything is read from notebook-level names:
    the kS_nn_* configuration variables, int_random_state, and the arrays
    X_train_scaled, y_train, X_test_scaled and y_test.

    Side effects:
        * prints the model summary, the training log and the evaluation
          results (the loss plus every metric in kS_nn_compile_metrics);
        * writes the trained model to kS_nn_file.

    Returns:
        None
    """
    # Seed TensorFlow so weight initialisation and batch shuffling are
    # repeatable between runs, matching the seeded train/test split.
    tf.random.set_seed(int_random_state)

    # Define the model: one input per feature column
    number_input_features = X_train_scaled.shape[1]

    nn = tf.keras.models.Sequential()

    # First hidden layer
    nn.add(tf.keras.layers.Dense(units=kS_nn_first_units,
                                 input_dim=number_input_features,
                                 activation=kS_nn_first_activation))

    # Additional hidden layers
    for layer in kS_nn_deep_layers:
        nn.add(tf.keras.layers.Dense(units=layer['units'],
                                     activation=layer['activation']))

    # Output layer
    nn.add(tf.keras.layers.Dense(units=kS_nn_output_units,
                                 activation=kS_nn_output_activation))

    # Check the structure of the model. summary() prints the table itself and
    # returns None, so it must not be wrapped in print().
    nn.summary()

    # Compile the model
    nn.compile(loss=kS_nn_compile_loss,
               optimizer=kS_nn_compile_optimizer,
               metrics=kS_nn_compile_metrics)

    # Train the model
    nn.fit(X_train_scaled, y_train, epochs=kS_nn_train_epochs)

    # Evaluate the model using the test data. return_dict=True yields
    # {name: value} for the loss and each configured metric, so this works
    # for any number of metrics and labels each value correctly.
    results = nn.evaluate(X_test_scaled, y_test, verbose=2, return_dict=True)
    print(", ".join(f"{name}: {value}" for name, value in results.items()))

    nn.save(kS_nn_file)

# end run_model_sequential()
# **************************************

### Model: Linear Regression

In [9]:
# Placeholder
def run_model_linear_regression():
    """Placeholder for the LinearRegression model.

    Currently this only prints a confirmation that it was called. The
    commented-out draft below is kept for reference and is not runnable as
    written: it uses x_train / x_test (the preparation cell defines X_train /
    X_test and their *_scaled variants) and mean_squared_error, which the
    import cell does not bring in.
    """
# Fit the model
#     lin_reg = LinearRegression()
#     lin_reg.fit(x_train, y_train)
#     lin_reg_y_pred = lin_reg.predict(x_test)
#     mse = mean_squared_error(y_test, lin_reg_y_pred)
#     print(mse)
    print('run_model_linear_regression() was called.')

# end run_model_linear_regression()
# **************************************

### Model: Lasso

In [10]:
# Placeholder
def run_model_lasso():
    """Placeholder for the Lasso model.

    Currently this only prints a confirmation that it was called. The
    commented-out draft below is not runnable as written: it uses x_train /
    x_test (the preparation cell defines X_train / X_test and their *_scaled
    variants) and mean_squared_error, which the import cell does not bring in.
    """
#     lasso = Lasso()
#     lasso.fit(x_train, y_train)
#     y_pred_lasso = lasso.predict(x_test)
#     mse = mean_squared_error(y_test, y_pred_lasso)
#     print(mse)
    print('run_model_lasso() was called.')
# end run_model_lasso()
# **************************************    

### Model: Ridge

In [11]:
# **************************************
# **************************************
# Ridge
# Placeholder
# **************************************
# **************************************
def run_model_ridge():
    """Placeholder for the Ridge model.

    Currently this only prints a confirmation that it was called. The
    commented-out draft below is not runnable as written: it uses x_train /
    x_test (the preparation cell defines X_train / X_test and their *_scaled
    variants) and mean_squared_error, which the import cell does not bring in.
    """
#     ridge = Ridge()
#     ridge.fit(x_train, y_train)
#     y_pred_ridge = ridge.predict(x_test)
#     mse = mean_squared_error(y_test, y_pred_ridge)
#     print(mse)
    print('run_model_ridge() was called.')
# end run_model_ridge()
# **************************************

### Model: Random Forest Regression

In [12]:
# Placeholder
def run_model_random_forest():
    """Placeholder for the RandomForestRegressor model.

    Currently this only prints a confirmation that it was called. The
    commented-out draft below is kept for reference.

    NOTE(review): the draft fits on X.reshape(-1, 1) and predicts/plots over a
    single feature axis, while X from the preparation cell is built from all
    remaining columns of the table. It also fits on the full X / y rather
    than the train/test split. Confirm the intended inputs before enabling it.
    """
#     # Fitting Random Forest Regression to the dataset
#     regressor = RandomForestRegressor(n_estimators = rf_estimators, random_state = int_random_state)
#     regressor.fit(X.reshape(-1,1), y.reshape(-1, 1))
#     # Predicting a new result
#     y_pred = regressor.predict([[6.5]]) # what is this magic number?

#     # Interactive visualizations. Remove from final result?
#     X_grid = np.arange(min(X), max(X), 0.01)
#     X_grid = X_grid.reshape((len(X_grid), 1))
#     plt.scatter(X, y, color = 'red')
#     plt.plot(X_grid, regressor.predict(X_grid), color = 'blue')
#     plt.title('Truth or Bluff (Random Forest Regression)')
#     plt.xlabel('')
#     plt.ylabel('')
#     plt.show()
    print('run_model_random_forest() was called.')

# end run_model_random_forest()
# **************************************

## Execute Model(s)

In [13]:
# Builds, trains and evaluates the neural network, then saves it to kS_nn_file.
run_model_sequential()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 816       
_________________________________________________________________
dense_1 (Dense)              (None, 16)                144       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 977
Trainable params: 977
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Ep

In [14]:
# Placeholder model: the call currently only prints a confirmation message.
run_model_linear_regression()

run_model_linear_regression() was called.


In [15]:
# Placeholder model: the call currently only prints a confirmation message.
run_model_lasso()

run_model_lasso() was called.


In [16]:
# Placeholder model: the call currently only prints a confirmation message.
run_model_ridge()

run_model_ridge() was called.


In [17]:
# Placeholder model: the call currently only prints a confirmation message.
run_model_random_forest()

run_model_random_forest() was called.
