In [None]:
# Import required packages
import numpy as np
import tensorflow as tf
import pandas as pd
import sklearn
import seaborn as sns

# Show the installed version of each library imported above, one per line.
print(
    f'Numpy Version: {np.__version__}',
    f'Tensorflow Version: {tf.__version__}',
    f'Pandas Version: {pd.__version__}',
    f'sklearn Version: {sklearn.__version__}',
    f'seaborn Version: {sns.__version__}',
    sep='\n',
)

In [None]:
# Load two Kickstarter CSV exports and combine them into one frame.
# NOTE(review): both paths are absolute and machine-specific; consider reading
# them relative to a configurable data directory.
df1 = pd.read_csv('/Users/jasonrobinson/Downloads/Kickstarter_2018-05-16T03_20_20_822Z/Kickstarter038.csv')
df2 = pd.read_csv('/Users/jasonrobinson/Downloads/Kickstarter_2019-05-14T03_20_08_560Z/Kickstarter013.csv')

# Stack the files row-wise. axis=1 would place the second file's columns next
# to the first one's; if both files use the same column names (as the later
# single-column lookups such as df['state'] assume) that yields duplicate
# labels, and each lookup returns several columns instead of one.
# ignore_index=True gives the combined frame a fresh, unique row index.
df = pd.concat([df1, df2], axis=0, ignore_index=True)
df.sample(10)

In [None]:
# Discard every column that contains at least one missing value,
# then report the remaining (rows, columns).
df = df.dropna(axis='columns', how='any')
df.shape

In [None]:
# Display ten randomly chosen rows of the current frame.
df.sample(n=10)

In [None]:
# Keep only the rows whose state is something other than 'canceled'.
df = df[df['state'].ne('canceled')]

# (rows, columns) left after the filter.
df.shape

In [None]:
# Display ten randomly chosen rows of the filtered frame.
df.sample(n=10)

In [None]:
# Remove data-leakage columns by keeping an explicit list of columns;
# everything not named here is dropped from the frame.
kept_columns = [
    'category',
    'main_category',
    'currency',
    'deadline',
    'launched',
    'country',
    'state',
    'goal',
]
df = df[kept_columns]

df.sample(n=5)

In [None]:
# Binary target column: 1 where state is 'successful', 0 for every other state.
df = df.assign(outcome=lambda frame: frame['state'].eq('successful').astype(int))

In [None]:
# Parse both timestamp columns so the .dt accessor can be used on them.
# assign() returns a new frame rather than writing into a possibly sliced one.
df = df.assign(launched=pd.to_datetime(df['launched']),
               deadline=pd.to_datetime(df['deadline']))

# Calendar parts of the launch timestamp.
df = df.assign(hour_launched=df['launched'].dt.hour,
               day_launched=df['launched'].dt.day,
               month_launched=df['launched'].dt.month,
               year_launched=df['launched'].dt.year)

# Calendar parts of the deadline. These are read from `deadline`; taking them
# from `launched` would only duplicate the launch columns above.
df = df.assign(day_deadline=df['deadline'].dt.day,
               month_deadline=df['deadline'].dt.month,
               year_deadline=df['deadline'].dt.year)

df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns whose values are replaced by integer codes.
cat_features = ['category', 'currency', 'country', 'main_category']
encoder = LabelEncoder()

# The same encoder object is re-fitted on each column in turn.
encoded = df[cat_features].apply(lambda column: encoder.fit_transform(column))
# Not rendered: a notebook cell only displays its last expression.
encoded.head(10)

# Keep the numeric/date features and the target, then attach the encoded
# columns by row index.
df = df[[
    'goal',
    'hour_launched',
    'day_launched',
    'month_launched',
    'year_launched',
    'day_deadline',
    'month_deadline',
    'year_deadline',
    'outcome',
]].join(encoded)
df.head()


## Data Exploration

    Descriptive statistics for key features
    Visualizations for key features

In [None]:
# Baseline model: choose the input columns and the target column.
features = [
    'goal',
    'hour_launched',
    'day_launched',
    'month_launched',
    'year_launched',
    'day_deadline',
    'month_deadline',
    'year_deadline',
    'category',
    'currency',
    'country',
    'main_category',
]
target = 'outcome'

# X holds the twelve feature columns, y the binary outcome.
X = df.loc[:, features]
y = df.loc[:, target]

X.head()

In [None]:
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()

# Leave out two of the label-encoded columns; ten feature columns remain.
# NOTE(review): 'category' and 'currency' are integer codes as well but are
# kept and scaled here -- confirm that is intended.
X_numerical = X.drop(['main_category', 'country'], axis=1)

# Fit the scaler and transform the same data in one step.
# NOTE(review): the scaler sees every row, including those later held out by
# validation_split -- confirm that is acceptable.
scaled_X = scaler.fit_transform(X_numerical)

# Wrap the scaled array in a labelled frame again. Reusing X_numerical's index
# keeps the rows label-aligned with `y`; the default 0..n-1 index would not
# match once rows have been filtered out of df.
scaled_X = pd.DataFrame(scaled_X,
                        columns=X_numerical.columns,
                        index=X_numerical.index)

In [None]:

%load_ext tensorboard

import os
import datetime
import tensorflow as tf

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Baseline network: one hidden layer of 10 ReLU units over 10 input features,
# followed by a single sigmoid unit for the binary outcome.
model = Sequential()
model.add(Dense(10, input_dim=10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train the baseline for 10 epochs; the last 20% of the rows are held out for
# validation and progress is written to TensorBoard.
model.fit(
    x=scaled_X,
    y=y,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[tensorboard_callback],
)


## Model Approach #1


In [None]:
# Deeper network: three ReLU hidden layers (64 -> 32 -> 16) over 10 input
# features, followed by a single sigmoid output unit.
model2 = Sequential()
model2.add(Dense(64, input_dim=10, activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(16, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

model2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model2.summary()

In [None]:
# Train the deeper network for 20 epochs with the same batch size, validation
# split and TensorBoard callback as the baseline run.
model2.fit(
    x=scaled_X,
    y=y,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    callbacks=[tensorboard_callback],
)

In [None]:

## Model Approach #2


In [None]:
# Introducing early stopping
from tensorflow.keras.callbacks import EarlyStopping

# Fixed (not timestamped) log folder, so repeated runs write to the same place.
# NOTE(review): the folder name mentions "Loss" while the monitored quantity
# below is val_accuracy -- confirm which one is intended.
logdir = os.path.join("logs", "EarlyStopping-Loss")

tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=1,
)

# Stop once val_accuracy has not improved by at least 0.005 for 3 epochs.
stop = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.005,
    patience=3,
)

# Wider network: 300 -> 150 -> 75 ReLU units over 10 input features, followed
# by a single sigmoid output unit.
model3 = tf.keras.Sequential()
model3.add(Dense(300, input_dim=10, activation='relu'))
model3.add(Dense(150, activation='relu'))
model3.add(Dense(75, activation='relu'))
model3.add(Dense(1, activation='sigmoid'))

model3.compile(
    optimizer='nadam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the early-stopping network. The call targets `model3`, the network
# built and compiled for this approach; calling `model.fit` here would only
# continue training the baseline and leave `model3` untrained.
# epochs=99 is an upper bound: the `stop` callback can end training earlier.
model3.fit(scaled_X, y,
           epochs=99,
           validation_split=0.2,
           callbacks=[tensorboard_callback, stop])

In [None]:

## Saving the model


In [None]:
import os

import joblib

# Destination file for the serialised network.
model_path = '../models/model_name.joblib'

# Create the target folder if it is missing; writing into a non-existent
# directory would otherwise fail.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# NOTE(review): this persists the object bound to `model`; confirm that it is
# the network meant for export rather than `model2` or `model3`.
# NOTE(review): Keras also provides its own `save` method on models; consider
# it if pickling the network through joblib proves fragile.
joblib.dump(model, model_path)