# KICKSTARTER: BOOSTING MODELS (KICKSTARTER AS CLIENT) 

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.pipeline import Pipeline

In [2]:
pd.set_option('display.max_columns', None)
kick_proc = pd.read_pickle('../../data/03_processed/kick_proc.pkl')

In [3]:
kick_random1 = kick_proc.drop(columns=['currency', 'goal_original', 'country'])
kick_random1.columns

Index(['id', 'category', 'blurb_word_count', 'campaign_length',
       'delta_created_launched', 'goal_usd', 'successful_dummy',
       'world_regions', 'Unnamed: 0', 'cluster_predictions'],
      dtype='object')

In [4]:
y = kick_random1['successful_dummy'].values
drop_values_y = kick_random1.drop(columns=['successful_dummy'])
X_feats = ['category', 'blurb_word_count', 'campaign_length',
           'delta_created_launched', 'goal_usd','world_regions', 
           'cluster_predictions']
X = pd.get_dummies(kick_random1[X_feats])

In [5]:
X.head()

Unnamed: 0,blurb_word_count,campaign_length,delta_created_launched,goal_usd,cluster_predictions,category_3D Printing,category_Academic,category_Accessories,category_Action,category_Animals,category_Animation,category_Anthologies,category_Apparel,category_Apps,category_Architecture,category_Art,category_Art Books,category_Audio,category_Bacon,category_Blues,category_Calendars,category_Camera Equipment,category_Candles,category_Ceramics,category_Children's Books,category_Childrenswear,category_Chiptune,category_Civic Design,category_Classical Music,category_Comedy,category_Comic Books,category_Comics,category_Community Gardens,category_Conceptual Art,category_Cookbooks,category_Country & Folk,category_Couture,category_Crafts,category_Crochet,category_DIY,category_DIY Electronics,category_Dance,category_Design,category_Digital Art,category_Documentary,category_Drama,category_Drinks,category_Electronic Music,category_Embroidery,category_Events,category_Experimental,category_Fabrication Tools,category_Faith,category_Family,category_Fantasy,category_Farmer's Markets,category_Farms,category_Fashion,category_Festivals,category_Fiction,category_Film & Video,category_Fine Art,category_Flight,category_Food,category_Food Trucks,category_Footwear,category_Gadgets,category_Games,category_Gaming Hardware,category_Glass,category_Graphic Design,category_Graphic Novels,category_Hardware,category_Hip-Hop,category_Horror,category_Illustration,category_Immersive,category_Indie Rock,category_Installations,category_Interactive Design,category_Jazz,category_Jewelry,category_Journalism,category_Kids,category_Knitting,category_Latin,category_Letterpress,category_Literary Journals,category_Literary Spaces,category_Live Games,category_Makerspaces,category_Metal,category_Mixed Media,category_Mobile Games,category_Movie Theaters,category_Music,category_Music Videos,category_Musical,category_Narrative Film,category_Nature,category_Nonfiction,category_Painting,category_People,category_Performance Art,category_Performances,category_Periodicals,category_Pet Fashion,category_Photo,category_Photobooks,category_Photography,category_Places,category_Playing Cards,category_Plays,category_Poetry,category_Pop,category_Pottery,category_Print,category_Printing,category_Product Design,category_Public Art,category_Publishing,category_Punk,category_Puzzles,category_Quilts,category_R&B,category_Radio & Podcasts,category_Ready-to-wear,category_Residencies,category_Restaurants,category_Robots,category_Rock,category_Romance,category_Science Fiction,category_Sculpture,category_Shorts,category_Small Batch,category_Software,category_Sound,category_Space Exploration,category_Spaces,category_Stationery,category_Tabletop Games,category_Taxidermy,category_Technology,category_Television,category_Textiles,category_Theater,category_Thrillers,category_Translations,category_Typography,category_Vegan,category_Video,category_Video Art,category_Video Games,category_Wearables,category_Weaving,category_Web,category_Webcomics,category_Webseries,category_Woodworking,category_Workshops,category_World Music,category_Young Adult,category_Zines,world_regions_Antarctica,world_regions_Australia and New Zealand,world_regions_Caribbean,world_regions_Central America,world_regions_Central Asia,world_regions_Eastern Africa,world_regions_Eastern Asia,world_regions_Eastern Europe,world_regions_Melanesia,world_regions_Micronesia,world_regions_Middle Africa,world_regions_Northern Africa,world_regions_Northern America,world_regions_Northern Europe,world_regions_Polynesia,world_regions_South America,world_regions_South-eastern Asia,world_regions_Southern Africa,world_regions_Southern Asia,world_regions_Southern Europe,world_regions_Western Africa,world_regions_Western Asia,world_regions_Western Europe
0,9.0,32,13,5000.0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,14.0,18,1,3500.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,17.0,15,1,500.0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,12.0,30,6,6800.0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,19.0,30,0,600.0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [16]:
print('X_train shape :', X_train.shape)
print('X_test shape :', X_test.shape)
print('y_train shape :', y_train.shape)
print('y_test shape :', y_test.shape)


X_train shape : (206731, 187)
X_test shape : (68911, 187)
y_train shape : (206731,)
y_test shape : (68911,)


### Adaboost

#### Initial Model

In [17]:
abc = AdaBoostClassifier()

In [18]:
abc.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=None)

##### Scoring

In [19]:
abc.score(X_test, y_test)

0.749154706795722

#### Tuning 

We can tune our model by running a random grid search with the Adaboost classifier. This model took a substantial amount of time train so so I pickled it and read it back in after running the model. 

In [20]:
filename = '../../data/04_models/randomized_best_model_adaboost.sav'

```python
# set parameters to test in the randomized search 
params = {
    'n_estimators': list(np.arange(50, 200, 10))}

# instantiate randomized search with Adaboost classifier
rsCVAB = RandomizedSearchCV(AdaBoostClassifier(), params, cv=5)

# fit the model
rsCVAB.fit(X_train, y_train)

# let's save the model
filename = '../../data/04_models/randomized_best_model_adaboost.sav'
pickle.dump(rsCVAB, open(filename, 'wb'))
```

#### Results

In [21]:
# load the model from disk
randomized_ada_model = pickle.load(open(filename, 'rb'))

In [22]:
randomized_ada_model.score(X_test, y_test)

ValueError: Number of features of the model must match the input. Model n_features is 186 and input n_features is 187 

In [23]:
randomized_ada_model.best_params_

{'n_estimators': 180}

In [24]:
conf_matrix = pd.DataFrame(confusion_matrix(y_test, y_pred), 
                           index = ['actual 0', 'actual 1'], 
                           columns = ['predicted 0', 'predicted 1'])
conf_matrix

NameError: name 'y_pred' is not defined

## TRAIN OUR FINAL MODEL

In [25]:
final_model_ada = AdaBoostClassifier(n_estimators=180)

In [26]:
final_model_ada.fit(X, y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=180, random_state=None)

In [27]:
filename = '../../data/04_models/final_model_ada.sav'
pickle.dump(final_model_ada, open(filename, 'wb'))

Let's take a look at the final number of columns

In [28]:
len(X.columns)

187