# KICKSTARTER: BAGGING MODELS (KICKSTARTER AS CLIENT)

In [33]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, gradient_boosting
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.pipeline import Pipeline

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
kick_proc = pd.read_pickle('../../data/03_processed/kick_proc.pkl')

In [4]:
kick_proc.columns

Index(['currency', 'goal_original', 'category', 'country', 'blurb_word_count',
       'campaign_length', 'delta_created_launched', 'goal_usd',
       'successful_dummy', 'world_regions'],
      dtype='object')

In [5]:
kick_proc.head()

Unnamed: 0,currency,goal_original,category,country,blurb_word_count,campaign_length,delta_created_launched,goal_usd,successful_dummy,world_regions
0,USD,5000.0,Tabletop Games,US,9.0,32,13,5000.0,1,Northern America
1,USD,3500.0,Music,US,14.0,18,1,3500.0,1,Northern America
2,USD,500.0,Zines,US,17.0,15,1,500.0,1,Northern America
3,USD,6800.0,Graphic Novels,US,12.0,30,6,6800.0,1,Northern America
4,USD,600.0,Mixed Media,US,19.0,30,0,600.0,1,Northern America


In [6]:
kick_proc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275642 entries, 0 to 275641
Data columns (total 10 columns):
currency                  275642 non-null object
goal_original             275642 non-null float64
category                  275642 non-null object
country                   275642 non-null object
blurb_word_count          275642 non-null float64
campaign_length           275642 non-null int64
delta_created_launched    275642 non-null int64
goal_usd                  275642 non-null float64
successful_dummy          275642 non-null uint8
world_regions             275642 non-null object
dtypes: float64(3), int64(2), object(4), uint8(1)
memory usage: 19.2+ MB


## Baseline Model - Decision Tree

In [7]:
y = kick_proc['successful_dummy']
df_no_y = kick_proc.drop('successful_dummy', axis=1)
X = pd.get_dummies(df_no_y)

In [8]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275642 entries, 0 to 275641
Columns: 400 entries, goal_original to world_regions_Western Europe
dtypes: float64(3), int64(2), uint8(395)
memory usage: 114.3 MB


In [9]:
X.head()

Unnamed: 0,goal_original,blurb_word_count,campaign_length,delta_created_launched,goal_usd,currency_AUD,currency_CAD,currency_CHF,currency_DKK,currency_EUR,currency_GBP,currency_HKD,currency_JPY,currency_MXN,currency_NOK,currency_NZD,currency_SEK,currency_SGD,currency_USD,category_3D Printing,category_Academic,category_Accessories,category_Action,category_Animals,category_Animation,category_Anthologies,category_Apparel,category_Apps,category_Architecture,category_Art,category_Art Books,category_Audio,category_Bacon,category_Blues,category_Calendars,category_Camera Equipment,category_Candles,category_Ceramics,category_Children's Books,category_Childrenswear,category_Chiptune,category_Civic Design,category_Classical Music,category_Comedy,category_Comic Books,category_Comics,category_Community Gardens,category_Conceptual Art,category_Cookbooks,category_Country & Folk,category_Couture,category_Crafts,category_Crochet,category_DIY,category_DIY Electronics,category_Dance,category_Design,category_Digital Art,category_Documentary,category_Drama,category_Drinks,category_Electronic Music,category_Embroidery,category_Events,category_Experimental,category_Fabrication Tools,category_Faith,category_Family,category_Fantasy,category_Farmer's Markets,category_Farms,category_Fashion,category_Festivals,category_Fiction,category_Film & Video,category_Fine Art,category_Flight,category_Food,category_Food Trucks,category_Footwear,category_Gadgets,category_Games,category_Gaming Hardware,category_Glass,category_Graphic Design,category_Graphic Novels,category_Hardware,category_Hip-Hop,category_Horror,category_Illustration,category_Immersive,category_Indie Rock,category_Installations,category_Interactive Design,category_Jazz,category_Jewelry,category_Journalism,category_Kids,category_Knitting,category_Latin,category_Letterpress,category_Literary Journals,category_Literary Spaces,category_Live Games,category_Makerspaces,category_Metal,category_Mixed Media,category_Mobile Games,category_Movie Theaters,category_Music,category_Music Videos,category_Musical,category_Narrative Film,category_Nature,category_Nonfiction,category_Painting,category_People,category_Performance Art,category_Performances,category_Periodicals,category_Pet Fashion,category_Photo,category_Photobooks,category_Photography,category_Places,category_Playing Cards,category_Plays,category_Poetry,category_Pop,category_Pottery,category_Print,category_Printing,category_Product Design,category_Public Art,category_Publishing,category_Punk,category_Puzzles,category_Quilts,category_R&B,category_Radio & Podcasts,category_Ready-to-wear,category_Residencies,category_Restaurants,category_Robots,category_Rock,category_Romance,category_Science Fiction,category_Sculpture,category_Shorts,category_Small Batch,category_Software,category_Sound,category_Space Exploration,category_Spaces,category_Stationery,category_Tabletop Games,category_Taxidermy,category_Technology,category_Television,category_Textiles,category_Theater,category_Thrillers,category_Translations,category_Typography,category_Vegan,category_Video,category_Video Art,category_Video Games,category_Wearables,category_Weaving,category_Web,category_Webcomics,category_Webseries,category_Woodworking,category_Workshops,category_World Music,category_Young Adult,category_Zines,country_AE,country_AF,country_AG,country_AL,country_AM,country_AQ,country_AR,country_AT,country_AU,country_AX,country_AZ,country_BA,country_BB,country_BD,country_BE,country_BF,country_BG,country_BH,country_BJ,country_BM,country_BO,country_BR,country_BS,country_BT,country_BW,country_BY,country_BZ,country_CA,country_CD,country_CF,country_CG,country_CH,country_CI,country_CK,country_CL,country_CM,country_CN,country_CO,country_CR,country_CU,country_CV,country_CW,country_CY,country_CZ,country_DE,country_DJ,country_DK,country_DM,country_DO,country_DZ,country_EC,country_EE,country_EG,country_ES,country_ET,country_FI,country_FJ,country_FM,country_FO,country_FR,country_GA,country_GB,country_GD,country_GE,country_GH,country_GI,country_GL,country_GM,country_GN,country_GP,country_GQ,country_GR,country_GT,country_GU,country_GY,country_HK,country_HN,country_HR,country_HT,country_HU,country_ID,country_IE,country_IL,country_IN,country_IQ,country_IR,country_IS,country_IT,country_JM,country_JO,country_JP,country_KE,country_KG,country_KH,country_KI,country_KN,country_KP,country_KR,country_KW,country_KY,country_KZ,country_LA,country_LB,country_LC,country_LK,country_LR,country_LS,country_LT,country_LU,country_LV,country_LY,country_MA,country_MC,country_MD,country_ME,country_MG,country_MK,country_ML,country_MM,country_MN,country_MO,country_MQ,country_MR,country_MT,country_MU,country_MV,country_MW,country_MX,country_MY,country_MZ,country_NC,country_NE,country_NG,country_NI,country_NL,country_NO,country_NP,country_NZ,country_PA,country_PE,country_PF,country_PG,country_PH,country_PK,country_PL,country_PN,country_PR,country_PS,country_PT,country_PY,country_QA,country_RO,country_RS,country_RU,country_RW,country_SA,country_SC,country_SD,country_SE,country_SG,country_SI,country_SJ,country_SK,country_SL,country_SN,country_SO,country_SR,country_SS,country_SV,country_SX,country_SY,country_SZ,country_TC,country_TD,country_TH,country_TJ,country_TL,country_TN,country_TO,country_TR,country_TT,country_TW,country_TZ,country_UA,country_UG,country_US,country_UY,country_VA,country_VC,country_VE,country_VI,country_VN,country_VU,country_WS,country_XK,country_YE,country_ZA,country_ZM,country_ZW,world_regions_Antarctica,world_regions_Australia and New Zealand,world_regions_Caribbean,world_regions_Central America,world_regions_Central Asia,world_regions_Eastern Africa,world_regions_Eastern Asia,world_regions_Eastern Europe,world_regions_Melanesia,world_regions_Micronesia,world_regions_Middle Africa,world_regions_Northern Africa,world_regions_Northern America,world_regions_Northern Europe,world_regions_Polynesia,world_regions_South America,world_regions_South-eastern Asia,world_regions_Southern Africa,world_regions_Southern Asia,world_regions_Southern Europe,world_regions_Western Africa,world_regions_Western Asia,world_regions_Western Europe
0,5000.0,9.0,32,13,5000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,3500.0,14.0,18,1,3500.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,500.0,17.0,15,1,500.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,6800.0,12.0,30,6,6800.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,600.0,19.0,30,0,600.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


**The target variable is reasonably balanced. we will revisit this again later to figure out if we need to balance it** 

In [10]:
y.value_counts()

1    146584
0    129058
Name: successful_dummy, dtype: int64

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [12]:
print('Train Set: \n', y_train.value_counts()/len(y_train))
print('Test Set: \n', y_test.value_counts()/len(y_test))

Train Set: 
 1    0.531793
0    0.468207
Name: successful_dummy, dtype: float64
Test Set: 
 1    0.531787
0    0.468213
Name: successful_dummy, dtype: float64


In [13]:
dtc = DecisionTreeClassifier()

In [14]:
dtc.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [15]:
y_pred = dtc.predict(X_test)

In [16]:
accuracy_score(y_test, y_pred)

0.7047060701484523

In [17]:
feature_importance = pd.DataFrame(X.columns, dtc.feature_importances_).reset_index()

Think about grouping the country by region (decrease the number of variables).

In [18]:
feature_importance.sort_values(by = 'index', ascending= False).head()

Unnamed: 0,index,0
3,0.130369,delta_created_launched
4,0.117887,goal_usd
1,0.09831,blurb_word_count
0,0.076699,goal_original
2,0.072723,campaign_length


## RANDOM FOREST

It looks like each individual country is not so significant. Let's try doing this with world region. 

In [19]:
kick_proc.head()

Unnamed: 0,currency,goal_original,category,country,blurb_word_count,campaign_length,delta_created_launched,goal_usd,successful_dummy,world_regions
0,USD,5000.0,Tabletop Games,US,9.0,32,13,5000.0,1,Northern America
1,USD,3500.0,Music,US,14.0,18,1,3500.0,1,Northern America
2,USD,500.0,Zines,US,17.0,15,1,500.0,1,Northern America
3,USD,6800.0,Graphic Novels,US,12.0,30,6,6800.0,1,Northern America
4,USD,600.0,Mixed Media,US,19.0,30,0,600.0,1,Northern America


In [20]:
kick_random1 = kick_proc.drop(columns=['currency', 'goal_original', 'country'])

In [28]:
y = kick_random1['successful_dummy'].values
drop_values_y = kick_random1.drop(columns=['successful_dummy'])
X = pd.get_dummies(drop_values_y)

### Randomized Search CV

In [47]:
steps = [('rfc', RandomForestClassifier(random_state=88))]

pipeline = Pipeline(steps)

parameters = {'rfc__n_estimators':list(range(1, 200, 5)), 
             'rfc__criterion': ['gini', 'entropy'], 
             'rfc__max_depth': list(range(1, 10, 1)), 
             'rfc__min_samples_split': list(range(2, 10, 1)), 
             'rfc__min_samples_leaf': list(range(1, 5, 1))}

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

randomCV = RandomizedSearchCV(pipeline, parameters, cv=10)

In [48]:
randomCV.fit(X_train, y_train)

RandomizedSearchCV(cv=10, error_score='raise-deprecating',
                   estimator=Pipeline(memory=None,
                                      steps=[('rfc',
                                              RandomForestClassifier(bootstrap=True,
                                                                     class_weight=None,
                                                                     criterion='gini',
                                                                     max_depth=None,
                                                                     max_features='auto',
                                                                     max_leaf_nodes=None,
                                                                     min_impurity_decrease=0.0,
                                                                     min_impurity_split=None,
                                                                     min_samples_leaf=1,
                                 

In [49]:
randomCV.best_params_

{'rfc__n_estimators': 131,
 'rfc__min_samples_split': 3,
 'rfc__min_samples_leaf': 2,
 'rfc__max_depth': 6,
 'rfc__criterion': 'gini'}

In [50]:
y_pred = randomCV.predict(X_test)

In [51]:
accuracy_score(y_test, y_pred)

0.7169392404695912

The score increased by 1 percent! Let's see if we can make this better by doing a grid search around the params that the randomized one classified. 

### Further Tuning - Grid Search CV

In [56]:
steps = [('rfc', RandomForestClassifier(random_state=88, criterion='gini'))]

pipeline = Pipeline(steps)

parameters = {'rfc__n_estimators': [120, 131, 140], 
             'rfc__max_depth': [5, 6, 7], 
             'rfc__min_samples_leaf': [1, 2, 3]}

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

gridCV = GridSearchCV(pipeline, parameters, cv=10)

In [57]:
gridCV.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('rfc',
                                        RandomForestClassifier(bootstrap=True,
                                                               class_weight=None,
                                                               criterion='gini',
                                                               max_depth=None,
                                                               max_features='auto',
                                                               max_leaf_nodes=None,
                                                               min_impurity_decrease=0.0,
                                                               min_impurity_split=None,
                                                               min_samples_leaf=1,
                                                               min_samples_split=2,
                     

In [58]:
filename = '../../data/04_models/gridsearch_best_model_randomforest.sav'
pickle.dump(gridCV, open(filename, 'wb'))

In [59]:
gridCV.best_params_

{'rfc__max_depth': 7, 'rfc__min_samples_leaf': 3, 'rfc__n_estimators': 120}

In [60]:
y_pred = gridCV.predict(X_test)

In [61]:
accuracy_score(y_test, y_pred)

0.7238467008169959

### Build our Random Forest Model

In [62]:
final_rfm = RandomForestClassifier(max_depth=7, min_samples_leaf=3, n_estimators=120)

In [63]:
final_rfm.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=7, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=120,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [68]:
feature_importance = pd.DataFrame(X.columns, final_rfm.feature_importances_).reset_index()
feature_importance.sort_values(by = 'index', ascending= False).head()

Unnamed: 0,index,0
3,0.158148,goal_usd
140,0.155023,category_Tabletop Games
117,0.083182,category_Product Design
2,0.073693,delta_created_launched
12,0.066834,category_Apps


### Test our Random Forest Model