## Decision Trees and Ensemble Methods

In [None]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree, export_text



In [None]:
# Read the board-game data set from disk, then preview five randomly
# drawn rows as a quick sanity check of the columns.
bg_df = pd.read_csv(filepath_or_buffer='data/boardgames.csv')

bg_df.sample(n=5)

In [None]:
# max_depth=3 caps the tree at three levels of splits below the root (so at
# most 2**3 = 8 leaves); it does not mean "only three splits" in total.
# random_state is fixed so that ties between equally good candidate splits
# are broken the same way on every run and the tree is reproducible.
dtc = DecisionTreeClassifier(max_depth=3, random_state=42)

In [None]:
# Predictor columns used for the first, small set of models.
x_names = [
    'max_players',
    'min_players',
    'min_playtime',
    'max_playtime',
    'min_age',
]

# Train the tree to predict the `quality_game` column from those predictors.
dtc.fit(X=bg_df[x_names], y=bg_df['quality_game'])



In [None]:
# Draw the fitted decision tree on an explicit Axes rather than relying on
# pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(16, 8))
plot_tree(dtc, feature_names=x_names, fontsize=10, filled=True, ax=ax)
# Title is set after plot_tree because plot_tree clears the Axes it draws on.
# The trailing semicolon keeps the cell from echoing the returned object.
ax.set_title('Decision tree for quality_game');


In [None]:
# Render the fitted tree's decision rules as indented plain text.
dtc_rules = export_text(dtc, feature_names=x_names)
print(dtc_rules)

In [None]:
from sklearn.ensemble import RandomForestClassifier


In [None]:
# A forest of shallow trees (each limited to depth 3).
# random_state is fixed because a random forest is stochastic: without a
# seed, the fitted trees, the feature importances, and the individual
# estimators inspected by index further down would change on every run.
rf = RandomForestClassifier(max_depth=3, random_state=42)
rf

In [None]:
# Train the forest on the same predictors and target as the single tree.
rf.fit(X=bg_df[x_names], y=bg_df['quality_game'])



In [None]:
# How many individual fitted trees the forest contains.
len(rf.estimators_)

In [None]:
# Pick one member tree of the forest (index 5) and print its rules as text.
sixth_tree = rf.estimators_[5]
print(export_text(sixth_tree, feature_names=x_names))

In [None]:
# Draw one member tree of the forest (the second-to-last) on an explicit Axes
# rather than relying on pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(16, 8))
plot_tree(rf.estimators_[-2],
          feature_names=x_names, fontsize=10, filled=True, ax=ax)
# Title is set after plot_tree because plot_tree clears the Axes it draws on.
# The trailing semicolon keeps the cell from echoing the returned object.
ax.set_title('Random forest: second-to-last tree');


In [None]:
# Tabulate the forest's feature importances: one row per predictor name,
# with the scores in a single unnamed column (label 0).
importance_scores = rf.feature_importances_
imp = pd.DataFrame(data=importance_scores, index=x_names)
imp

<div class="alert alert-info">
<h3> Your Turn</h3>
<p> Run the model again, changing the max_depth parameter. What does it mean to change this parameter? How do the results vary?
</div>



<div class="alert alert-info">
<h3> Your Turn</h3>
<p> Find the documentation for this function. What other parameters are available? Add one to your model.
</div>




<div class="alert alert-info">
<h3> Your Turn</h3>
<p> Produce predicted values from your model and evaluate the accuracy score.
</div>





## Time to supersize things

In [None]:
# Names of the game-category columns offered to the larger model.
categories = [
    'category_cardgame',
    'category_wargame',
    'category_fantasy',
    'category_dice',
    'category_partygame',
    'category_fighting',
    'category_sciencefiction',
    'category_abstractstrategy',
    'category_economic',
    'category_childrensgame',
    'category_worldwarii',
    'category_bluffing',
    'category_animals',
    'category_humor',
    'category_actiondexterity',
    'category_adventure',
    'category_moviestvradiotheme',
    'category_medieval',
    'category_deduction',
    'category_miniatures',
]

# Names of the game-mechanic columns offered to the larger model.
mechanics = [
    'mechanic_dicerolling',
    'mechanic_handmanagement',
    'mechanic_hexandcounter',
    'mechanic_setcollection',
    'mechanic_variableplayerpowers',
    'mechanic_none',
    'mechanic_tileplacement',
    'mechanic_modularboard',
    'mechanic_carddrafting',
    'mechanic_rollspinandmove',
    'mechanic_areacontrolareainfluence',
    'mechanic_auctionbidding',
    'mechanic_simulation',
    'mechanic_areamovement',
    'mechanic_simultaneousactionselection',
    'mechanic_actionpointallowancesystem',
    'mechanic_cooperativeplay',
    'mechanic_pointtopointmovement',
    'mechanic_partnerships',
    'mechanic_memory',
]

In [None]:
# Base predictors for the larger model: the earlier set plus 'complexity'.
# Note that this rebinds x_names, so cells above that use x_names will pick
# up this longer list if they are re-run after this cell.
x_names = [
    'complexity',
    'max_players',
    'min_players',
    'min_playtime',
    'max_playtime',
    'min_age',
]

# Full predictor list: base predictors, then mechanics, then categories.
many_xs = [*x_names, *mechanics, *categories]


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline


# Hyper-parameter grid to search exhaustively: 2 x 1 x 2 = 4 candidate
# settings, each evaluated with 5-fold cross-validation (20 fits in total,
# plus one final refit of the winner with GridSearchCV's default refit=True).
param_dist = {
    "max_features": [4, 7],
    "min_samples_split": [10],
    "n_estimators": [100, 200],
}

# random_state is fixed on the base estimator so every candidate forest is
# built deterministically; otherwise the cross-validated scores, and
# therefore the "best" setting, could differ from run to run.
# n_jobs=-1 runs the fits in parallel on all available cores; verbose=1
# prints a short progress summary.
rfgs = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_dist,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Run the grid search over the full predictor set.
rfgs.fit(X=bg_df[many_xs], y=bg_df['quality_game'])

In [None]:
# Show the estimator the grid search selected as best.
rfgs.best_estimator_

In [None]:
# Keep a handle on the winning forest from the grid search.
rf_best = rfgs.best_estimator_

# Tabulate its feature importances (one row per predictor, unnamed column 0)
# and list the most important predictors first.
best_importances = pd.DataFrame(data=rf_best.feature_importances_, index=many_xs)
best_importances.sort_values(by=0, ascending=False)

In [None]:
# Pick one member tree of the best forest (index 11) and print its rules as text.
twelfth_tree = rf_best.estimators_[11]
print(export_text(twelfth_tree, feature_names=many_xs))

<div class="alert alert-info">
<h3> Your Turn</h3>
<p> Work with your group to find the best-fitting model. Compare its accuracy with that of logistic regression.
</div>






<div class="alert alert-info">
<h3> Bonus Challenge</h3>

<p> Bonus challenge: Use both features in the data set and ones you construct from a topic model!

</div>
