In [308]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline 

# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by the libraries imported below.
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import make_scorer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
# Do not truncate wide DataFrames when they are displayed.
pd.set_option('display.max_columns', None)

**Data Preparation**

In this notebook I have done some data preparation, cleaning and preprocessing for the Kobe dataset. I also computed some simple accuracy estimates using a train/test split.

To do:
* Analyze and solve potential problems up to this point
* Feature Selection
* Variance Threshold
* Most important features
* Recursive Feature Elimination
* PCA Visualization
* Final predictions

*Because this is a work in progress and potentially contains errors and bugs, I will not include any comments or explanations at this point.*

In [309]:
# Load the shot log from the working directory and preview the first rows.
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0,action_type,combined_shot_type,game_event_id,game_id,lat,loc_x,loc_y,lon,minutes_remaining,period,playoffs,season,seconds_remaining,shot_distance,shot_made_flag,shot_type,shot_zone_area,shot_zone_basic,shot_zone_range,team_id,team_name,game_date,matchup,opponent,shot_id
0,Jump Shot,Jump Shot,10,20000012,33.9723,167,72,-118.1028,10,1,0,2000-01,27,18,,2PT Field Goal,Right Side(R),Mid-Range,16-24 ft.,1610612747,Los Angeles Lakers,2000-10-31,LAL @ POR,POR,1
1,Jump Shot,Jump Shot,12,20000012,34.0443,-157,0,-118.4268,10,1,0,2000-01,22,15,0.0,2PT Field Goal,Left Side(L),Mid-Range,8-16 ft.,1610612747,Los Angeles Lakers,2000-10-31,LAL @ POR,POR,2
2,Jump Shot,Jump Shot,35,20000012,33.9093,-101,135,-118.3708,7,1,0,2000-01,45,16,1.0,2PT Field Goal,Left Side Center(LC),Mid-Range,16-24 ft.,1610612747,Los Angeles Lakers,2000-10-31,LAL @ POR,POR,3
3,Jump Shot,Jump Shot,43,20000012,33.8693,138,175,-118.1318,6,1,0,2000-01,52,22,0.0,2PT Field Goal,Right Side Center(RC),Mid-Range,16-24 ft.,1610612747,Los Angeles Lakers,2000-10-31,LAL @ POR,POR,4
4,Driving Dunk Shot,Dunk,155,20000012,34.0443,0,0,-118.2698,6,2,0,2000-01,19,0,1.0,2PT Field Goal,Center(C),Restricted Area,Less Than 8 ft.,1610612747,Los Angeles Lakers,2000-10-31,LAL @ POR,POR,5


In [310]:
# Column dtypes: distinguishes the numeric columns from the object (string) ones.
data.dtypes

action_type            object
combined_shot_type     object
game_event_id           int64
game_id                 int64
lat                   float64
loc_x                   int64
loc_y                   int64
lon                   float64
minutes_remaining       int64
period                  int64
playoffs                int64
season                 object
seconds_remaining       int64
shot_distance           int64
shot_made_flag        float64
shot_type              object
shot_zone_area         object
shot_zone_basic        object
shot_zone_range        object
team_id                 int64
team_name              object
game_date              object
matchup                object
opponent               object
shot_id                 int64
dtype: object

In [311]:
# (rows, columns) of the raw frame.
data.shape

(30697, 25)

In [312]:
# Load the sample submission file and report how many rows it contains.
samples = pd.read_csv('sample_submission.csv')
samples.shape[0]

5000

In [313]:
# Boolean mask over the rows of `data`: True where the target
# 'shot_made_flag' is missing.
unknown_shots_mask = pd.isnull(data['shot_made_flag'])
# Number of rows without a target value.
unknown_shots_mask.sum()

5000

In [314]:
# Check that the rows with a missing target are exactly the rows listed in the
# sample submission. The comparison relies on shot_id == row index + 1, which
# matches the preview rows of data.csv displayed in this notebook.
# np.array_equal compares the ids element by element and returns False when the
# arrays differ in length. Taking len() of the element-wise comparison would only
# measure the array length and could never detect a mismatching id.
np.array_equal(unknown_shots_mask[unknown_shots_mask].index.values,
               samples['shot_id'].values - 1)

True

In [315]:
# Pairwise correlations between the numeric columns.
# `data` also contains object (string) columns such as 'action_type'. Selecting
# the numeric columns explicitly gives the same table on older pandas and keeps
# the cell working on versions where DataFrame.corr() no longer drops
# non-numeric columns on its own.
data.select_dtypes(include=[np.number]).corr()

Unnamed: 0,game_event_id,game_id,lat,loc_x,loc_y,lon,minutes_remaining,period,playoffs,seconds_remaining,shot_distance,shot_made_flag,team_id,shot_id
game_event_id,1.0,-0.002553,-0.060091,-0.028404,0.060091,-0.028404,-0.270736,0.956042,-0.004507,-0.021293,0.062077,-0.037232,,0.028478
game_id,-0.002553,1.0,0.013021,-0.019553,-0.013021,-0.019553,0.014481,0.009879,0.917235,-0.007107,-0.030327,-0.001612,,0.761656
lat,-0.060091,0.013021,1.0,0.016217,-1.0,0.016217,0.071462,-0.041248,-0.001143,0.052714,-0.818517,0.14807,,-0.03079
loc_x,-0.028404,-0.019553,0.016217,1.0,-0.016217,1.0,0.006898,-0.028165,-0.01279,0.001083,0.022654,-0.000848,,-0.019917
loc_y,0.060091,-0.013021,-1.0,-0.016217,1.0,-0.016217,-0.071462,0.041248,0.001143,-0.052714,0.818517,-0.14807,,0.03079
lon,-0.028404,-0.019553,0.016217,1.0,-0.016217,1.0,0.006898,-0.028165,-0.01279,0.001083,0.022654,-0.000848,,-0.019917
minutes_remaining,-0.270736,0.014481,0.071462,0.006898,-0.071462,0.006898,1.0,-0.043425,0.015101,0.025558,-0.059767,0.028342,,-0.005731
period,0.956042,0.009879,-0.041248,-0.028165,0.041248,-0.028165,-0.043425,1.0,0.008789,0.005832,0.046993,-0.032152,,0.000364
playoffs,-0.004507,0.917235,-0.001143,-0.01279,0.001143,-0.01279,0.015101,0.008789,1.0,-0.00249,-0.008667,-0.001257,,0.612571
seconds_remaining,-0.021293,-0.007107,0.052714,0.001083,-0.052714,0.001083,0.025558,0.005832,-0.00249,1.0,-0.051482,0.030804,,-0.004868


In [316]:
# Correlations restricted to the four coordinate columns, to compare the
# lat/lon pair with the loc_x/loc_y pair.
data.loc[:, ['lat', 'loc_x', 'loc_y', 'lon']].corr()

Unnamed: 0,lat,loc_x,loc_y,lon
lat,1.0,0.016217,-1.0,0.016217
loc_x,0.016217,1.0,-0.016217,1.0
loc_y,-1.0,-0.016217,1.0,-0.016217
lon,0.016217,1.0,-0.016217,1.0


In [317]:
# Keep the prediction target aside before the column is dropped from `data`.
# It is NaN for the rows flagged by `unknown_shots_mask`.
target = data['shot_made_flag']

In [318]:
# Remove identifier columns, the target (already saved in `target`) and the
# lat/lon pair, which the correlation table shows to be perfectly correlated
# with loc_y/loc_x.
# NOTE(review): team_id has no defined correlation in the table above, so it
# is presumably constant — confirm.
data = data.drop(
    ['team_id', 'lat', 'lon', 'game_id', 'game_event_id', 'team_name',
     'shot_id', 'shot_made_flag'],
    axis=1
)
data.head()

Unnamed: 0,action_type,combined_shot_type,loc_x,loc_y,minutes_remaining,period,playoffs,season,seconds_remaining,shot_distance,shot_type,shot_zone_area,shot_zone_basic,shot_zone_range,game_date,matchup,opponent
0,Jump Shot,Jump Shot,167,72,10,1,0,2000-01,27,18,2PT Field Goal,Right Side(R),Mid-Range,16-24 ft.,2000-10-31,LAL @ POR,POR
1,Jump Shot,Jump Shot,-157,0,10,1,0,2000-01,22,15,2PT Field Goal,Left Side(L),Mid-Range,8-16 ft.,2000-10-31,LAL @ POR,POR
2,Jump Shot,Jump Shot,-101,135,7,1,0,2000-01,45,16,2PT Field Goal,Left Side Center(LC),Mid-Range,16-24 ft.,2000-10-31,LAL @ POR,POR
3,Jump Shot,Jump Shot,138,175,6,1,0,2000-01,52,22,2PT Field Goal,Right Side Center(RC),Mid-Range,16-24 ft.,2000-10-31,LAL @ POR,POR
4,Driving Dunk Shot,Dunk,0,0,6,2,0,2000-01,19,0,2PT Field Goal,Center(C),Restricted Area,Less Than 8 ft.,2000-10-31,LAL @ POR,POR


In [319]:
# Remaining time as a single number of seconds:
# minutes_remaining * 60 + seconds_remaining.
data['seconds_to_end'] = data['minutes_remaining'] * 60 + data['seconds_remaining']

In [320]:
# The two raw clock columns are fully captured by 'seconds_to_end'.
data = data.drop(['minutes_remaining', 'seconds_remaining'], axis=1)

In [321]:
# Home-game indicator: 1 when the matchup string contains 'vs', 0 otherwise
# (the preview rows show the other form as 'LAL @ POR'). The raw 'matchup'
# column is removed once the indicator has been derived.
data = (
    data
    .assign(home_game=data['matchup'].str.contains('vs').astype('int'))
    .drop('matchup', axis=1)
)

In [322]:
# Preview the frame now that 'seconds_to_end' and 'home_game' have been added.
data.head()

Unnamed: 0,action_type,combined_shot_type,loc_x,loc_y,period,playoffs,season,shot_distance,shot_type,shot_zone_area,shot_zone_basic,shot_zone_range,game_date,opponent,seconds_to_end,home_game
0,Jump Shot,Jump Shot,167,72,1,0,2000-01,18,2PT Field Goal,Right Side(R),Mid-Range,16-24 ft.,2000-10-31,POR,627,0
1,Jump Shot,Jump Shot,-157,0,1,0,2000-01,15,2PT Field Goal,Left Side(L),Mid-Range,8-16 ft.,2000-10-31,POR,622,0
2,Jump Shot,Jump Shot,-101,135,1,0,2000-01,16,2PT Field Goal,Left Side Center(LC),Mid-Range,16-24 ft.,2000-10-31,POR,465,0
3,Jump Shot,Jump Shot,138,175,1,0,2000-01,22,2PT Field Goal,Right Side Center(RC),Mid-Range,16-24 ft.,2000-10-31,POR,412,0
4,Driving Dunk Shot,Dunk,0,0,2,0,2000-01,0,2PT Field Goal,Center(C),Restricted Area,Less Than 8 ft.,2000-10-31,POR,379,0


In [323]:
# Keep only the calendar year of each game as a feature; the full date column
# is dropped afterwards.
data = (
    data
    .assign(game_year=pd.to_datetime(data['game_date']).dt.year)
    .drop('game_date', axis=1)
)

In [324]:
# Preview the frame now that 'game_year' has replaced 'game_date'.
data.head()

Unnamed: 0,action_type,combined_shot_type,loc_x,loc_y,period,playoffs,season,shot_distance,shot_type,shot_zone_area,shot_zone_basic,shot_zone_range,opponent,seconds_to_end,home_game,game_year
0,Jump Shot,Jump Shot,167,72,1,0,2000-01,18,2PT Field Goal,Right Side(R),Mid-Range,16-24 ft.,POR,627,0,2000
1,Jump Shot,Jump Shot,-157,0,1,0,2000-01,15,2PT Field Goal,Left Side(L),Mid-Range,8-16 ft.,POR,622,0,2000
2,Jump Shot,Jump Shot,-101,135,1,0,2000-01,16,2PT Field Goal,Left Side Center(LC),Mid-Range,16-24 ft.,POR,465,0,2000
3,Jump Shot,Jump Shot,138,175,1,0,2000-01,22,2PT Field Goal,Right Side Center(RC),Mid-Range,16-24 ft.,POR,412,0,2000
4,Driving Dunk Shot,Dunk,0,0,2,0,2000-01,0,2PT Field Goal,Center(C),Restricted Area,Less Than 8 ft.,POR,379,0,2000


In [325]:
# Collapse the 20 least frequent 'action_type' values into one
# 'least_common' bucket so that they do not each get their own dummy column.
action_type_counts = data['action_type'].value_counts()
rare_action_types = action_type_counts.sort_values().index.values[:20]
is_rare_action = data['action_type'].isin(rare_action_types)
data['action_type'] = data['action_type'].mask(is_rare_action, 'least_common')

In [326]:
# How many shots ended up in the 'least_common' bucket.
int((data['action_type'] == 'least_common').sum())

103

In [327]:
# Discretise both court coordinates into 25 bins each; the binned columns
# replace the raw integer coordinates and are one-hot encoded later on.
data = data.assign(
    loc_x=pd.cut(data['loc_x'], bins=25),
    loc_y=pd.cut(data['loc_y'], bins=25)
)
data.head()

Unnamed: 0,action_type,combined_shot_type,loc_x,loc_y,period,playoffs,season,shot_distance,shot_type,shot_zone_area,shot_zone_basic,shot_zone_range,opponent,seconds_to_end,home_game,game_year
0,Jump Shot,Jump Shot,"(148.4, 168.32]","(56.2, 89.6]",1,0,2000-01,18,2PT Field Goal,Right Side(R),Mid-Range,16-24 ft.,POR,627,0,2000
1,Jump Shot,Jump Shot,"(-170.32, -150.4]","(-10.6, 22.8]",1,0,2000-01,15,2PT Field Goal,Left Side(L),Mid-Range,8-16 ft.,POR,622,0,2000
2,Jump Shot,Jump Shot,"(-110.56, -90.64]","(123, 156.4]",1,0,2000-01,16,2PT Field Goal,Left Side Center(LC),Mid-Range,16-24 ft.,POR,465,0,2000
3,Jump Shot,Jump Shot,"(128.48, 148.4]","(156.4, 189.8]",1,0,2000-01,22,2PT Field Goal,Right Side Center(RC),Mid-Range,16-24 ft.,POR,412,0,2000
4,Driving Dunk Shot,Dunk,"(-10.96, 8.96]","(-10.6, 22.8]",2,0,2000-01,0,2PT Field Goal,Center(C),Restricted Area,Less Than 8 ft.,POR,379,0,2000


In [328]:
# Columns to be one-hot encoded. The order is significant: the dummy columns
# are generated in this order.
cat_cols = [
    'action_type',
    'combined_shot_type',
    'period',
    'season',
    'shot_type',
    'shot_zone_area',
    'shot_zone_basic',
    'shot_zone_range',
    'game_year',
    'opponent',
    'loc_x',
    'loc_y',
]

In [329]:
# One-hot encode the categorical columns. drop_first=True omits the first
# level of every encoded column.
# Despite its name, `model` is the encoded feature matrix, not an estimator.
model = pd.get_dummies(
    data,
    columns=cat_cols,
    drop_first=True
)

In [330]:
# (rows, columns) of the encoded feature matrix.
model.shape

(30697, 187)

In [331]:
# Preview the encoded feature matrix (all columns are shown because
# display.max_columns is set to None).
model.head()

Unnamed: 0,playoffs,shot_distance,seconds_to_end,home_game,action_type_Alley Oop Layup shot,action_type_Driving Dunk Shot,action_type_Driving Finger Roll Layup Shot,action_type_Driving Finger Roll Shot,action_type_Driving Jump shot,action_type_Driving Layup Shot,action_type_Driving Reverse Layup Shot,action_type_Driving Slam Dunk Shot,action_type_Dunk Shot,action_type_Fadeaway Bank shot,action_type_Fadeaway Jump Shot,action_type_Finger Roll Layup Shot,action_type_Finger Roll Shot,action_type_Floating Jump shot,action_type_Follow Up Dunk Shot,action_type_Hook Shot,action_type_Jump Bank Shot,action_type_Jump Hook Shot,action_type_Jump Shot,action_type_Layup Shot,action_type_Pullup Jump shot,action_type_Putback Layup Shot,action_type_Reverse Dunk Shot,action_type_Reverse Layup Shot,action_type_Reverse Slam Dunk Shot,action_type_Running Bank shot,action_type_Running Dunk Shot,action_type_Running Hook Shot,action_type_Running Jump Shot,action_type_Running Layup Shot,action_type_Slam Dunk Shot,action_type_Step Back Jump shot,action_type_Tip Shot,action_type_Turnaround Bank shot,action_type_Turnaround Fadeaway shot,action_type_Turnaround Jump Shot,action_type_least_common,combined_shot_type_Dunk,combined_shot_type_Hook Shot,combined_shot_type_Jump Shot,combined_shot_type_Layup,combined_shot_type_Tip Shot,period_2,period_3,period_4,period_5,period_6,period_7,season_1997-98,season_1998-99,season_1999-00,season_2000-01,season_2001-02,season_2002-03,season_2003-04,season_2004-05,season_2005-06,season_2006-07,season_2007-08,season_2008-09,season_2009-10,season_2010-11,season_2011-12,season_2012-13,season_2013-14,season_2014-15,season_2015-16,shot_type_3PT Field Goal,shot_zone_area_Center(C),shot_zone_area_Left Side Center(LC),shot_zone_area_Left Side(L),shot_zone_area_Right Side Center(RC),shot_zone_area_Right Side(R),shot_zone_basic_Backcourt,shot_zone_basic_In The Paint (Non-RA),shot_zone_basic_Left Corner 3,shot_zone_basic_Mid-Range,shot_zone_basic_Restricted 
Area,shot_zone_basic_Right Corner 3,shot_zone_range_24+ ft.,shot_zone_range_8-16 ft.,shot_zone_range_Back Court Shot,shot_zone_range_Less Than 8 ft.,game_year_1997,game_year_1998,game_year_1999,game_year_2000,game_year_2001,game_year_2002,game_year_2003,game_year_2004,game_year_2005,game_year_2006,game_year_2007,game_year_2008,game_year_2009,game_year_2010,game_year_2011,game_year_2012,game_year_2013,game_year_2014,game_year_2015,game_year_2016,opponent_BKN,opponent_BOS,opponent_CHA,opponent_CHI,opponent_CLE,opponent_DAL,opponent_DEN,opponent_DET,opponent_GSW,opponent_HOU,opponent_IND,opponent_LAC,opponent_MEM,opponent_MIA,opponent_MIL,opponent_MIN,opponent_NJN,opponent_NOH,opponent_NOP,opponent_NYK,opponent_OKC,opponent_ORL,opponent_PHI,opponent_PHX,opponent_POR,opponent_SAC,opponent_SAS,opponent_SEA,opponent_TOR,opponent_UTA,opponent_VAN,opponent_WAS,"loc_x_(-230.08, -210.16]","loc_x_(-210.16, -190.24]","loc_x_(-190.24, -170.32]","loc_x_(-170.32, -150.4]","loc_x_(-150.4, -130.48]","loc_x_(-130.48, -110.56]","loc_x_(-110.56, -90.64]","loc_x_(-90.64, -70.72]","loc_x_(-70.72, -50.8]","loc_x_(-50.8, -30.88]","loc_x_(-30.88, -10.96]","loc_x_(-10.96, 8.96]","loc_x_(8.96, 28.88]","loc_x_(28.88, 48.8]","loc_x_(48.8, 68.72]","loc_x_(68.72, 88.64]","loc_x_(88.64, 108.56]","loc_x_(108.56, 128.48]","loc_x_(128.48, 148.4]","loc_x_(148.4, 168.32]","loc_x_(168.32, 188.24]","loc_x_(188.24, 208.16]","loc_x_(208.16, 228.08]","loc_x_(228.08, 248]","loc_y_(-10.6, 22.8]","loc_y_(22.8, 56.2]","loc_y_(56.2, 89.6]","loc_y_(89.6, 123]","loc_y_(123, 156.4]","loc_y_(156.4, 189.8]","loc_y_(189.8, 223.2]","loc_y_(223.2, 256.6]","loc_y_(256.6, 290]","loc_y_(290, 323.4]","loc_y_(323.4, 356.8]","loc_y_(356.8, 390.2]","loc_y_(390.2, 423.6]","loc_y_(423.6, 457]","loc_y_(457, 490.4]","loc_y_(490.4, 523.8]","loc_y_(523.8, 557.2]","loc_y_(557.2, 590.6]","loc_y_(590.6, 624]","loc_y_(624, 657.4]","loc_y_(657.4, 690.8]","loc_y_(690.8, 724.2]","loc_y_(724.2, 757.6]","loc_y_(757.6, 791]"
0,0,18,627,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,15,622,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,16,465,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,22,412,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,379,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [332]:
# Names of the columns of the encoded feature matrix.
model.columns

Index(['playoffs', 'shot_distance', 'seconds_to_end', 'home_game',
       'action_type_Alley Oop Layup shot', 'action_type_Driving Dunk Shot',
       'action_type_Driving Finger Roll Layup Shot',
       'action_type_Driving Finger Roll Shot', 'action_type_Driving Jump shot',
       'action_type_Driving Layup Shot',
       ...
       'loc_y_(457, 490.4]', 'loc_y_(490.4, 523.8]', 'loc_y_(523.8, 557.2]',
       'loc_y_(557.2, 590.6]', 'loc_y_(590.6, 624]', 'loc_y_(624, 657.4]',
       'loc_y_(657.4, 690.8]', 'loc_y_(690.8, 724.2]', 'loc_y_(724.2, 757.6]',
       'loc_y_(757.6, 791]'],
      dtype='object', length=187)

In [333]:
# Feature rows whose target is known (rows with a missing target are excluded).
X = model.loc[~unknown_shots_mask]
X.shape

(25697, 187)

In [334]:
# Known target values, selected with the same mask as the feature rows in X.
Y = target.loc[~unknown_shots_mask]
Y.shape

(25697,)

In [335]:
# train_test_split lives in sklearn.model_selection in current scikit-learn;
# the old sklearn.cross_validation module is only tried as a fallback for
# installations that predate model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [336]:
# Baseline: 5-nearest-neighbours classifier scored on a seeded hold-out split.
# NOTE(review): the features are not scaled, so 'seconds_to_end' and
# 'shot_distance' span far larger ranges than the 0/1 dummy columns and will
# dominate the neighbour distances — confirm whether scaling is wanted.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=4)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
accuracy_score(Y_test, Y_pred)

0.56980544747081707

In [337]:
# Gaussian naive Bayes estimator with default settings; it is fitted in a
# later cell.
# NOTE(review): most feature columns are 0/1 dummies — confirm that a Gaussian
# likelihood is the intended choice for them.
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()

In [338]:
# Baseline: Gaussian naive Bayes scored on the same seeded hold-out split.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=4)
gnb.fit(X_train, Y_train)
Y_pred = gnb.predict(X_test)
accuracy_score(Y_test, Y_pred)

0.6529182879377432

In [339]:
# Baseline: small random forest (10 trees) scored on the same seeded hold-out
# split as the other baselines.
from sklearn.ensemble import RandomForestClassifier
# random_state fixes the forest's own randomness so that the displayed score
# is reproducible on a fresh kernel. Without it only the split was seeded.
clf = RandomForestClassifier(n_estimators=10, random_state=4)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=4)
clf = clf.fit(X_train, Y_train)
# Mean accuracy on the held-out rows.
clf.score(X_test, Y_test)

0.63750972762645919