In [4]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings('ignore')

## Reading Data and Data Wrangling

In [5]:
data = pd.read_csv(r'C:\Users\kukumar\OneDrive - AMDOCS\Backup Folders\Documents\GitHub\LearningPython\DataAnalysis_with_Python\heart_dataset\heart_dataset_updated.csv')

In [6]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1,3.0,145.0,233.0,1,0.0,150.0,0,2.3,0,0,1.0,1
1,37.0,1,2.0,130.0,,0,1.0,187.0,0,3.5,0,0,2.0,1
2,41.0,0,1.0,130.0,204.0,0,0.0,172.0,0,1.4,2,0,2.0,1
3,56.0,1,1.0,120.0,236.0,0,1.0,178.0,0,0.8,2,0,2.0,1
4,57.0,0,0.0,120.0,354.0,0,1.0,163.0,1,0.6,2,0,2.0,1


In [7]:
data.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [8]:
data.shape

(303, 14)

In [9]:
data.isnull().sum()

age         4
sex         0
cp          6
trestbps    4
chol        4
fbs         0
restecg     1
thalach     3
exang       0
oldpeak     0
slope       0
ca          0
thal        1
target      0
dtype: int64

In [10]:
from sklearn.preprocessing import Imputer

In [11]:
imr = Imputer(missing_values='NaN',strategy='mean',axis=0)



In [12]:
imputed_data = imr.fit_transform(data)

In [13]:
imputed_data

array([[63.,  1.,  3., ...,  0.,  1.,  1.],
       [37.,  1.,  2., ...,  0.,  2.,  1.],
       [41.,  0.,  1., ...,  0.,  2.,  1.],
       ...,
       [68.,  1.,  0., ...,  2.,  3.,  0.],
       [57.,  1.,  0., ...,  1.,  3.,  0.],
       [57.,  0.,  1., ...,  1.,  2.,  0.]])

In [14]:
dataframe = pd.DataFrame.from_records(imputed_data)

In [15]:
dataframe.columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

In [16]:
dataframe.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,3.0,145.0,233.0,1.0,0.0,150.0,0.0,2.3,0.0,0.0,1.0,1.0
1,37.0,1.0,2.0,130.0,245.652174,0.0,1.0,187.0,0.0,3.5,0.0,0.0,2.0,1.0
2,41.0,0.0,1.0,130.0,204.0,0.0,0.0,172.0,0.0,1.4,2.0,0.0,2.0,1.0
3,56.0,1.0,1.0,120.0,236.0,0.0,1.0,178.0,0.0,0.8,2.0,0.0,2.0,1.0
4,57.0,0.0,0.0,120.0,354.0,0.0,1.0,163.0,1.0,0.6,2.0,0.0,2.0,1.0


In [17]:
dataframe.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

## Preparing data for Processing

In [18]:
X = dataframe.drop(['target'],axis=1)

In [19]:
y = dataframe['target'].values

In [20]:
y.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'

In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [22]:
scalar = StandardScaler()

In [23]:
X_std = scalar.fit_transform(X)

In [25]:
X_std_train,X_std_test,y_train,y_test = train_test_split(X_std,y,train_size=0.8,random_state=10)

## Using Logistic Regression

In [63]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [55]:
q2params = {'penalty':['l1','l2'],'C':np.linspace(start=0.001,stop=2,num=100)}

In [56]:
grid = GridSearchCV(LogisticRegression(),params,refit=True,verbose=4)

In [57]:
grid.fit(X_std_train,y_train)

Fitting 3 folds for each of 200 candidates, totalling 600 fits
[CV] C=0.001, penalty=l1 .............................................
[CV] .... C=0.001, penalty=l1, score=0.4268292682926829, total=   0.0s
[CV] C=0.001, penalty=l1 .............................................
[CV] ................. C=0.001, penalty=l1, score=0.425, total=   0.0s
[CV] C=0.001, penalty=l1 .............................................
[CV] ................. C=0.001, penalty=l1, score=0.425, total=   0.0s
[CV] C=0.001, penalty=l2 .............................................
[CV] .... C=0.001, penalty=l2, score=0.7926829268292683, total=   0.0s
[CV] C=0.001, penalty=l2 .............................................
[CV] .................. C=0.001, penalty=l2, score=0.85, total=   0.0s
[CV] C=0.001, penalty=l2 .............................................
[CV] ................ C=0.001, penalty=l2, score=0.8125, total=   0.0s
[CV] C=0.021191919191919192, penalty=l1 ..............................
[CV]  C=0.0211

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s


[CV] .... C=0.08176767676767677, penalty=l2, score=0.85, total=   0.0s
[CV] C=0.08176767676767677, penalty=l2 ...............................
[CV] ... C=0.08176767676767677, penalty=l2, score=0.825, total=   0.0s
[CV] C=0.10195959595959596, penalty=l1 ...............................
[CV]  C=0.10195959595959596, penalty=l1, score=0.8292682926829268, total=   0.0s
[CV] C=0.10195959595959596, penalty=l1 ...............................
[CV] .. C=0.10195959595959596, penalty=l1, score=0.8625, total=   0.0s
[CV] C=0.10195959595959596, penalty=l1 ...............................
[CV] .... C=0.10195959595959596, penalty=l1, score=0.85, total=   0.0s
[CV] C=0.10195959595959596, penalty=l2 ...............................
[CV]  C=0.10195959595959596, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=0.10195959595959596, penalty=l2 ...............................
[CV] .. C=0.10195959595959596, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.10195959595959596, penalty=l2 ..................

[CV]  C=0.32407070707070706, penalty=l2, score=0.8170731707317073, total=   0.0s
[CV] C=0.32407070707070706, penalty=l2 ...............................
[CV] .. C=0.32407070707070706, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.32407070707070706, penalty=l2 ...............................
[CV] .. C=0.32407070707070706, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.34426262626262627, penalty=l1 ...............................
[CV]  C=0.34426262626262627, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=0.34426262626262627, penalty=l1 ...............................
[CV] .. C=0.34426262626262627, penalty=l1, score=0.8375, total=   0.0s
[CV] C=0.34426262626262627, penalty=l1 ...............................
[CV] .... C=0.34426262626262627, penalty=l1, score=0.85, total=   0.0s
[CV] C=0.34426262626262627, penalty=l2 ...............................
[CV]  C=0.34426262626262627, penalty=l2, score=0.8170731707317073, total=   0.0s
[CV] C=0.34426262626262627, penalty=l2 ........

[CV] .... C=0.6269494949494949, penalty=l1, score=0.825, total=   0.0s
[CV] C=0.6269494949494949, penalty=l1 ................................
[CV] ..... C=0.6269494949494949, penalty=l1, score=0.85, total=   0.0s
[CV] C=0.6269494949494949, penalty=l2 ................................
[CV]  C=0.6269494949494949, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=0.6269494949494949, penalty=l2 ................................
[CV] ... C=0.6269494949494949, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.6269494949494949, penalty=l2 ................................
[CV] ... C=0.6269494949494949, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.6471414141414141, penalty=l1 ................................
[CV]  C=0.6471414141414141, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=0.6471414141414141, penalty=l1 ................................
[CV] .... C=0.6471414141414141, penalty=l1, score=0.825, total=   0.0s
[CV] C=0.6471414141414141, penalty=l1 .....................

[CV] ... C=0.9298282828282828, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.9298282828282828, penalty=l2 ................................
[CV] ... C=0.9298282828282828, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.950020202020202, penalty=l1 .................................
[CV]  C=0.950020202020202, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=0.950020202020202, penalty=l1 .................................
[CV] ..... C=0.950020202020202, penalty=l1, score=0.825, total=   0.0s
[CV] C=0.950020202020202, penalty=l1 .................................
[CV] .... C=0.950020202020202, penalty=l1, score=0.8375, total=   0.0s
[CV] C=0.950020202020202, penalty=l2 .................................
[CV]  C=0.950020202020202, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=0.950020202020202, penalty=l2 .................................
[CV] .... C=0.950020202020202, penalty=l2, score=0.8375, total=   0.0s
[CV] C=0.950020202020202, penalty=l2 ........................

[CV] ..... C=1.273090909090909, penalty=l1, score=0.825, total=   0.0s
[CV] C=1.273090909090909, penalty=l1 .................................
[CV] .... C=1.273090909090909, penalty=l1, score=0.8375, total=   0.0s
[CV] C=1.273090909090909, penalty=l2 .................................
[CV]  C=1.273090909090909, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=1.273090909090909, penalty=l2 .................................
[CV] .... C=1.273090909090909, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.273090909090909, penalty=l2 .................................
[CV] .... C=1.273090909090909, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.2932828282828281, penalty=l1 ................................
[CV]  C=1.2932828282828281, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=1.2932828282828281, penalty=l1 ................................
[CV] ... C=1.2932828282828281, penalty=l1, score=0.8375, total=   0.0s
[CV] C=1.2932828282828281, penalty=l1 ......................

[CV] .... C=1.4750101010101009, penalty=l1, score=0.825, total=   0.0s
[CV] C=1.4750101010101009, penalty=l2 ................................
[CV]  C=1.4750101010101009, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=1.4750101010101009, penalty=l2 ................................
[CV] ... C=1.4750101010101009, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.4750101010101009, penalty=l2 ................................
[CV] ... C=1.4750101010101009, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.49520202020202, penalty=l1 ..................................
[CV]  C=1.49520202020202, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=1.49520202020202, penalty=l1 ..................................
[CV] ..... C=1.49520202020202, penalty=l1, score=0.8375, total=   0.0s
[CV] C=1.49520202020202, penalty=l1 ..................................
[CV] ...... C=1.49520202020202, penalty=l1, score=0.825, total=   0.0s
[CV] C=1.49520202020202, penalty=l2 .........................

[CV]  C=1.6769292929292927, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=1.6769292929292927, penalty=l2 ................................
[CV] ... C=1.6769292929292927, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.6769292929292927, penalty=l2 ................................
[CV] ... C=1.6769292929292927, penalty=l2, score=0.8375, total=   0.0s
[CV] C=1.697121212121212, penalty=l1 .................................
[CV]  C=1.697121212121212, penalty=l1, score=0.7926829268292683, total=   0.0s
[CV] C=1.697121212121212, penalty=l1 .................................
[CV] .... C=1.697121212121212, penalty=l1, score=0.8375, total=   0.0s
[CV] C=1.697121212121212, penalty=l1 .................................
[CV] ..... C=1.697121212121212, penalty=l1, score=0.825, total=   0.0s
[CV] C=1.697121212121212, penalty=l2 .................................
[CV]  C=1.697121212121212, penalty=l2, score=0.8048780487804879, total=   0.0s
[CV] C=1.697121212121212, penalty=l2 ...............

[Parallel(n_jobs=1)]: Done 600 out of 600 | elapsed:    2.3s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'penalty': ['l1', 'l2'], 'C': array([1.00000e-03, 2.11919e-02, ..., 1.97981e+00, 2.00000e+00])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=4)

In [58]:
grid.best_params_

{'C': 0.08176767676767677, 'penalty': 'l1'}

In [69]:
logistic_Regression = LogisticRegression(C=0.08176767676767677,penalty='l1',random_state=10)

In [70]:
logistic_Regression.fit(X_std_train,y_train)

LogisticRegression(C=0.08176767676767677, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=None, penalty='l1', random_state=10,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [71]:
y_pred = logistic_Regression.predict(X_std_test)

In [72]:
accuracy_score(y_test,y_pred)

0.8032786885245902

## Using K Nearest Neighbour Classifier

In [73]:
from sklearn.neighbors import KNeighborsClassifier

In [74]:
params = {'n_neighbors':[1,2,3,4,5,6,7,8,9,10],'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute']}

In [75]:
grid = GridSearchCV(KNeighborsClassifier(),params,refit=True,verbose=4)

In [76]:
grid.fit(X_std_train,y_train)

Fitting 3 folds for each of 40 candidates, totalling 120 fits
[CV] algorithm=auto, n_neighbors=1 ...................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  algorithm=auto, n_neighbors=1, score=0.7804878048780488, total=   0.2s
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] ........ algorithm=auto, n_neighbors=1, score=0.75, total=   0.0s
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] ...... algorithm=auto, n_neighbors=1, score=0.8125, total=   0.0s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV]  algorithm=auto, n_neighbors=2, score=0.7926829268292683, total=   0.0s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] ...... algorithm=auto, n_neighbors=2, score=0.7375, total=   0.0s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] ......... algorithm=auto, n_neighbors=2, score=0.8, total=   0.0s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV]  algorithm=auto, n_neighbors=3, score=0.7682926829268293, total=   0.0s
[CV] algorithm=auto, n_neighbors=3 ........................

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.2s remaining:    0.0s


[CV]  algorithm=auto, n_neighbors=5, score=0.7682926829268293, total=   0.0s
[CV] algorithm=auto, n_neighbors=5 ...................................
[CV] ....... algorithm=auto, n_neighbors=5, score=0.875, total=   0.0s
[CV] algorithm=auto, n_neighbors=5 ...................................
[CV] ...... algorithm=auto, n_neighbors=5, score=0.8375, total=   0.0s
[CV] algorithm=auto, n_neighbors=6 ...................................
[CV]  algorithm=auto, n_neighbors=6, score=0.7804878048780488, total=   0.0s
[CV] algorithm=auto, n_neighbors=6 ...................................
[CV] ....... algorithm=auto, n_neighbors=6, score=0.875, total=   0.0s
[CV] algorithm=auto, n_neighbors=6 ...................................
[CV] ....... algorithm=auto, n_neighbors=6, score=0.825, total=   0.0s
[CV] algorithm=auto, n_neighbors=7 ...................................
[CV]  algorithm=auto, n_neighbors=7, score=0.7682926829268293, total=   0.0s
[CV] algorithm=auto, n_neighbors=7 ........................

[CV] ..... algorithm=kd_tree, n_neighbors=4, score=0.85, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=5 ................................
[CV]  algorithm=kd_tree, n_neighbors=5, score=0.7682926829268293, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=5 ................................
[CV] .... algorithm=kd_tree, n_neighbors=5, score=0.875, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=5 ................................
[CV] ... algorithm=kd_tree, n_neighbors=5, score=0.8375, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=6 ................................
[CV]  algorithm=kd_tree, n_neighbors=6, score=0.7804878048780488, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=6 ................................
[CV] .... algorithm=kd_tree, n_neighbors=6, score=0.875, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=6 ................................
[CV] .... algorithm=kd_tree, n_neighbors=6, score=0.825, total=   0.0s
[CV] algorithm=kd_tree, n_neighbors=7 .....................

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:    2.7s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=4)

In [77]:
grid.best_params_

{'algorithm': 'auto', 'n_neighbors': 7}

In [79]:
KNeighbours_Classifier = KNeighborsClassifier(n_neighbors=7,algorithm='auto',n_jobs=-1)

In [80]:
KNeighbours_Classifier.fit(X_std_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=7, p=2,
           weights='uniform')

In [83]:
y_pred_K_neighbour = KNeighbours_Classifier.predict(X_std_test)

In [84]:
accuracy_score(y_test,y_pred_K_neighbour)

0.8032786885245902

## Using Decision Tree Classifier With PCA

In [87]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA

In [182]:
pipeline_DTC = make_pipeline(PCA(n_components=3),DecisionTreeClassifier(random_state=10))

In [183]:
pipeline_DTC.fit(X_std_train,y_train)

Pipeline(memory=None,
     steps=[('pca', PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('decisiontreeclassifier', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
           ...        min_weight_fraction_leaf=0.0, presort=False, random_state=10,
            splitter='best'))])

In [184]:
y_pred_DTC = pipeline_DTC.predict(X_std_test)

In [185]:
accuracy_score(y_test,y_pred_DTC)

0.819672131147541

## Using Random Forest Classifier

In [193]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [188]:
# number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]

In [189]:
# number of features at every split
max_features = ['auto', 'sqrt']

In [190]:
# max depth
max_depth = [int(x) for x in np.linspace(100, 500, num = 11)]
max_depth.append(None)

In [191]:
# create random grid
random_grid = {
 'n_estimators': n_estimators,
 'max_features': max_features,
 'max_depth': max_depth
 }


In [194]:
rfc_random = RandomizedSearchCV(estimator = RandomForestClassifier(random_state=10), param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=10, n_jobs = -1)

In [195]:
# Fit the model
rfc_random.fit(X_std_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   54.6s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  4.2min finished


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=10, verbose=0, warm_start=False),
          fit_params=None, iid='warn', n_iter=100, n_jobs=-1,
          param_distributions={'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [100, 140, 180, 220, 260, 300, 340, 380, 420, 460, 500, None]},
          pre_dispatch='2*n_jobs', random_state=10, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [196]:
rfc_random.best_params_

{'n_estimators': 1000, 'max_features': 'auto', 'max_depth': 460}

In [198]:
rfc = RandomForestClassifier(n_estimators=1000,max_features='auto',max_depth=460,random_state=10)

In [199]:
rfc.fit(X_std_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=460, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=None,
            oob_score=False, random_state=10, verbose=0, warm_start=False)

In [200]:
y_pred_rfc = rfc.predict(X_std_test)

In [201]:
accuracy_score(y_test,y_pred_rfc)

0.8032786885245902