# Different Machine Learning Models

### Import required Libraries

In [16]:
import pandas as pd
import numpy as np

from sklearn import model_selection
from sklearn import metrics

import warnings
warnings.simplefilter(action = "ignore")

from mlxtend.classifier import StackingClassifier
from sklearn.model_selection import train_test_split

### Import Data

In [17]:
mf = pd.read_csv('mf.csv')
x = mf.drop(['winner'],axis=1)
y = mf['winner']
x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 696 entries, 0 to 695
Data columns (total 5 columns):
team1_id         696 non-null int64
team2_id         696 non-null int64
toss_winner      696 non-null int64
toss_decision    696 non-null int64
venue            696 non-null int64
dtypes: int64(5)
memory usage: 27.3 KB


### 1) Logistic Regression

In [18]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression().fit(x,y)
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.29310344827586204


In [19]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.22857142857142856


### 2) K Nearest Neighbours

In [20]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=5).fit(x,y)  #Highest at K=5
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.5732758620689655


In [21]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.45714285714285713


### 3) Naive Bayes Classifier

In [22]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB().fit(x,y)
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.14655172413793102


In [23]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.14285714285714285


### 4) Decision Tree Classifier

In [24]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier().fit(x,y)
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.8620689655172413


In [25]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.6285714285714286


### 5) Random Forest Classifier

In [76]:
from sklearn.ensemble import RandomForestClassifier
clf =  RandomForestClassifier(n_estimators=50,random_state=3,max_depth=8).fit(x,y)
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.7758620689655172


In [77]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.7428571428571429


### 6) XGBoost Classifier

In [52]:
import xgboost as xgb
clf = xgb.XGBClassifier(max_depth=1000, learning_rate=0.1, n_estimators=10, objective='binary:logistic', booster='dart')
clf = clf.fit(x,y)
predictions = clf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.7385057471264368


In [68]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05, random_state=0)
model = clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.7428571428571429


## Stacking Classifiers

In [101]:
clf1 = KNeighborsClassifier(n_neighbors=5)
clf2 = LogisticRegression()
clf3 = GaussianNB()
clf4 = RandomForestClassifier(n_estimators=25,max_depth=8)
clf5 = DecisionTreeClassifier()
clf6 =  xgb.XGBClassifier(max_depth=1000, learning_rate=0.1, n_estimators=10, verbosity=2, silent=False, objective='binary:logistic', booster='dart', n_jobs=1, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1, colsample_bynode=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, random_state=0, seed=None, missing=None)
mc =  RandomForestClassifier(n_estimators=20,max_depth=8)
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3,clf4,clf5,clf6], 
                          meta_classifier=mc)

In [100]:
model = sclf.fit(x,y)
predictions = sclf.predict(x)
accuracy = metrics.accuracy_score(y,predictions)
print(accuracy)

0.8548850574712644


In [117]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.05,random_state=1)
model = sclf.fit(X_train,y_train)
predictions = sclf.predict(X_test)
accuracy = metrics.accuracy_score(y_test,predictions)
print(accuracy)

0.6857142857142857
