# Without Months With Odds

In [110]:
import sklearn as sk
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from sklearn.preprocessing import scale
from sklearn.cross_validation import cross_val_score
from hyperopt import fmin, tpe, hp, STATUS_OK,Trials
from sklearn.cross_validation import cross_val_score, KFold

In [111]:
#DATASET IMPORT
df=pd.read_csv('ExportTrainingWithoutMonthsAndLowOdds17.csv',low_memory=False)
df=df.dropna (axis=0, how="any")
#Choose the features
X=df[["PureProbaHome","PureProbaAway",
               'RelativePointsDifference',
              'RelativeSumTeamGoals','RelativeEatSumTeamGoals']]
y=df[['FullTimeResultID']]

In [112]:
def hyperopt_train_test(params):
    X_ = X[:]

    if 'normalize' in params:
        if params['normalize'] == 1:
            X_ = normalize(X_)
        del params['normalize']

    if 'scale' in params:
        if params['scale'] == 1:
            X_ = scale(X_)
        del params['scale']
    #clf = LogisticRegression(**params)
    #clf = RandomForestClassifier(**params)
    #clf = DecisionTreeClassifier(**params)
    clf = KNeighborsClassifier(**params)
        
    cv=KFold(X_.shape[0],5,shuffle=True,random_state=5)
    return cross_val_score(clf, X_, y.values.ravel(),cv=cv).mean()

In [113]:
best = 0
def f(params):
    global best
    acc = hyperopt_train_test(params)
    if acc > best:
        best = acc
        print 'new best:', best, params
    return {'loss': -acc, 'status': STATUS_OK}

In [114]:
Logistic4space = {
    'C': hp.uniform('C', 0, 20),     
    'penalty': hp.choice('penalty', ['l2']),
    'scale': hp.choice('scale', [0, 1]),
    'normalize': hp.choice('normalize', [0, 1]),
    'intercept_scaling': hp.uniform('intercept_scaling', 0, 10),
    'solver': hp.choice('solver', ['lbfgs','liblinear','sag', 'newton-cg']),
    'class_weight': hp.choice('class_weight', ['balanced',None]), 
    'max_iter': hp.quniform('max_iter',1,100000,1)
}

In [115]:
Forest4space = {
    'n_estimators': hp.choice('n_estimators', [5,10,15,20,100]),
    'criterion': hp.choice('criterion', ['gini','entropy']),
    'max_depth': hp.quniform('max_depth',1,20,1),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),     
    'class_weight': hp.choice('class_weight', ['balanced',None]), 
    'oob_score': hp.choice('oob_score', [True,False]),
    'verbose': hp.quniform('verbose',1,100,1)
}

In [116]:
Desicion4space = {
    'criterion': hp.choice('criterion', ['gini','entropy']),
    'max_depth': hp.quniform('max_depth',1,20,1),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),     
    'class_weight': hp.choice('class_weight', ['balanced',None])
}

In [117]:
Neighbour4space = {
    'n_neighbors': hp.quniform('n_neighbors',1,1000,1),
}

In [118]:
#parameters = [ 'C','penalty','scale', 'normalize', 'intercept_scaling', 'solver','class_weight','max_iter']
#parameters = [ 'n_estimators','criterion','max_depth', 'min_weight_fraction_leaf', 'class_weight', 'oob_score','verbose']
#parameters = [ 'criterion','max_depth', 'min_weight_fraction_leaf', 'class_weight']
parameters = [ 'n_neighbors','algorithm', 'leaf_size', 'weights',]

### Change the comments and the space for other algorithm optimistation

In [119]:
trials = Trials()
best = fmin(f, Neighbour4space , algo=tpe.suggest, max_evals=10, trials=trials)
print 'best:'
print best

new best: 0.429891304348 {'n_neighbors': 970.0}
new best: 0.434433229814 {'n_neighbors': 249.0}
new best: 0.448395445135 {'n_neighbors': 37.0}
best:
{'n_neighbors': 37.0}


In [120]:
import winsound
winsound.Beep(300,2000)

In [121]:
cols = len(parameters)
f, axes = plt.subplots(nrows=1, ncols=cols, figsize=(20,5))
cmap = plt.cm.jet
for i, val in enumerate(parameters):
    xs = np.array([t['misc']['vals'][val] for t in trials.trials]).ravel()
    ys = [-t['result']['loss'] for t in trials.trials]
  
    axes[i].scatter(xs, ys, s=20, linewidth=0.01, alpha=0.25, c=cmap(float(i)/len(parameters)))
    axes[i].set_title(val)
    axes[i].set_ylim([0.6, 1.0])
plt.show()

KeyError: 'algorithm'

# Without Months,with all odds

#### LogisticRegression
clf=LogisticRegression(multi_class='multinomial',C=0.4594858284186373,intercept_scaling =0.6601243488037598,max_iter= 69573.0,class_weight=None,solver='lbfgs')
#### Random Forest
clf2=RandomForestClassifier(oob_score=True,verbose=70.0, n_estimators= 20, min_weight_fraction_leaf= 0.0005073465668121592, criterion='entropy', max_depth= 18.0, class_weight=None)
#### Desicion Tree
clf3=DecisionTreeClassifier(min_weight_fraction_leaf= 9.098497062798826e-05, criterion = 'entropy', max_depth = 19.0, class_weight= None)
#### Nearest Neigbour 
clf4=KNeighborsClassifier(n_neighbors=41)

# Without Months, and without low odds < 1.5

#### LogisticRegression
clf=LogisticRegression(C =5.843931905064714, intercept_scaling =0.8571990171497501, solver = 'sag', max_iter = 94813.0, penalty ='l2', class_weight= None)
#### Random Forest
clf2=RandomForestClassifier(oob_score= True, verbose= 88.0, n_estimators= 15 , min_weight_fraction_leaf =0.000133536321622163, criterion='entropy' , max_depth = 17.0, class_weight = None)
#### Desicion Tree
clf3=DecisionTreeClassifier(min_weight_fraction_leaf= 0.10375200619153369, criterion ='entropy', max_depth= 15.0, class_weight=None)
#### Nearest Neigbour 
clf4=KNeighborsClassifier(n_neighbors=47)
# Without Months, and without low odds <= 1.7

#### LogisticRegression
clf=LogisticRegression(C= 8.054656300305119, intercept_scaling = 1.5481631052682, solver ='newton-cg', max_iter = 538.0, penalty= 'l2', class_weight= None)
#### Random Forest
clf2=RandomForestClassifier(oob_score= True, verbose= 79.0, n_estimators= 5 , min_weight_fraction_leaf =0.05272839386424666, criterion='entropy' , max_depth = 17.0, class_weight = None)
#### Desicion Tree
clf3=DecisionTreeClassifier(min_weight_fraction_leaf= 0.03003518945237782, criterion= 'gini', max_depth =15.0, class_weight= None)
#### Nearest Neigbour 
clf4=KNeighborsClassifier(n_neighbors=37)