In [1]:
# Standard library first, then third-party packages.
import warnings

import numpy as np
import pandas as pd

# Silence every warning for the rest of the session.
# NOTE(review): a blanket filter also hides library diagnostics (for example
# convergence or data-conversion warnings) — consider narrowing it.
warnings.simplefilter("ignore")

In [3]:
# Read the pre-split training and test tables.
df, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/df_train.csv', '../data/df_test.csv')
)

# Split each table into feature columns and the "Class" target column.
# The training target is kept as a one-column DataFrame, the test target as a Series.
# NOTE(review): estimators normally take a 1-D target — confirm the DataFrame
# form of `y` is intentional before changing it.
X, y = df.drop(columns=["Class"]), df[["Class"]]
X_test, y_test = df_test.drop(columns=["Class"]), df_test["Class"]

In [4]:
from funs import evaluate_accuracy
from sklearn.metrics import accuracy_score, classification_report

### Dummy Classifier

In [6]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.dummy import DummyClassifier

# Baseline classifier configured with the 'most_frequent' strategy.
dc = DummyClassifier(
    strategy='most_frequent',
    random_state=311,
)

# evaluate_accuracy is the helper imported from `funs`.
# NOTE(review): presumably fits on (X, y) and prints test-set accuracy — confirm.
evaluate_accuracy(dc, X, y, X_test, y_test)

Accuracy:  0.26058062259531306


### Decision Tree


In [13]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Unconstrained tree with default hyper-parameters (seeded).
tree1 = DecisionTreeClassifier(random_state=311)

# Size-constrained tree using the log-loss split criterion.
# NOTE(review): seeded with 0 while the other models use 311 — confirm intended.
tree2 = DecisionTreeClassifier(
    criterion='log_loss',
    max_depth=9,
    max_leaf_nodes=28,
    min_samples_leaf=2,
    random_state=0,
)

# Score both trees, in the same order as they are defined.
for _tree in (tree1, tree2):
    evaluate_accuracy(_tree, X, y, X_test, y_test)

Accuracy:  0.8849247988807275
Accuracy:  0.9010143406785589


### SVC

In [7]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel.
svc = SVC(
    kernel='linear',
    random_state=311,
)
evaluate_accuracy(svc, X, y, X_test, y_test)

Accuracy:  0.9104582021685904


### Linear SVC

In [8]:
from sklearn.svm import LinearSVC

# Linear SVM with hinge loss, L2 penalty and no intercept term.
# NOTE(review): this rebinds `svc`, which the SVC cell also assigns — a
# distinct name would avoid confusion when cells are re-run out of order.
svc = LinearSVC(
    penalty='l2',
    loss='hinge',
    dual='auto',
    fit_intercept=False,
    random_state=311,
)
evaluate_accuracy(svc, X, y, X_test, y_test)

Accuracy:  0.478139209513816


### SGDClassifier

In [9]:
from sklearn.linear_model import SGDClassifier

# Hinge-loss linear model trained with averaged SGD: no penalty term, no
# intercept, adaptive learning rate starting at eta0=1, samples not shuffled.
# NOTE(review): with penalty=None and shuffle=False, `alpha` and `random_state`
# may have no effect — confirm against the scikit-learn documentation.
sgd = SGDClassifier(
    loss='hinge',
    penalty=None,
    alpha=0.1,
    fit_intercept=False,
    max_iter=1000,
    learning_rate='adaptive',
    eta0=1,
    shuffle=False,
    average=True,
    random_state=311,
)
evaluate_accuracy(sgd, X, y, X_test, y_test)

Accuracy:  0.6379853095487933


Tuning the parameters improved accuracy from 0.2 to 0.64, but the result is still mediocre.

### Bayes

In [10]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with its default settings.
gaussian = GaussianNB()
evaluate_accuracy(gaussian, X, y, X_test, y_test)

Accuracy:  0.7562084644980762


### K Nearest Neighbours

In [11]:
from sklearn.neighbors import KNeighborsClassifier

# 12-neighbour classifier with distance-weighted votes.
# NOTE(review): p=0.2 is below 1, so the Minkowski "distance" is not a true
# metric — confirm this value is deliberate.
knn = KNeighborsClassifier(
    n_neighbors=12,
    weights='distance',
    algorithm='auto',
    p=0.2,
)
evaluate_accuracy(knn, X, y, X_test, y_test)

Accuracy:  0.8835257082896117


### Ada Boost

In [14]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost (SAMME) over 21 boosted copies of `tree2`, the size-constrained
# decision tree defined in the Decision Tree cell — that cell must run first.
ada = AdaBoostClassifier(
    estimator=tree2,
    n_estimators=21,
    learning_rate=0.2,
    algorithm='SAMME',
    random_state=311,
)
evaluate_accuracy(ada, X, y, X_test, y_test)

Accuracy:  0.9233997901364114


### Random Forest Classifier

In [15]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: 80 entropy-criterion trees, depth capped at 20, with
# max_features=None (all features are candidates at each split).
rfc = RandomForestClassifier(
    n_estimators=80,
    criterion='entropy',
    max_depth=20,
    max_features=None,
    random_state=311,
    warm_start=True,  # NOTE(review): no effect on a single fit of a fresh estimator — confirm it is needed
)

# `y` is a one-column DataFrame. Flatten it to 1-D so the estimator receives
# the target shape it expects, instead of relying on an implicit column-vector
# conversion whose warning is silenced by the notebook's global filter.
rfc.fit(X, np.ravel(y))
y_pred = rfc.predict(X_test)

# Overall accuracy followed by per-class precision / recall / F1.
print("Accuracy: ", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy:  0.925498426023085
              precision    recall  f1-score   support

    BARBUNYA       0.94      0.92      0.93       278
      BOMBAY       0.99      1.00      1.00       109
        CALI       0.94      0.94      0.94       342
    DERMASON       0.90      0.93      0.92       745
       HOROZ       0.96      0.94      0.95       405
       SEKER       0.96      0.94      0.95       426
        SIRA       0.88      0.87      0.88       554

    accuracy                           0.93      2859
   macro avg       0.94      0.94      0.94      2859
weighted avg       0.93      0.93      0.93      2859



### Neural Network (MLP Classifier)

In [16]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with identity activation, trained with Adam.
# random_state is pinned to the seed used by the other models in this notebook
# so the reported accuracy is reproducible from one run to the next.
mlp = MLPClassifier(
    activation='identity',
    solver='adam',
    alpha=0.02,
    learning_rate='adaptive',  # NOTE(review): scikit-learn documents this as used only with solver='sgd' — confirm
    learning_rate_init=0.02,
    random_state=311,
)
evaluate_accuracy(mlp, X, y, X_test, y_test)

Accuracy:  0.6432318992654774
