In [None]:
import pickle
import math
import numpy as np
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import NuSVC
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import preprocessing
from xgboost import XGBClassifier

In [None]:
## Data visualisation helper
def gen_image(arr):
    """Draw one sample as a 28x28 image on the current matplotlib axes.

    ``arr`` is reshaped to (28, 28), multiplied by 255 and cast to ``uint8``
    before being handed to ``plt.imshow``.

    Returns the ``matplotlib.pyplot`` module so the caller can chain
    ``.show()`` or further pyplot calls.
    """
    grid = np.reshape(arr, (28, 28))
    scaled = grid * 255
    plt.imshow(scaled.astype(np.uint8))
    return plt

In [None]:
class MyLogisticClassifier:
    """One-vs-rest logistic regression fitted with full-batch gradient ascent.

    A separate binary logistic model (one weight vector) is trained for every
    distinct label seen in ``fit``; ``predict_classes`` returns, per sample,
    the label whose model produces the highest score.

    Parameters
    ----------
    steps : int
        Number of gradient updates performed for each per-class model.
    alpha : float
        Step size of the gradient update.
    add_intercept : bool, default True
        Prepend a constant column of ones so each model learns a bias term.
    decreasing_alpha : bool, default False
        If True, the update at 0-based ``step`` uses ``alpha / sqrt(step + 1)``.
    random_state : int or None, default None
        Seed for the weight initialisation. ``None`` draws from NumPy's global
        random state (the previous behaviour).

    Attributes set by ``fit``
    -------------------------
    classes : ndarray
        Sorted unique labels found in the training targets.
    classes_weights : dict
        Maps each label to its weight vector (intercept first when enabled).
    """

    def __init__(self, steps, alpha, add_intercept=True, decreasing_alpha=False,
                 random_state=None):
        self.steps = steps
        self.alpha = alpha
        self.add_intercept = add_intercept
        self.decreasing_alpha = decreasing_alpha
        self.random_state = random_state

    @staticmethod
    def sigmoid(x):
        """Logistic function 1 / (1 + exp(-x)), evaluated without overflow."""
        # exp(-|x|) lies in (0, 1], so neither branch can overflow.
        z = np.exp(-np.abs(x))
        return np.where(np.asarray(x) >= 0, 1 / (1 + z), z / (1 + z))

    def _design_matrix(self, X):
        """Return X as an array, with a leading column of ones if requested."""
        X = np.asarray(X)
        if self.add_intercept:
            X = np.hstack((np.ones((X.shape[0], 1)), X))
        return X

    def fit(self, _X, _y):
        """Train one binary logistic model per distinct label in ``_y``.

        Parameters
        ----------
        _X : array-like, shape (n_samples, n_features)
        _y : array-like, shape (n_samples,)

        Populates ``self.classes`` and ``self.classes_weights``; returns None.
        """
        X = self._design_matrix(_X)
        y = np.asarray(_y)

        # Fall back to the global NumPy RNG so np.random.seed() keeps working.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.classes = np.unique(y)
        self.classes_weights = {}
        for cls in self.classes:
            self.classes_weights[cls] = rng.uniform(-50, 50, X.shape[1])

        for cls in self.classes:
            # One-vs-rest target: 1 for the current class, 0 otherwise.
            y_cls = (y == cls).astype(float)
            weights = self.classes_weights[cls]
            for step in range(self.steps):
                predictions = self.sigmoid(np.dot(X, weights))
                # Gradient of the log-likelihood with respect to the weights.
                gradient = np.dot(X.T, y_cls - predictions)

                if self.decreasing_alpha:
                    # Was a bare `sqrt`, which is undefined in this notebook.
                    rate = self.alpha / np.sqrt(step + 1)
                else:
                    rate = self.alpha
                weights = weights + rate * gradient
            self.classes_weights[cls] = weights

    def predict_classes(self, _X):
        """Predict a label for every row of ``_X``.

        Returns a list with one entry per sample, each taken from
        ``self.classes``. (Earlier versions returned the position of the
        winning class, which equals the label only when labels are 0..k-1.)
        Ties go to the first class in ``self.classes``.
        """
        X = self._design_matrix(_X)

        # The sigmoid is monotonic, so ranking the raw linear scores gives the
        # same winner while avoiding ties caused by saturation at 0 or 1.
        scores = np.column_stack(
            [np.dot(X, self.classes_weights[cls]) for cls in self.classes]
        )
        best = np.argmax(scores, axis=1)
        return np.asarray(self.classes)[best].tolist()

In [None]:
# Load the pickled MNIST dataset from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open a dump that comes from a trusted source.
# encoding="latin1" is presumably needed because the dump was written by
# Python 2 -- confirm against how the file was produced.
with open("mnist.dump", mode="rb") as dump_file:
    mnist = pickle.load(dump_file, encoding="latin1")

In [None]:
# Feature matrix and labels taken from the loaded dataset object.
X_mnist = mnist.data
y_mnist = mnist.target

# Hold out 10% of the samples for testing; fixed seed keeps the split stable.
X_mnist_train, X_mnist_test, y_mnist_train, y_mnist_test = train_test_split(
    X_mnist,
    y_mnist,
    random_state=57,
    test_size=0.1,
)

# Scale every sample (row) to unit norm. normalize(copy=False) only *tries*
# to work in place -- e.g. an integer-dtype array gets a normalized copy back
# and the input stays untouched -- so always keep the returned array instead
# of relying on the side effect.
X_mnist_train = preprocessing.normalize(X_mnist_train, copy=False)
X_mnist_test = preprocessing.normalize(X_mnist_test, copy=False)

In [None]:
# Show the first three training samples, one figure each.
for idx in range(3):
    sample = X_mnist_train[idx]
    # gen_image returns pyplot, so .show() renders the figure just drawn.
    gen_image(sample).show()

In [None]:
%%time
mlr = MyLogisticClassifier(100, 0.01)
mlr.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the fitted logistic classifier.
y_mnist_train_pred, y_mnist_test_pred = (
    mlr.predict_classes(split) for split in (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of the logistic classifier on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("\nAccuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# Fit scikit-learn's LinearSVC on the training split, capped at 1000
# iterations; %%time reports how long the cell takes.
svc = LinearSVC(max_iter = 1000)
svc.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the fitted LinearSVC.
y_mnist_train_pred, y_mnist_test_pred = map(
    svc.predict, (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of LinearSVC on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# Fit an XGBoost classifier with its default hyper-parameters on the
# training split; %%time reports how long the cell takes.
xgb = XGBClassifier()
xgb.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the fitted XGBoost model.
y_mnist_train_pred, y_mnist_test_pred = map(
    xgb.predict, (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of XGBoost on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# Fit a single decision tree with default settings on the training split;
# %%time reports how long the cell takes.
dtc = DecisionTreeClassifier()
dtc.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the fitted decision tree.
y_mnist_train_pred, y_mnist_test_pred = map(
    dtc.predict, (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of the decision tree on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# Fit a random forest of 100 trees on the training split; %%time reports
# how long the cell takes.
rfc = RandomForestClassifier(n_estimators = 100)
rfc.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the fitted random forest.
y_mnist_train_pred, y_mnist_test_pred = map(
    rfc.predict, (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of the random forest on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# AdaBoost with 10 rounds over default DecisionTreeClassifier base learners,
# fitted on the training split; %%time reports how long the cell takes.
# NOTE(review): the base tree has no depth limit, so it may fit the training
# data perfectly, in which case boosting can stop after the first round --
# confirm that this configuration is intended.
abc = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators = 10)
abc.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the model currently bound to `abc`.
y_mnist_train_pred, y_mnist_test_pred = map(
    abc.predict, (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of the AdaBoost model on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

In [None]:
%%time
# AdaBoost with 100 rounds, each round boosting a 10-tree random forest,
# fitted on the training split; %%time reports how long the cell takes.
# Note: this rebinds `abc`, so any AdaBoost model assigned to that name by an
# earlier cell is no longer reachable afterwards.
abc = AdaBoostClassifier(RandomForestClassifier(n_estimators = 10), n_estimators = 100)
abc.fit(X_mnist_train, y_mnist_train)

In [None]:
# Label the train and test splits with the model currently bound to `abc`.
y_mnist_train_pred, y_mnist_test_pred = (
    abc.predict(split) for split in (X_mnist_train, X_mnist_test)
)

In [None]:
# Accuracy of the AdaBoost model on both splits.
for heading, y_true, y_pred in (
    ("\nAccuracy on train", y_mnist_train, y_mnist_train_pred),
    ("Accuracy on test", y_mnist_test, y_mnist_test_pred),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))