# Stochastic Gradient Descent
Load the `mnist` dataset. Split it into training and test sets. Train and test a stochastic gradient descent model using scikit-learn. Check the documentation to identify the most important hyperparameters, attributes, and methods of the model. Use them in practice.

In [2]:
import pandas as pd #Dataframe loading or making new dataframes
from sklearn.metrics import accuracy_score #To get accuracy
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split # To split data
from sklearn.model_selection import GridSearchCV # Hyperparameter tunning with Gridsearch form sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import RandomizedSearchCV



In [3]:
df = pd.read_csv('mnist.csv')
df = df.set_index('id')

x = df.drop(['class'], axis=1)
y = df['class'] # Selecting the labeled data
 
print(df.shape)
df.head(3)

(4000, 785)


Unnamed: 0_level_0,class,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31953,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34452,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60897,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Spliting Data

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y)

print('df', df.shape)
print('x_train', x_train.shape)
print('x_test', x_test.shape)
print('y_train', y_train.shape)
print('y_test', y_test.shape)

df (4000, 785)
x_train (3000, 784)
x_test (1000, 784)
y_train (3000,)
y_test (1000,)


# Scaling The Features

In [5]:
scaler = StandardScaler(with_mean=False)
scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Model Selection and Hyper parameter tunning with Stochastic Gradient Descent

In [6]:
parameters_grid = {
    "loss": ["squared_loss", "hinge"],
    "penalty": ["l1", "l2"],
    "alpha": [0.1, 0.001, 0.0001, 0.00001, 0.00005],
    "max_iter": [100, 1000, 3000],
    "tol": [None, 0.0001, 0.001, 0.01],
    "shuffle": [True, False],
    "learning_rate": ["constant", "optimal"],
    "eta0": [0.001, 0.01, 0.1],
    "early_stopping": [True, False],
    "n_iter_no_change": [1, 5, 10]
}

model_1 = RandomizedSearchCV(SGDClassifier(),
                            parameters_grid, n_iter=100, scoring="accuracy", cv=5, n_jobs=-1)

model_1.fit(x_train, y_train)
print("Accuracy of best SGD Classifier = {:.2f}".format(model_1.best_score_))
print("Best found hyperparameters of SGD Classifier = {:.2f}".format(model_1.best_params_))






Accuracy of best SGD Classifier = 0.89


TypeError: unsupported format string passed to dict.__format__



# Testing The trained Model

In [7]:
y_predicted = model_1.predict(x_test)
accuracy = accuracy_score(y_test, y_predicted)

cm = confusion_matrix(y_test, y_predicted)
precision, recall, f1, support = precision_recall_fscore_support(y_test, y_predicted)

print("Accuracy =", accuracy)
print("Precision = ", precision)
print("Recall = ", recall)
print('F1-Score', f1)
print("Confusion Matrix:\n", cm)

Accuracy = 0.89
Precision =  [0.96       0.912      0.86170213 0.91304348 0.92592593 0.80952381
 0.9        0.8852459  0.8372093  0.87096774]
Recall =  [0.98969072 0.94214876 0.93103448 0.84       0.89285714 0.75555556
 0.9        0.98181818 0.8        0.84375   ]
F1-Score [0.97461929 0.92682927 0.89502762 0.875      0.90909091 0.7816092
 0.9        0.93103448 0.81818182 0.85714286]
Confusion Matrix:
 [[ 96   0   0   0   0   0   1   0   0   0]
 [  0 114   1   2   0   2   0   0   2   0]
 [  0   0  81   0   1   0   2   1   1   1]
 [  1   2   3 105   0   4   1   5   3   1]
 [  0   0   1   0  75   0   2   0   0   6]
 [  2   3   1   5   1  68   2   1   5   2]
 [  0   0   5   0   0   3  90   0   2   0]
 [  0   1   0   0   0   0   0 108   0   1]
 [  1   3   1   1   2   6   2   1  72   1]
 [  0   2   1   2   2   1   0   6   1  81]]
