In [None]:
#Classification

#Classification is a very important area of supervised machine learning. 
#A large number of important machine learning problems fall within this area. 
#There are many classification methods, and logistic regression is one of them.

#Other classification algorithms include:

#k-Nearest Neighbors
#Naive Bayes classifiers
#Support Vector Machines
#Decision Trees
#Random Forests
#Neural Networks

# Example 1: Simple Logistic Regression with Scikit Learn

In [None]:
#Logistic Regression in Python With scikit-learn: Example 1

#Our first example is related to a single-variate binary classification problem. 
#This is the most straightforward kind of classification problem. 
#There are several general steps you’ll take when you’re preparing your classification models:

#Step 1: Import packages, functions, and classes

#Step 2: Get data to work with and, if appropriate, transform it

#Step 3: Create a classification model and train (or fit) it with your existing data

#Step 4: Evaluate your model to see if its performance is satisfactory

#A sufficiently good model that you define can be used to make further predictions related to new, unseen data. 
#The above procedure is the same for classification and regression

In [None]:
#Step 1: Import Packages, Functions, and Classes

#First, you have to import Matplotlib for visualization and NumPy for array operations. 
#You’ll also need LogisticRegression, classification_report(), and confusion_matrix() from scikit-learn:


import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
#Step 2: Get Data
#In practice, you’ll usually have some data to work with. 
#For the purpose of this example, let’s just create arrays for the input (𝑥) and output (𝑦) values:

# Single feature with the values 0..9, laid out as a (10, 1) column so each row is one observation.
x = np.arange(10)[:, np.newaxis]
# Binary targets: the first four observations are class 0, the remaining six are class 1.
y = np.array([0] * 4 + [1] * 6)

In [None]:
#The input and output should be NumPy arrays (instances of the class numpy.ndarray) or similar objects. 
#numpy.arange() creates an array of consecutive, equally-spaced values within a given range. 
#For more information on this function, check the official documentation or NumPy arange(): How to Use np.arange().

#The array x is required to be two-dimensional. 
#It should have one column for each input, and the number of rows should be equal to the number of observations. 
#To make x two-dimensional, you apply .reshape() with the arguments -1 to get as many rows as needed and 1 to get one column. 
#For more information on .reshape(), you can check out the official documentation. Here’s how x and y look now:

In [None]:
x

In [None]:
y

In [None]:
#Note that x has two dimensions:

#One column for a single input
#Ten rows, each corresponding to one observation
#y is one-dimensional with ten items. Again, each item corresponds to one observation. 
#It contains only zeros and ones since this is a binary classification problem.

In [None]:
#Step 3 - 1: Create a classification model and train (or fit) it with your existing data

#Once you have the input and output prepared, you can create and define your classification model. 
#You’re going to represent it with an instance of the class LogisticRegression:

model = LogisticRegression(solver='liblinear', random_state=0)

In [None]:
#LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#                   intercept_scaling=1, l1_ratio=None, max_iter=100,
#                   multi_class='warn', n_jobs=None, penalty='l2',
#                   random_state=0, solver='liblinear', tol=0.0001, verbose=0,
#                   warm_start=False)

In [None]:
#The above statement creates an instance of LogisticRegression and binds its references to the variable model. 
#LogisticRegression has several optional parameters that define the behavior of the model and approach:

#penalty is a string ('l2' by default) that decides whether there is regularization and which approach to use. Other options are 'l1', 'elasticnet', and 'none'.

#dual is a Boolean (False by default) that decides whether to use primal (when False) or dual formulation (when True).

#tol is a floating-point number (0.0001 by default) that defines the tolerance for stopping the procedure.

#C is a positive floating-point number (1.0 by default) that defines the relative strength of regularization. Smaller values indicate stronger regularization.

#fit_intercept is a Boolean (True by default) that decides whether to calculate the intercept 𝑏₀ (when True) or consider it equal to zero (when False).

#intercept_scaling is a floating-point number (1.0 by default) that defines the scaling of the intercept 𝑏₀.

#class_weight is a dictionary, 'balanced', or None (default) that defines the weights related to each class. When None, all classes have the weight one.

#random_state is an integer, an instance of numpy.random.RandomState, or None (default) that defines what pseudo-random number generator to use.

#solver is a string that decides what solver to use for fitting the model. The default was 'liblinear' in scikit-learn versions before 0.22 and is 'lbfgs' from 0.22 onward. Other options are 'newton-cg', 'sag', and 'saga'.

#max_iter is an integer (100 by default) that defines the maximum number of iterations by the solver during model fitting.

#multi_class is a string ('ovr' by default before scikit-learn 0.22, 'auto' from 0.22 onward) that decides the approach to use for handling multiple classes. Other options are 'multinomial' and 'auto'.

#verbose is a non-negative integer (0 by default) that defines the verbosity for the 'liblinear' and 'lbfgs' solvers.

#warm_start is a Boolean (False by default) that decides whether to reuse the previously obtained solution.

#n_jobs is an integer or None (default) that defines the number of parallel processes to use. None usually means to use one core, while -1 means to use all available cores.

#l1_ratio is either a floating-point number between zero and one or None (default). It defines the relative importance of the L1 part in the elastic-net regularization.

In [None]:
#You should carefully match the solver and regularization method for several reasons:

#'liblinear' solver doesn’t work without regularization.
#'newton-cg', 'sag', and 'lbfgs' don’t support L1 regularization ('liblinear' and 'saga' do).
#'saga' is the only solver that supports elastic-net regularization.
#Once the model is created, you need to fit (or train) it. 

#Model fitting is the process of determining the coefficients 𝑏₀, 𝑏₁, …, 𝑏ᵣ that correspond to the best value of the cost function. 
#You fit the model with .fit():

In [None]:
#Step 3 - 2: Create a classification model and train (or fit) it with your existing data

model.fit(x, y)

In [None]:
#.fit() takes x, y, and possibly observation-related weights. Then it fits the model and returns the model instance itself:

In [None]:
#LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#                   intercept_scaling=1, l1_ratio=None, max_iter=100,
#                   multi_class='warn', n_jobs=None, penalty='l2',
#                   random_state=0, solver='liblinear', tol=0.0001, verbose=0,
#                   warm_start=False)

In [None]:
#This is the obtained string representation of the fitted model.

#You can use the fact that .fit() returns the model instance and chain the last two statements. 
#They are equivalent to the following line of code:

In [None]:
model = LogisticRegression(solver='liblinear', random_state=0).fit(x, y)

In [None]:
#At this point, you have the classification model defined.

#You can quickly get the attributes of your model. 
#For example, the attribute .classes_ represents the array of distinct values that y takes:

In [None]:
model.classes_

In [None]:
#This is the example of binary classification, and y can be 0 or 1, as indicated above.

#You can also get the value of the slope 𝑏₁ and the intercept 𝑏₀ of the linear function 𝑓 like so:

In [None]:
#You can also get the value of the intercept 𝑏₀ of the linear function 𝑓 like so:

model.intercept_

In [None]:
#You can also get the value of the slope 𝑏₁ of the linear function 𝑓 like so:

model.coef_

In [None]:
#As you can see, 𝑏₀ is given inside a one-dimensional array, while 𝑏₁ is inside a two-dimensional array. 
#You use the attributes .intercept_ and .coef_ to get these results.

In [None]:
#Step 4: Evaluate the Model
#Once a model is defined, you can check its performance with .predict_proba(), 
#which returns the matrix of probabilities that the predicted output is equal to zero or one:


model.predict_proba(x)

In [None]:
#In the matrix above, each row corresponds to a single observation. 
#The first column is the probability of the predicted output being zero, that is 1 - 𝑝(𝑥). 
#The second column is the probability that the output is one, or 𝑝(𝑥).

In [None]:
#Step 4-1: Obtain actual predictions
#You can get the actual predictions, based on the probability matrix and the values of 𝑝(𝑥), with .predict():

model.predict(x)

In [None]:
#Step 4-2

#.score() takes the input and output as arguments and returns 
#the ratio of the number of correct predictions to the number of observations

model.score(x, y)

In [None]:
#You can get more information on the accuracy of the model with a confusion matrix. 
#In the case of binary classification, the confusion matrix shows the numbers of the following:

#True negatives in the upper-left position
#False negatives in the lower-left position
#False positives in the upper-right position
#True positives in the lower-right position

In [None]:
#Step 4-3

#To create the confusion matrix, you can use confusion_matrix() 
#and provide the actual and predicted outputs as the arguments:

confusion_matrix(y, model.predict(x))

In [None]:
#The obtained matrix shows the following:

#Three true negative predictions: The first three observations are zeros predicted correctly.
#No false negative predictions: These are the ones wrongly predicted as zeros.
#One false positive prediction: The fourth observation is a zero that was wrongly predicted as one.
#Six true positive predictions: The last six observations are ones predicted correctly.

In [None]:
#Step 4-4
#It’s often useful to visualize the confusion matrix. 
#You can do that with .imshow() from Matplotlib, which accepts the confusion matrix as the argument:

# Confusion matrix of the fitted model on the inputs x (rows: actual class, columns: predicted class).
cm = confusion_matrix(y, model.predict(x))

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted 0s', 'Predicted 1s'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual 0s', 'Actual 1s'))
# Reversed limits keep row 0 ("Actual 0s") at the top of the image.
ax.set_ylim(1.5, -0.5)
# Write each count in the centre of its cell, visiting the cells row by row.
cell_positions = [(row, col) for row in range(2) for col in range(2)]
for row, col in cell_positions:
    ax.text(col, row, cm[row, col], ha='center', va='center', color='red')
plt.show()

In [None]:
#The code above creates a heatmap that represents the confusion matrix above.
#In this figure, different colors represent different numbers and similar colors represent similar numbers. 
#Heatmaps are a nice and convenient way to represent a matrix. 

In [None]:
#You can get a more comprehensive report on the classification with classification_report():
# We use the code below to print the classification report

print(classification_report(y, model.predict(x)))

In [None]:
#This function also takes the actual and predicted outputs as arguments. 
#It returns a report on the classification as a dictionary if you provide output_dict=True or a string otherwise.

In [None]:
#Improve the Model
#You can improve your model by setting different parameters. 
#For example, let’s work with the regularization strength C equal to 10.0, instead of the default value of 1.0:

# Rebuild the classifier with C raised to 10.0, then fit it on the same data.
model = LogisticRegression(
    C=10.0,
    solver='liblinear',
    random_state=0,
)
model.fit(x, y)

In [None]:
#Now you have another model with different parameters. 
#It’s also going to have a different probability matrix and a different set of coefficients and predictions:




In [None]:
model.intercept_

In [None]:
model.coef_

In [None]:
model.predict_proba(x)

In [None]:
#As you can see, the absolute values of the intercept 𝑏₀ and the coefficient 𝑏₁ are larger. 
#This is the case because the larger value of C means weaker regularization, 
#or weaker penalization related to high values of 𝑏₀ and 𝑏₁.

In [None]:
#Different values of 𝑏₀ and 𝑏₁ imply a change of the logit 𝑓(𝑥), different values of the probabilities 𝑝(𝑥), 
#a different shape of the regression line, and possibly changes in other predicted outputs 
#and classification performance. The boundary value of 𝑥 for which 𝑝(𝑥)=0.5 and 𝑓(𝑥)=0 is higher now. It’s above 3. 
#In this case, you obtain all true predictions, as shown by the accuracy, confusion matrix, and classification report

In [None]:
#You can get the actual predictions, based on the probability matrix and the values of 𝑝(𝑥), with .predict():

model.predict(x)

In [None]:
#With all ten observations now classified correctly, 
#the accuracy of your model, obtained with .score(), is 1.0:

model.score(x, y)

In [None]:
confusion_matrix(y, model.predict(x))

In [None]:
print(classification_report(y, model.predict(x)))

In [None]:
#The score (or accuracy) of 1 and the zeros in the lower-left and upper-right fields of the 
#confusion matrix indicate that the actual and predicted outputs are the same

# Example 2: Logistic Regression in Python with scikit-learn

In [None]:

#Let’s solve another classification problem. 
#It’s similar to the previous one, except that the output differs in the second value. 
#The code is similar to the previous case:

In [None]:
# Step 1: Import packages, functions, and classes
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Step 2: Get data
# Same single-feature column as in Example 1; only the second label differs (it is 1 instead of 0).
x = np.arange(10)[:, np.newaxis]
y = np.array([0, 1, 0, 0] + [1] * 6)

In [None]:
# Step 3: Create a model and train it
# liblinear solver, C=10.0, fixed seed so the fit is reproducible.
model = LogisticRegression(
    C=10.0,
    solver='liblinear',
    random_state=0,
)
model.fit(x, y)

In [None]:
# Step 4-1: Evaluate the model
p_pred = model.predict_proba(x) #obtain probabilities


In [None]:
#Step 4-2: Obtain predictions/classification results for values in x
y_pred = model.predict(x) 


In [None]:
#Step 4-3: .score() takes the input (x) and output (y) as arguments 
#and returns the ratio of the number of correct predictions to the number of observations

score_ = model.score(x, y)


In [None]:
#Step 4-4: Obtain confusion matrix

conf_m = confusion_matrix(y, y_pred)


In [None]:
#Step 4-5: Obtain classification report

report = classification_report(y, y_pred)

In [None]:
#Display x

print('x:', x, sep='\n')

In [None]:
#Display y

print('y:', y, sep='\n', end='\n\n')

In [None]:
#Display intercept

print('intercept:', model.intercept_)

In [None]:
#Display coefficients

print('coef:', model.coef_, end='\n\n')

In [None]:
#Display predicted probabilities

print('p_pred:', p_pred, sep='\n', end='\n\n')

In [None]:
#Display predicted values of y

print('y_pred:', y_pred, end='\n\n')

In [None]:
#Display accuracy score

print('score_:', score_, end='\n\n')

In [None]:
#Display confusion matrix

print('conf_m:', conf_m, sep='\n', end='\n\n')

In [None]:
#Display classification report

print('report:', report, sep='\n')

In [None]:
#In this case, the score (or accuracy) is 0.8. 
#There are two observations classified incorrectly. 
#One of them is a false negative, while the other is a false positive.

# Example 3: Logistic Regression in Python With StatsModels

In [None]:

#You can also implement logistic regression in Python with the StatsModels package. 
#Typically, you want this when you need more statistical details related to models and results. 
#The procedure is similar to that of scikit-learn.

In [None]:
#Step 1: Import Packages
#All you need to import is NumPy and statsmodels.api:


import numpy as np
import statsmodels.api as sm

In [None]:
#Step 2: Get Data
#You can get the inputs and output the same way as you did with scikit-learn. 
#However, StatsModels doesn’t take the intercept 𝑏₀ into account, 
#and you need to include the additional column of ones in x. You do that with add_constant():


# Targets are the same as in Example 2.
y = np.array([0, 1, 0, 0] + [1] * 6)
# Feature column 0..9 with a column of ones added by add_constant() for the intercept term.
x = sm.add_constant(np.arange(10).reshape(-1, 1))

In [None]:
#add_constant() takes the array x as the argument and returns a new array with the additional column of ones. 
#This is how x and y look:

In [None]:
x

In [None]:
y

In [None]:
#This is your data. The first column of x corresponds to the intercept 𝑏₀. 
#The second column contains the original values of x.

In [None]:
#Step 3: Create a Model and Train It
#Your logistic regression model is going to be an instance of the class statsmodels.discrete.discrete_model.Logit. 
#This is how you can create one:

model = sm.Logit(y, x)

In [None]:
#Note that the first argument here is y, followed by x.

In [None]:
#Now, you’ve created your model and you should fit it with the existing data. 
#You do that with .fit() or, if you want to apply L1 regularization, with .fit_regularized():

result = model.fit(method='newton')

In [None]:
#The model is now ready, and the variable result holds useful data. 
#For example, you can obtain the values of 𝑏₀ and 𝑏₁ with .params:

result.params

In [None]:
#The first element of the obtained array is the intercept 𝑏₀, while the second is the slope 𝑏₁.

In [None]:
#Step 4: Evaluate the Model
#You can use result to obtain the probabilities of the predicted outputs being equal to one:


result.predict(x)

In [None]:
#Predict actual outputs. Remember that from the sigmoid function, when the probability for 
# an observation is >= 0.5, the output is predicted as "success" or 1

#These probabilities are calculated with .predict(). 
#You can use their values to get the actual predicted outputs:

(result.predict(x) >= 0.5).astype(int)

In [None]:
#As an alternative to the cell above, you can also assign the predicted outputs to a variable before displaying it:

predicted_outputs= (result.predict(x) >= 0.5).astype(int)

In [None]:
#Display predicted outputs

predicted_outputs

In [None]:
#Notice that the predictions use 0.5 as a threshold for deciding whether the predicted value is 1 or not:
#for probabilities greater than or equal to 0.5, the prediction is a 1;
#for probabilities less than 0.5, the prediction is a 0.

In [None]:
#The obtained array contains the predicted output values. 
#As you can see, 𝑏₀, 𝑏₁, and the probabilities obtained with scikit-learn and StatsModels are different. 
#This is the consequence of applying different iterative and approximate procedures and parameters. 
#However, in this case, you obtain the same predicted outputs as when you used scikit-learn.

#You can obtain the confusion matrix with .pred_table():

result.pred_table()

In [None]:
#This example is the same as when you used scikit-learn because the predicted outputs are equal. 
#The confusion matrices you obtained with StatsModels and scikit-learn differ 
#in the types of their elements (floating-point numbers and integers)


#.summary() and .summary2() get output data that you might find useful in some circumstances:

In [None]:
#Option 1: Using .summary() to produce summary regression results

result.summary()

In [None]:
#Option 2: Using .summary2() to produce summary regression results

result.summary2()

In [None]:
#These are detailed reports with values that you can obtain with appropriate methods and attributes.

# Example 4: Logistic Regression in Python: Handwriting Recognition

In [None]:

#The previous examples illustrated the implementation of logistic regression in Python, 
#as well as some details related to this method. 
#The next example will show you how to use logistic regression to solve a real-world classification problem. 
#The approach is very similar to what you’ve already seen, but with a larger dataset and several additional concerns.

#This example is about image recognition. 
#To be more precise, you’ll work on the recognition of handwritten digits. 
#You’ll use a dataset with 1797 observations, each of which is an image of one handwritten digit. 
#Each image has 64 px, with a width of 8 px and a height of 8 px.

In [None]:
#Inputs
#The inputs (𝐱) are vectors with 64 dimensions or values. 
#Each input vector describes one image. Each of the 64 values represents one pixel of the image. 
#The input values are the integers between 0 and 16, depending on the shade of gray for the corresponding pixel. 

#Output/target
#The output (𝑦) for each observation is an integer between 0 and 9, consistent with the digit on the image. 
#There are ten classes in total, each corresponding to one digit.

In [None]:
#Step 1: Import Packages
#You’ll need to import Matplotlib, NumPy, and several functions and classes from scikit-learn:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
#Step 2-1: Get Data
#You can grab the dataset directly from scikit-learn with load_digits(). 
#It returns a tuple of the inputs and output:

x, y = load_digits(return_X_y=True)

In [None]:
#Now you have the data. This is how x and y look:

In [None]:
x

In [None]:
y

In [None]:
#That’s your data to work with. x is a multi-dimensional array with 1797 rows and 64 columns. 
#It contains integers from 0 to 16. y is a one-dimensional array with 1797 integers between 0 and 9.

In [None]:
#Step 2-2: Split Data

#It’s a good and widely-adopted practice to split the dataset you’re working with into two subsets. 
#These are the training set and the test set. This split is usually performed randomly. 

#You should use the training set to fit your model. 
#Once the model is fitted, you evaluate its performance with the test set. 

#It’s important not to use the test set in the process of fitting the model. 

#This approach enables an unbiased evaluation of the model.

#One way to split your dataset into training and test sets is to apply train_test_split():

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [None]:
#train_test_split() accepts x and y. 
#It also takes test_size, which determines the size of the test set, 
#and random_state to define the state of the pseudo-random number generator, 
#as well as other optional arguments. This function returns a list with four arrays:

#x_train: the part of x used to fit the model
#x_test: the part of x used to evaluate the model
#y_train: the part of y that corresponds to x_train
#y_test: the part of y that corresponds to x_test

#Once your data is split, you can forget about x_test and y_test until you define your model.



In [None]:
#Step 2-3: Scale Data

#Standardization is the process of transforming data in a way such that the 
#mean of each column becomes equal to zero, and the standard deviation of each column is one. 
#This way, you obtain the same scale for all columns. Take the following steps to standardize your data:

#a. Calculate the mean and standard deviation for each column.
#b. Subtract the corresponding mean from each element.
#c. Divide the obtained difference by the corresponding standard deviation.

#It’s a good practice to standardize the input data that you use for logistic regression, 
#although in many cases it’s not necessary. Standardization might improve the performance of your algorithm. 
#It helps if you need to compare and interpret the weights. 
#It’s important when you apply penalization because the algorithm is 
# actually penalizing against the large values of the weights.

#You can standardize your inputs by creating an instance of StandardScaler and calling .fit_transform() on it:

In [None]:
#You can standardize your inputs by creating an instance of StandardScaler and calling .fit_transform() on it:

# Fit the scaler on the training split only and standardize that split.
# Caution: x_train is rebound to its scaled version, so re-running this cell on its own
# refits the scaler on already-scaled data; re-run the train/test split cell first.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

In [None]:
#.fit_transform() fits the instance of StandardScaler to the array passed as the argument,
# transforms this array, and returns the new, standardized array. 
#Now, x_train is a standardized input array.

In [None]:
#Step 3: Create a Model and Train It
#This step is very similar to the previous examples. 
#The only difference is that you use x_train and y_train subsets to fit the model. 
#Again, you should create an instance of LogisticRegression and call .fit() on it:



In [None]:
# One-vs-rest logistic regression with C=0.05 (stronger regularization than the default C=1.0),
# fitted on the standardized training split.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class` argument — confirm
# against the installed version before upgrading.
model = LogisticRegression(solver='liblinear', C=0.05, multi_class='ovr', random_state=0)
model.fit(x_train, y_train)

In [None]:
#When you’re working with problems with more than two classes, 
#you should specify the multi_class parameter of LogisticRegression. 
#It determines how to solve the problem:

#'ovr' says to make the binary fit for each class.
#'multinomial' says to apply the multinomial loss fit.

In [None]:
#The last statement yields the following output since .fit() returns the model itself:


#LogisticRegression(C=0.05, class_weight=None, dual=False, fit_intercept=True,
#                   intercept_scaling=1, l1_ratio=None, max_iter=100,
#                   multi_class='ovr', n_jobs=None, penalty='l2', random_state=0,
#                   solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [None]:
#These are the parameters of your model. It’s now defined and ready for the next step.

In [None]:
#Step 4: Evaluate the Model

#You should evaluate your model similar to what you did in the previous examples, 
#with the difference that you’ll mostly use x_test and y_test, 
#which are the subsets of your dataset not applied for training. 

#If you’ve decided to standardize x_train, then the obtained model relies on the scaled data, 
#so x_test should be scaled as well with the same instance of StandardScaler:

# Apply the scaler already fitted on x_train; nothing is refitted here.
# Caution: x_test is rebound to its scaled version, so running this cell twice scales the
# test set twice. Re-run from the train/test split cell if you need to repeat it.
x_test = scaler.transform(x_test)

In [None]:
#That’s how you obtain a new, properly-scaled x_test. 
#In this case, you use .transform(), which only transforms the argument, without fitting the scaler.

In [None]:
#You can obtain the predicted outputs with .predict():

y_pred = model.predict(x_test)

In [None]:
#The variable y_pred is now bound to an array of the predicted outputs. 
#Note that you use x_test as the argument here.

In [None]:
#You can obtain the accuracy with .score():

model.score(x_train, y_train)

In [None]:
#You can obtain the accuracy with .score():

model.score(x_test, y_test)

In [None]:
#Actually, you can get two values of the accuracy, one obtained with the training set and the other with the test set. 
#It might be a good idea to compare the two, 
# as a situation where the training set accuracy is much higher might indicate overfitting. 
#The test set accuracy is more relevant for evaluating the performance on unseen data since it’s not biased.

In [None]:
#You can get the confusion matrix with confusion_matrix():

confusion_matrix(y_test, y_pred)

In [None]:
#The obtained confusion matrix is large. In this case, it has 100 numbers. 
#This is a situation when it might be really useful to visualize it:

# Confusion matrix on the held-out test split (rows: actual digit, columns: predicted digit).
cm = confusion_matrix(y_test, y_pred)

# Axis-label size in points, chosen to stay legible on the 8x8-inch figure.
font_size = 14
# Take the number of classes from the matrix itself so ticks, limits and annotations
# always match its shape, even if some digit is missing from the test split.
n_classes = cm.shape[0]

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(cm)
ax.grid(False)
ax.set_xlabel('Predicted outputs', fontsize=font_size, color='black')
ax.set_ylabel('Actual outputs', fontsize=font_size, color='black')
ax.xaxis.set(ticks=range(n_classes))
ax.yaxis.set(ticks=range(n_classes))
# Reversed limits keep row 0 at the top of the image.
ax.set_ylim(n_classes - 0.5, -0.5)
# Write each count in the centre of its cell.
for i in range(n_classes):
    for j in range(n_classes):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()

In [None]:
#The code above produces a heatmap of the confusion matrix.

In [None]:
#The heatmap above illustrates the confusion matrix with numbers and colors. 
#You can see that the shades of purple represent small numbers (like 0, 1, or 2), 
#while green and yellow show much larger numbers (27 and above).

#The numbers on the main diagonal (27, 32, …, 36) show the number of correct predictions from the test set. 
#For example, there are 27 images with zero, 32 images of one, 
#and so on that are correctly classified. Other numbers correspond to the incorrect predictions. 
#For example, the number 1 in the third row and the first column shows that 
#there is one image with the number 2 incorrectly classified as 0

In [None]:
#Step 5: Finally, you can get the report on classification as a string or dictionary with classification_report():

print(classification_report(y_test, y_pred))

In [None]:
#The report above shows additional information, like the support and precision of classifying each digit.

In [None]:
#Beyond Logistic Regression in Python
#Logistic regression is a fundamental classification technique. 
#It’s a relatively uncomplicated linear classifier. 
#Despite its simplicity and popularity, there are cases (especially with highly complex models) 
#where logistic regression doesn’t work well. In such circumstances, you can use other classification techniques:

#k-Nearest Neighbors
#Naive Bayes classifiers
#Support Vector Machines
#Decision Trees
#Random Forests
#Neural Networks