# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importing Data

In [2]:
from sklearn.datasets import load_digits

In [3]:
df = load_digits()

In [26]:
df.target


array([0, 1, 2, ..., 8, 9, 8])

# Data preprocessing

In [9]:
df.images.shape

(1797, 8, 8)

In [11]:
df.images[0].shape

(8, 8)

In [13]:
n_samples = len(df.images)

In [15]:
data = df.images.reshape(n_samples, -1)

In [16]:
data.shape

(1797, 64)

# Scaling data

In [18]:
data.min()

0.0

In [19]:
data.max()

16.0

In [20]:
data = data/16

In [21]:
data.min()

0.0

In [22]:
data.max()

1.0

In [23]:
data

array([[0.    , 0.    , 0.3125, ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.625 , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 1.    , 0.5625, 0.    ],
       ...,
       [0.    , 0.    , 0.0625, ..., 0.375 , 0.    , 0.    ],
       [0.    , 0.    , 0.125 , ..., 0.75  , 0.    , 0.    ],
       [0.    , 0.    , 0.625 , ..., 0.75  , 0.0625, 0.    ]])

# Train Test split

In [24]:
from sklearn.model_selection import train_test_split

In [27]:
X_train,X_test,y_train,y_test = train_test_split(data,df.target,train_size=0.7,test_size=0.3)

In [28]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((1257, 64), (540, 64), (1257,), (540,))

# Random Forest Model

In [29]:
from sklearn.ensemble import RandomForestClassifier

In [30]:
rf = RandomForestClassifier()

In [31]:
rf.fit(X_train,y_train) 

RandomForestClassifier()

# Predict Test Data 

In [32]:
y_pred = rf.predict(X_test)

In [33]:
y_pred

array([7, 3, 9, 9, 9, 8, 2, 9, 5, 7, 4, 6, 6, 4, 7, 7, 8, 4, 2, 1, 3, 7,
       6, 6, 1, 3, 3, 8, 5, 8, 5, 0, 2, 0, 6, 4, 2, 9, 5, 1, 4, 2, 3, 7,
       9, 1, 8, 0, 7, 1, 9, 6, 0, 0, 4, 6, 7, 6, 1, 8, 8, 9, 7, 0, 9, 5,
       5, 5, 2, 7, 9, 6, 4, 1, 2, 2, 9, 2, 4, 1, 7, 3, 8, 9, 7, 6, 2, 6,
       3, 4, 8, 3, 2, 3, 4, 2, 8, 0, 1, 3, 0, 8, 5, 2, 8, 0, 9, 6, 1, 0,
       6, 7, 3, 2, 8, 0, 7, 3, 5, 1, 5, 4, 8, 0, 4, 9, 8, 0, 3, 8, 6, 3,
       0, 7, 1, 5, 3, 4, 4, 9, 4, 8, 0, 8, 3, 4, 0, 4, 6, 1, 5, 6, 9, 0,
       8, 8, 6, 4, 7, 7, 8, 2, 3, 1, 3, 6, 1, 1, 9, 3, 6, 3, 3, 8, 7, 8,
       3, 1, 9, 8, 3, 1, 4, 3, 1, 1, 0, 4, 0, 1, 6, 6, 2, 0, 6, 6, 4, 3,
       3, 8, 5, 1, 5, 5, 0, 8, 6, 5, 7, 9, 7, 4, 5, 5, 7, 8, 8, 1, 9, 4,
       9, 8, 2, 3, 3, 3, 7, 3, 4, 0, 1, 7, 7, 6, 2, 0, 4, 3, 8, 0, 7, 8,
       8, 4, 4, 7, 8, 4, 2, 6, 0, 4, 7, 5, 3, 4, 1, 5, 5, 3, 2, 2, 7, 5,
       2, 6, 4, 8, 7, 1, 8, 5, 1, 5, 6, 0, 5, 4, 0, 4, 1, 4, 0, 2, 1, 1,
       9, 7, 0, 9, 5, 9, 4, 7, 6, 0, 1, 0, 4, 1, 2,

# Model Accuracy

In [39]:
from sklearn.metrics import mean_absolute_error , mean_absolute_percentage_error , r2_score , confusion_matrix , classification_report

In [36]:
mean_absolute_error(y_test,y_pred)

0.046296296296296294

In [37]:
mean_absolute_percentage_error(y_test,y_pred)

33359997239781.453

In [38]:
r2_score(y_test,y_pred)

0.9796432388543211

In [40]:
confusion_matrix(y_test,y_pred)

array([[56,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0, 52,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1, 48,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 65,  0,  0,  0,  0,  1,  0],
       [ 0,  0,  0,  0, 52,  0,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0, 49,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1, 53,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 52,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  1, 51,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  2, 52]], dtype=int64)