In [6]:
pip show opencv-python

Note: you may need to restart the kernel to use updated packages.




In [2]:
import streamlit as st
import numpy as np
import cv2
from PIL import Image
from streamlit_drawable_canvas import st_canvas


ModuleNotFoundError: No module named 'cv2'

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
import pandas as pd


In [None]:
# Fetch version 1 of the OpenML dataset named 'mnist_784'.
mnist = fetch_openml(name='mnist_784', version=1, parser='auto')

# Split the bunch into the feature table and the target column,
# casting the targets to integers.
X = mnist["data"]
y = mnist["target"].astype(int)

In [None]:
# Ensure the target is integer-typed. The loading cell already applies the
# same cast, so re-running this cell leaves y unchanged.
y = y.astype(dtype=int)

In [None]:
# Plot the first 25 samples of the dataset in a 5x5 grid.
# (These are the leading rows in dataset order, not a random draw.)
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    # Each row of X is reshaped to a 28x28 image for display.
    ax.imshow(X.iloc[i].values.reshape(28, 28), cmap='gray')
    # Positional lookup, matching X.iloc[i], so the label always belongs to
    # the plotted row regardless of how y is indexed.
    ax.set_title(f'Label: {y.iloc[i]}')
    ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
# Bar chart of how many samples carry each label.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x=y, ax=ax)
ax.set_title('Distribution of Handwritten Digits')
ax.set_xlabel('Digits')
ax.set_ylabel('Count')
# Keep the x tick labels horizontal.
plt.setp(ax.get_xticklabels(), rotation=0)
plt.show()

In [None]:
# Count nulls per column and print only the columns that have any.
null_counts = X.isnull().sum()
missing_values = pd.DataFrame(null_counts, columns=['Missing Values'])
has_missing = missing_values['Missing Values'] > 0
print(missing_values[has_missing])

In [None]:
# Scale every feature by 1/255 into a new frame; X itself is left untouched.
X_normalized = X.div(255.0)
# Show the scaled frame as the cell output.
X_normalized

In [None]:

# Streamlit UI: draw a digit on a canvas and classify it on demand.
# NOTE(review): these st.* calls presumably need to be executed through
# `streamlit run` to render anything; confirm how this cell is launched.

# Title
st.title("Handwritten")

# Drawing canvas: white free-hand strokes on a black background. The
# 280x280 canvas is ten times the 28x28 size the drawing is resized to.
canvas = st_canvas(
    fill_color="black",
    stroke_color="white",
    background_color="black",
    height=280,
    width=280,
    drawing_mode="freedraw",
)

if st.button("Predict"):
    if canvas.image_data is None:
        st.write("No drawing found. Please draw something on the canvas.")
    else:
        # Collapse the canvas pixels (treated as RGBA) to a single gray channel.
        img = cv2.cvtColor(np.array(canvas.image_data), cv2.COLOR_RGBA2GRAY)
        # INTER_AREA is the interpolation OpenCV recommends when shrinking an
        # image; the default bilinear mode can drop thin strokes at 10x.
        img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
        # Flatten to one row of 784 values and scale by 1/255, the same
        # scaling applied to the training features.
        img = img.reshape(1, -1) / 255.0

        # The processed image used to be discarded here, so pressing
        # "Predict" never produced a result. Use the voting ensemble if it
        # exists and has been fitted (estimators_ is only set by fit()).
        model = globals().get("voting_clf")
        if model is not None and hasattr(model, "estimators_"):
            prediction = model.predict(img)[0]
            st.write(f"Predicted digit: {prediction}")
        else:
            st.write("Model is not trained yet. Train the classifier before predicting.")


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score

# Hold out 20% of the normalized samples for evaluation; the fixed seed
# makes the partition repeatable across runs.
(X_train, X_test, y_train, y_test) = train_test_split(
    X_normalized,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Base models: both ensembles use 100 estimators and the same fixed seed.
_ensemble_params = {"n_estimators": 100, "random_state": 42}
rf = RandomForestClassifier(**_ensemble_params)
gb = GradientBoostingClassifier(**_ensemble_params)


In [None]:
# Combine the two base models; 'hard' voting takes the majority class label.
base_estimators = [('rf', rf), ('gb', gb)]
voting_clf = VotingClassifier(
    estimators=base_estimators,
    voting='hard',
)



In [None]:
# Train classifiers
# VotingClassifier.fit() clones each base estimator and fits the clone, so
# fitting rf and gb on their own first trained every base model twice.
# Fit the ensemble once and rebind rf / gb to its fitted members; with the
# same data and random_state these are the models the separate fits would
# have produced.
voting_clf.fit(X_train, y_train)

# NOTE(review): VotingClassifier label-encodes y before fitting its members.
# For integer labels that are already 0..k-1 the encoding should be the
# identity, so rf.predict / gb.predict still return the original labels.
# Confirm this if the label set ever changes.
rf = voting_clf.named_estimators_['rf']
gb = voting_clf.named_estimators_['gb']

# Keep the fitted ensemble as the cell's displayed output.
voting_clf


In [None]:
# Predict the held-out set with each fitted model, in the order rf, gb, ensemble.
rf_pred, gb_pred, voting_pred = (
    fitted_model.predict(X_test) for fitted_model in (rf, gb, voting_clf)
)

In [None]:
# Score each model's predictions against the true test labels.
rf_accuracy, gb_accuracy, voting_accuracy = (
    accuracy_score(y_test, predicted)
    for predicted in (rf_pred, gb_pred, voting_pred)
)


In [None]:
# Report each model's test accuracy, rounded to two decimals, one per line.
accuracy_lines = [
    f"Random Forest Accuracy: {rf_accuracy:.2f}",
    f"Gradient Boosting Accuracy: {gb_accuracy:.2f}",
    f"Voting Classifier Accuracy: {voting_accuracy:.2f}",
]
print(*accuracy_lines, sep="\n")

In [None]:
import pandas as pd

# Collect the three accuracies into a two-column summary table.
model_names = ['Random Forest', 'Gradient Boosting', 'Voting Classifier']
model_accuracies = [rf_accuracy, gb_accuracy, voting_accuracy]
results = pd.DataFrame({'Model': model_names, 'Accuracy': model_accuracies})

# Print the table as plain text.
print(results)
