In [15]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [16]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from PIL import Image

np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

sobel_dx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
sobel_dy = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])

def extract_features(image_path):
    img = Image.open(image_path).convert("L")
    img = img.resize((50, 50))  
    imgdata = np.asarray(img)
    H, W = imgdata.shape

    def im2col(image, kernel_size=3):
        out_h = H - kernel_size + 1
        out_w = W - kernel_size + 1
        patches = []
        for i in range(out_h):
            for j in range(out_w):
                patch = image[i:i+kernel_size, j:j+kernel_size].flatten()
                patches.append(patch)
        return np.array(patches)

    patches = im2col(imgdata)
    flattened_kernel_dx = sobel_dx.flatten()
    flattened_kernel_dy = sobel_dy.flatten()

    output_dx = np.dot(patches, flattened_kernel_dx)
    output_dy = np.dot(patches, flattened_kernel_dy)
    dx_map = output_dx.reshape(H - 2, W - 2)
    dy_map = output_dy.reshape(H - 2, W - 2)

    gradient_magnitude = np.sqrt(dx_map**2 + dy_map**2)
    gradient_magnitude_normalized = (gradient_magnitude - np.min(gradient_magnitude)) / (np.max(gradient_magnitude) - np.min(gradient_magnitude)) * 255
    return gradient_magnitude_normalized.flatten()

def stack_features_labels(source_dir):
    data = []
    for file in os.listdir(source_dir):
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')):
            try:
                image_path = os.path.join(source_dir, file)
                label = file.split('_')[-1][0]  
                features = extract_features(image_path)
                data.append(np.append(features, label))
            except Exception as e:
                print(f"Error processing file {file}: {e}")
    return pd.DataFrame(data)

source_directory = r"C:\Users\srisi\OneDrive\Desktop\sidxt\hub\cnn-from-scratch\finaldata"  # Update this path
df = stack_features_labels(source_directory)

feature_columns = [f"feature_{i}" for i in range(df.shape[1] - 1)]
df.columns = feature_columns + ["label"]

df.to_csv("features_labels.csv", index=False)
print(df.head())
print("Dimensions of the DataFrame:", df.shape)

df = pd.read_csv("features_labels.csv")

X = df.drop(columns=["label"]).values 
y = df["label"].values

le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

input_dim = X_train.shape[1] 
output_dim = len(np.unique(y))  

model = nn.Sequential(
    nn.Linear(input_dim, 512),   
    nn.ReLU(),                   
    nn.Linear(512, 256),         
    nn.ReLU(),                  
    nn.Linear(256, output_dim)   
)
criterion = nn.CrossEntropyLoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
model.eval()
with torch.no_grad():
    val_outputs = model(X_val_tensor)
    
    _, y_pred = torch.max(val_outputs, 1)
    
    y_true = y_val_tensor.numpy()
    y_pred = y_pred.numpy()
    
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    
    print(f"Validation Accuracy: {accuracy:.4f}")
    print(f"Validation Precision: {precision:.4f}")
    print(f"Validation Recall: {recall:.4f}")
    print(f"Validation F1 Score: {f1:.4f}")

torch.save(model.state_dict(), "model_save.pt")
print("Model saved as model_save.pt")

def predict_single_image(image_path, model_path, label_encoder):
    features = extract_features(image_path)
    features = features.reshape(1, -1)  
    features_tensor = torch.tensor(features, dtype=torch.float32)

    # Load the trained model
    model = nn.Sequential(
        nn.Linear(2304, 512),
        nn.ReLU(),
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.Linear(256, len(label_encoder.classes_))
    )
    model.load_state_dict(torch.load(model_path))
    model.eval()
    with torch.no_grad():
        outputs = model(features_tensor)
        _, predicted = torch.max(outputs, 1)
    predicted_class = label_encoder.inverse_transform([predicted.item()])[0]
    return predicted_class

           feature_0           feature_1          feature_2  \
0  7.884595205398344  6.2518739175123335  7.307954750472525   
1                0.0                 0.0                0.0   
2                0.0                 0.0                0.0   
3                0.0                 0.0                0.0   
4  5.012887688488905  1.6495390798332754  7.880504314438845   

            feature_3          feature_4           feature_5  \
0  1.2979746736622324  9.687100738518199   7.895271710228687   
1                 0.0                0.0                 0.0   
2                 0.0                0.0                 0.0   
3                 0.0                0.0                 0.0   
4  0.8945881694449932  9.365445212217061  5.2009358927398734   

           feature_6           feature_7           feature_8  \
0  3.694100682587778   6.965663048116458                 0.0   
1                0.0                 0.0                 0.0   
2                0.0                 0.0    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [29]:

# Test prediction
image_path = r"C:\Users\srisi\OneDrive\Desktop\sidxt\hub\cnn-from-scratch\val\captcha_2_u.png"  # Update with your image path
model_path = "model_save.pt"  # Path to the saved model
predicted_class = predict_single_image(image_path, model_path, le)
print(f"Predicted class: {predicted_class}")


Predicted class: u


  model.load_state_dict(torch.load(model_path))
