Title: Popular Classification Algorithms

Support Vector Machines (SVM)

Task 1: Identify handwriting on checks and classify each letter.

In [None]:

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Define the ConvNet model
class HandwrittenLetterClassifier(nn.Module):
    """Small ConvNet that maps letter images to per-class logits.

    Layout: two ``Conv2d(3x3) -> ReLU -> MaxPool2d(2)`` stages, an adaptive
    average pool that pins the feature map to 6x6, then two fully connected
    layers.

    Args:
        num_classes: Number of output logits. Defaults to 26.
        in_channels: Number of channels in the input images. Defaults to 1.

    Forward input:
        Tensor of shape ``(batch, in_channels, H, W)`` or a single unbatched
        image of shape ``(in_channels, H, W)``.

    Forward output:
        Tensor of shape ``(batch, num_classes)`` containing raw logits
        (no softmax is applied).
    """

    def __init__(self, num_classes=26, in_channels=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        # Parameter-free, so the state_dict keys are unchanged. When the
        # feature map is already 6x6 every pooling window is a single cell,
        # which makes this an identity; other input sizes are brought to 6x6
        # so that fc1 always receives 64 * 6 * 6 features.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((6, 6))
        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Promote a single unbatched image to a batch of one so that the
        # per-sample flatten below always sees a leading batch dimension.
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.adaptive_pool(x)
        # Flatten everything except the batch dimension. A `view(-1, F)` here
        # would let the batch dimension silently absorb any size mismatch.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Load the dataset (assuming we have a dataset of handwritten letters)
# For demonstration purposes, let's assume we have a dataset of handwritten letters with 26 classes (A-Z)
#
# ImageFolder's default loader returns 3-channel RGB images, while the model's
# first convolution takes a single channel, so images are converted to
# grayscale first. They are also resized to 32x32 so that every sample has the
# same shape and the conv/pool stack yields the 6x6 feature map the first
# linear layer is sized for (32 -> 30 -> 15 -> 13 -> 6).
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])
# dataset = torchvision.datasets.ImageFolder(root='path_to_dataset', transform=transform)

# Define the data loaders
# train_loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize the model, loss function, and optimizer
model = HandwrittenLetterClassifier()
# CrossEntropyLoss consumes the raw logits returned by the model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
# model.train()
# for epoch in range(5):
#     for images, labels in train_loader:
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

# Use the model to classify handwritten letters on checks
# model.eval()
# check_image = ... (load the check image, apply `transform`, add a batch dimension)
# with torch.no_grad():  # inference only, no gradients needed
#     output = model(check_image)
# _, predicted = torch.max(output, 1)
# print("Predicted letter:", chr(predicted.item() + 65))  # Assuming A=0, B=1, ..., Z=25





Task 2: Detect gender of a speaker based on voice data.

In [None]:
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import os

# Root directory holding the voice recordings (placeholder path)
dataset_path = '/path/to/dataset'

# Placeholder file names, three per class (replace with your actual dataset)
male_files = [f'male_file{idx}.wav' for idx in (1, 2, 3)]
female_files = [f'female_file{idx}.wav' for idx in (1, 2, 3)]

# Function to load voice data
def load_voice_data(file_path):
    """Load an audio file and summarise it as one MFCC feature vector.

    Args:
        file_path: Path of an audio file that ``librosa.load`` can read.

    Returns:
        1-D NumPy array holding the mean of each MFCC coefficient, taken
        along axis 1 of the MFCC matrix returned by ``librosa.feature.mfcc``.
    """
    signal, sr = librosa.load(file_path)
    # The audio must be passed as the keyword argument `y`: recent librosa
    # releases (0.10+) no longer accept it positionally and raise TypeError.
    mfccs = librosa.feature.mfcc(y=signal, sr=sr)
    mfccs_mean = np.mean(mfccs, axis=1)
    return mfccs_mean

# Load the dataset: one feature vector per file that exists on disk.
# Labels: 0 for male, 1 for female.
X = []
y = []
for files, label in ((male_files, 0), (female_files, 1)):
    for file in files:
        file_path = os.path.join(dataset_path, file)
        if os.path.exists(file_path):
            X.append(load_voice_data(file_path))
            y.append(label)
        else:
            print(f"File {file} not found.")

X = np.array(X)
y = np.array(y)

if len(X) == 0:
    print("No files found in the dataset.")
elif len(X) < 2 or len(np.unique(y)) < 2:
    # A single sample cannot be split into non-empty train and test sets
    # (train_test_split raises), and a single class gives the classifier
    # nothing to discriminate between.
    print("Not enough data: need at least two files covering both classes.")
else:
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create a Random Forest classifier (seeded for reproducible results)
    rf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the classifier
    rf.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = rf.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))



Task 3: Classify email topics based on content.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download necessary NLTK resources
nltk.download('punkt')
# Newer NLTK releases load the tokenizer models used by word_tokenize from
# 'punkt_tab' rather than 'punkt'; fetch both so tokenization works on
# either side of that change.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Sample dataset: one (content, topic) row per example email
data = pd.DataFrame(
    [
        ('This is a meeting invitation.', 'Meeting'),
        ('Your payment is due.', 'Payment'),
        ('We have a new product launch.', 'Product'),
        ('Your account has been compromised.', 'Security'),
    ],
    columns=['content', 'topic'],
)

# Function to preprocess the content
def preprocess_content(content):
    """Normalise one email body into a space-separated string of lemmas.

    The text is lower-cased and tokenized; tokens that are not purely
    alphabetic or that are English stop words are dropped, and the remaining
    tokens are lemmatized with ``WordNetLemmatizer``.

    Args:
        content: Raw text of a single email.

    Returns:
        The surviving lemmas joined by single spaces (empty string when no
        token survives the filter).
    """
    wordnet = WordNetLemmatizer()
    english_stops = set(stopwords.words('english'))
    words = word_tokenize(content.lower())
    return ' '.join(
        wordnet.lemmatize(word)
        for word in words
        if word.isalpha() and word not in english_stops
    )

# Clean every email in place: the 'content' column now holds the lemmatized text
data['content'] = data['content'].apply(preprocess_content)

# Features are the cleaned text, targets are the topic labels
X, y = data['content'], data['topic']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
# NOTE(review): the sample frame defined earlier has exactly one email per
# topic, so the held-out row's topic never appears in the training rows;
# treat the metrics below as a smoke test only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF vocabulary is learned from the training split only, then reused
# unchanged to encode the test split
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Multinomial Naive Bayes classifier; fit() returns the fitted estimator
clf = MultinomialNB().fit(X_train_vectorized, y_train)

# Predict topics for the held-out emails
y_pred = clf.predict(X_test_vectorized)

# Report accuracy, then the per-class report and the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
for heading, metric in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))
