In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

In [3]:
# fetching data
df = pd.read_csv("/content/drive/MyDrive/ML/Dataset/email.csv")
df["Category"] = df["Category"].map({"ham": 0, "spam": 1})
df.head()

Unnamed: 0,Category,Message
0,0.0,"Go until jurong point, crazy.. Available only ..."
1,0.0,Ok lar... Joking wif u oni...
2,1.0,Free entry in 2 a wkly comp to win FA Cup fina...
3,0.0,U dun say so early hor... U c already then say...
4,0.0,"Nah I don't think he goes to usf, he lives aro..."


In [4]:
# Convert text messages into numerical feature vectors
vectorizer = CountVectorizer(stop_words="english", max_features=300)
X = vectorizer.fit_transform(df["Message"]).toarray()
y = df["Category"].values

In [5]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Logistic regression
class LogisticRegressionMiniBatch:
    def __init__(self, lr=0.01, epochs=300, batch_size=64):
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.epochs):
            indices = np.random.permutation(n_samples)
            X_shuffled, y_shuffled = X[indices], y[indices]

            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i+self.batch_size]
                y_batch = y_shuffled[i:i+self.batch_size]

                linear_model = np.dot(X_batch, self.weights) + self.bias
                y_pred = self.sigmoid(linear_model)

                dw = (1 / self.batch_size) * np.dot(X_batch.T, (y_pred - y_batch))
                db = (1 / self.batch_size) * np.sum(y_pred - y_batch)

                self.weights -= self.lr * dw
                self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        return np.array([1 if i > 0.5 else 0 for i in y_pred])

In [7]:
# Train model
model = LogisticRegressionMiniBatch(lr=0.01, epochs=300, batch_size=64)
model.fit(X_train, y_train)

In [8]:
# Predictions
y_pred = model.predict(X_test)

In [9]:
# Evaluate accuracy
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 85.92%
