In [None]:
import os
import numpy as np

def load_data(directory):
    """Read every ``.txt`` file in ``directory`` and derive its label from the file name.

    The label of a file is the part of its name before the first underscore
    (``'cat_001.txt'`` -> ``'cat'``); a name without an underscore is used
    whole. Entries that do not end in ``.txt`` are skipped.

    Args:
        directory: Path of the folder containing the text files.

    Returns:
        A ``(texts, labels)`` pair: ``texts`` is a list with the full content
        of each file and ``labels`` is a NumPy array holding the matching
        label strings, both ordered by file name.

    Raises:
        OSError: If the directory or one of the files cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    texts = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and any label codes derived from it later) stable between runs.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".txt"):
            label = filename.split('_')[0]  # Assuming labels are in the filename, like 'label_filename.txt'
            # Explicit encoding so the result does not depend on the platform's locale.
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
            labels.append(label)

    return texts, np.array(labels)

In [None]:
# Example usage: read the corpus into `texts` (file contents) and `labels`
# (one label string per file, taken from the file-name prefix).
# NOTE(review): absolute path that looks like a mounted Google Drive folder in
# Colab — confirm the drive is mounted (or adjust the path) before running.
data_dir = "/content/drive/MyDrive/Tech400_animals"
texts, labels = load_data(data_dir)

In [None]:
import numpy as np
import re

def tokenize(text):
    """Split ``text`` into lower-cased word tokens.

    A token is a maximal run of word characters (letters, digits,
    underscore); punctuation and whitespace only act as separators.

    Args:
        text: The string to tokenize.

    Returns:
        The tokens as a list of strings, in order of appearance.
    """
    lowered = text.lower()
    # The pattern has no groups, so each match's group(0) is the token itself.
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]

def build_vocab(texts):
    """Map every distinct token found in ``texts`` to a column index.

    Indices are handed out in order of first appearance, starting at 0, so
    the result has exactly one consecutive index per distinct token.

    Args:
        texts: Iterable of raw text strings.

    Returns:
        A dict mapping token -> integer index.
    """
    vocabulary = {}
    for document in texts:
        for word in tokenize(document):
            # len(vocabulary) is evaluated before the insert, so a new word
            # receives the next unused index; known words are left untouched.
            vocabulary.setdefault(word, len(vocabulary))
    return vocabulary

def text_to_vector(text, vocabulary):
    """Turn ``text`` into a bag-of-words count vector.

    Args:
        text: The raw text to encode.
        vocabulary: Dict mapping token -> position in the vector.

    Returns:
        A list of ``len(vocabulary)`` integers where entry ``i`` is the number
        of times the token with index ``i`` occurs in ``text``. Tokens that
        are not in ``vocabulary`` are ignored.
    """
    counts = [0] * len(vocabulary)
    known_positions = (
        vocabulary[word] for word in tokenize(text) if word in vocabulary
    )
    for position in known_positions:
        counts[position] += 1
    return counts

# Build vocabulary: token -> column index, collected over every loaded text.
vocabulary = build_vocab(texts)

# Convert texts to vectors: one bag-of-words count vector per text, each of
# length len(vocabulary), stacked into a single NumPy array.
X = np.array([text_to_vector(text, vocabulary) for text in texts])

In [None]:
def encode_labels(labels):
    """Replace each label by an integer code.

    Codes are assigned in order of first appearance, starting at 0.

    Args:
        labels: Iterable of hashable label values.

    Returns:
        A ``(encoded, label_to_index)`` pair: ``encoded`` is a NumPy array
        with the code of every input label, ``label_to_index`` is the dict
        mapping each distinct label to its code.
    """
    label_to_index = {}
    # setdefault returns the existing code, or stores and returns the next
    # free one (the dict size before the insert) for a label not seen yet.
    codes = [
        label_to_index.setdefault(label, len(label_to_index))
        for label in labels
    ]
    return np.array(codes), label_to_index

# Encode labels: `y` holds one integer code per sample and `label_to_index`
# maps each distinct label string back to its code.
y, label_to_index = encode_labels(labels)

In [None]:
class LogisticRegression:
    """Logistic-regression classifier trained with batch gradient descent.

    Targets that contain only the values 0 and 1 are fitted as one binary
    model. Any other set of target values is treated as multi-class and
    fitted one-vs-rest: one sigmoid output per distinct label, all trained
    together, with ``predict`` returning the label whose output scores highest.

    Attributes:
        learning_rate: Step size of each gradient-descent update.
        epochs: Number of full passes over the training data.
        weights: ``None`` until ``fit`` is called; afterwards an array of
            shape ``(n_features,)`` for binary targets or
            ``(n_features, n_classes)`` for multi-class targets.
        bias: ``None`` until ``fit`` is called; afterwards a scalar (binary)
            or an array of shape ``(n_classes,)`` (multi-class).
        classes_: ``None`` for a binary model; for a multi-class model the
            sorted distinct labels, in the column order of ``weights``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None
        self.classes_ = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``, element-wise."""
        # logaddexp(0, -z) is log(1 + exp(-z)) evaluated without overflowing,
        # so strongly negative inputs no longer trigger an overflow warning.
        return np.exp(-np.logaddexp(0.0, -z))

    def fit(self, X, y):
        """Learn weights and bias from the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of shape ``(n_samples,)`` with the target labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        classes = np.unique(y)

        if set(classes.tolist()) <= {0, 1}:
            # Binary problem: the labels are already the 0/1 targets.
            self.classes_ = None
            targets = y.astype(float)
        else:
            # Multi-class: one 0/1 indicator column per distinct label.
            self.classes_ = classes
            targets = (y[:, None] == classes[None, :]).astype(float)

        # Initialize weights and bias (one column per sigmoid output)
        n_samples, n_features = X.shape
        weights = np.zeros((n_features,) + targets.shape[1:])
        bias = np.zeros(targets.shape[1:]) if targets.ndim > 1 else 0.0

        # Gradient descent
        for _ in range(self.epochs):
            # Linear model followed by the sigmoid
            y_predicted = self.sigmoid(np.dot(X, weights) + bias)
            error = y_predicted - targets

            # Gradients of the mean log-loss
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error, axis=0)

            # Update weights and bias
            weights -= self.learning_rate * dw
            bias = bias - self.learning_rate * db

        self.weights = weights
        self.bias = bias

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Returns:
            A list with one entry per sample: 0 or 1 for a binary model
            (1 when the predicted probability exceeds 0.5), otherwise the
            label with the highest one-vs-rest score.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        classes = getattr(self, "classes_", None)
        if classes is None:
            y_predicted = self.sigmoid(linear_model)
            return [1 if i > 0.5 else 0 for i in y_predicted]
        # The sigmoid is monotonic, so the arg-max of the raw scores is the
        # arg-max of the per-class probabilities.
        return classes[np.argmax(linear_model, axis=1)].tolist()

# Training the model: fit on the bag-of-words matrix X and the integer label
# codes y built in the earlier cells, using the constructor's default
# hyper-parameters spelled out explicitly.
model = LogisticRegression(learning_rate=0.01, epochs=1000)
model.fit(X, y)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Predict on the same matrix X the model was fitted on, so every score below
# is a training-set score, not an estimate of performance on unseen data.
y_pred = model.predict(X)

accuracy = accuracy_score(y, y_pred)
# Set average to 'micro', 'macro', 'weighted', or None
# With 'micro' on single-label predictions, precision and recall coincide with
# accuracy, so the three printed numbers are expected to match.
# With None the scores come back per class, so the ':.2f' formatting in the
# prints below would have to change.
precision = precision_score(y, y_pred, average='micro')
recall = recall_score(y, y_pred, average='micro')

# Report each metric rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")

Accuracy: 0.05
Precision: 0.05
Recall: 0.05
