In [None]:
!pip install torch pandas scikit-learn

In [None]:
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Read data.csv from the "data" directory that sits next to the current
# working directory (i.e. <parent of cwd>/data/data.csv).
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
data_dir = os.path.join(parent_dir, "data")
csv_path = os.path.join(data_dir, "data.csv")
data = pd.read_csv(csv_path)

# Show the available columns as the cell output.
data.columns

In [None]:
# Encode the 't'/'f' labels as 1/0 and fail loudly on anything else: an unmapped
# value would otherwise become NaN and silently corrupt the loss.
titles = data['title']
labels = data['is_positive'].map({'t': 1, 'f': 0})
if labels.isna().any():
    unexpected = sorted(data.loc[labels.isna(), 'is_positive'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'is_positive': {unexpected}")

# Split the raw rows first so that the TF-IDF vocabulary and IDF weights are
# learned from the training titles only (nothing leaks from the test set).
titles_train, titles_test, labels_train, labels_test = train_test_split(
    titles, labels, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer()
vectorizer.fit(titles_train)


def _to_features(texts):
    """Return the dense float32 TF-IDF tensor for ``texts`` using the fitted vectorizer."""
    return torch.tensor(vectorizer.transform(texts).toarray(), dtype=torch.float32)


def _to_targets(values):
    """Return the numeric labels ``values`` as a float32 column tensor of shape (n, 1)."""
    return torch.tensor(values.astype('float').values, dtype=torch.float32).unsqueeze(1)


X_train, X_test = _to_features(titles_train), _to_features(titles_test)
y_train, y_test = _to_targets(labels_train), _to_targets(labels_test)

# Full-dataset tensors are kept under their original names; the features use
# the vocabulary fitted on the training titles.
X = _to_features(titles)
y = _to_targets(labels)
X.shape

In [None]:
class LogisticRegressionModel(nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, input_dim):
        """Create the single linear layer mapping ``input_dim`` features to one output."""
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Initialize the model.
# Seed PyTorch first so the randomly initialised weights are the same on every
# run, matching the fixed random_state used for the train/test split.
torch.manual_seed(42)
input_dim = X_train.shape[1]  # number of feature columns
model = LogisticRegressionModel(input_dim)


In [None]:
# Loss and optimizer: binary cross-entropy on the model's outputs, optimised
# with plain SGD.
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop: full-batch gradient descent, one parameter update per epoch.
num_epochs = 10000
log_every = 1000  # report 10 times in total instead of printing 1000 lines

model.train()  # make the training mode explicit
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


In [None]:
# Score the test split without tracking gradients, then round each output to
# obtain a hard 0/1 class prediction.
with torch.no_grad():
    y_predicted = model(X_test)
    y_predicted_cls = torch.round(y_predicted)

# Accuracy = matching predictions / number of test rows.
n_test = float(y_test.shape[0])
n_correct = y_predicted_cls.eq(y_test).sum()
accuracy = n_correct / n_test
print(f'Accuracy: {accuracy:.4f}')
