In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import RandomOverSampler

In [None]:
# Load the training samples and their labels from two separate CSV files.
# NOTE: despite its name, `test_data` holds the *training labels*, not a test set.
train_data = pd.read_csv("samples_train_0.csv")
test_data = pd.read_csv("labels_train_0.csv")

# The first column of each file is dropped (presumably a row id/index column
# -- TODO confirm). `y` stays 2-D because it is a column slice.
X = train_data.iloc[:, 1:].values
y = test_data.iloc[:, 1:].values

# Split data into training and validation sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and applied to BOTH splits, so every downstream consumer of `X_train`,
# `X_train_resampled` or `X_test` sees features on the same scale. Fitting
# before oversampling also keeps duplicated rows from skewing the statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Check the class distribution before oversampling
print("Class distribution before oversampling:")
print(pd.Series(y_train.flatten()).value_counts())

# Balance the classes by randomly duplicating rows. Only the training split is
# resampled; the validation split keeps its original class distribution.
oversampler = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

# Check the class distribution after oversampling
print("Class distribution after oversampling:")
print(pd.Series(y_train_resampled.flatten()).value_counts())


In [None]:
# Train on the oversampled, standardized split so the training features share
# the scale of `X_test` and the class balancing actually takes effect.
# torch.tensor(..., dtype=torch.float32) copies the array and converts integer
# labels to float, which nn.BCELoss requires for its targets.
X_train_tensor = torch.tensor(X_train_resampled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_resampled, dtype=torch.float32).view(-1, 1)

# Validation tensors; labels are reshaped to a column to match the model output.
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [None]:
# cnt = 0
# total = len(y_train_tensor)
# indexes = []
# for i in range(0 ,len(y_train_tensor)):
#     if y_train_tensor[i] == 1:
#         indexes.append(i)

In [None]:
class BinaryClassifier(nn.Module):
    """Fully connected network producing one sigmoid output per input row.

    Layer widths are ``input_size -> 139 -> 64 -> 1``; a ReLU follows each of
    the first two linear layers and a sigmoid follows the last one.
    """

    def __init__(self, input_size):
        """Build the layers; ``input_size`` is the width of the first linear layer."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 139)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(139, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three layers and return the sigmoid output."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

    def predict(self, x, threshold=0.4):
        """Return a float tensor that is 1.0 where the output is strictly above ``threshold``."""
        return (self.forward(x) > threshold).float()

In [None]:
# Seed torch so the random weight initialisation is repeatable across runs,
# in line with the fixed random_state=42 used for the split and oversampler.
torch.manual_seed(42)

# One input unit per feature column of the training tensor.
input_size = X_train_tensor.shape[1]
model = BinaryClassifier(input_size)

In [None]:
# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
LEARNING_RATE = 1e-3

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
from sklearn import svm

# Baseline classifier: an SVC with default settings.
# It is fitted on the standardized (and oversampled) training matrix because
# it is later asked to predict on standardized features; fitting it on raw,
# unscaled features would put training and prediction on different scales.
clf = svm.SVC()
clf.fit(X_train_resampled, np.ravel(y_train_resampled))


In [None]:
# Full-batch training: every epoch performs a single gradient step on the
# whole training tensor. Every 10th epoch the training loss and the loss on
# the validation tensors are recorded for the plot below.
epochs = 100
train_loss = []
valid_loss = []
total_epochs = []
for epoch in range(epochs):
    # The validation step below switches the model to eval mode, so training
    # mode has to be restored explicitly at the start of each epoch.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        # Record the 1-based epoch number so the x-axis matches the printed log.
        total_epochs.append(epoch + 1)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')
        train_loss.append(loss.item())
        model.eval()
        with torch.no_grad():
            valid_outputs = model(X_test_tensor)
            # .item() stores a plain Python float instead of a 0-dim tensor,
            # which keeps the history lists homogeneous and plot-friendly.
            average_valid_loss = criterion(valid_outputs, y_test_tensor).item()
        valid_loss.append(average_valid_loss)
        print(f'Loss on the validation set: {average_valid_loss}')

plt.plot(total_epochs, train_loss, label='Training Loss')
plt.plot(total_epochs, valid_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model Training and Validation Loss')
plt.legend()
plt.show()

In [None]:
# Load the separate test samples and labels.
test_data = pd.read_csv("samples_test_0.csv")
test_data_labels = pd.read_csv("labels_test_0.csv")

# As with the training files, the first column is dropped
# (presumably a row id/index column -- TODO confirm).
X = test_data.iloc[:, 1:].values
y = test_data_labels.iloc[:, 1:].values

# Reuse the scaler that was fitted on the training data. Fitting a fresh
# scaler on the test set would standardize it with different statistics than
# the ones the models were trained with, and would leak test-set information.
X = scaler.transform(X)

# NOTE: this rebinds X_test_tensor / y_test_tensor, which previously held the
# validation split; re-running the training cell after this one would
# therefore validate against the test data.
X_test_tensor = torch.tensor(X, dtype=torch.float32)
y_test_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    # NOTE(review): the 0.5 cut-off here differs from the 0.4 default of
    # BinaryClassifier.predict -- confirm which threshold is intended.
    predicted_labels = (predictions >= 0.5).float()

# Number of positive and non-positive labels in the test set.
cnt = int((y_test_tensor == 1).sum().item())
print(cnt, len(predicted_labels) - cnt)

accuracy = (predicted_labels == y_test_tensor).sum().item() / len(y_test_tensor)
print(f'Accuracy on the test set: {accuracy}')

# Accuracy of the SVM baseline on the same standardized features.
res = clf.predict(X)

total = len(res)
cnt = int(np.sum(res == y.ravel()))
print(cnt/total)

In [None]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import numpy as np

# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y, predicted_labels)
print(f'Accuracy: {accuracy:.2f}')

# Counts of true labels (rows) against predicted labels (columns).
conf_matrix = confusion_matrix(y, predicted_labels)
print('Confusion Matrix:')
print(conf_matrix)

# Compute the three per-class scores in one pass, then report them together.
precision, recall, f1 = (
    score_fn(y, predicted_labels)
    for score_fn in (precision_score, recall_score, f1_score)
)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')