In [1]:
import os
import pandas as pd
from torch.utils.data.dataset import Dataset
import numpy as np
import math

class BrainReaderDataset(Dataset):
    def __init__(self, data_folder, mapping_csv):
        self.data_folder = data_folder
        self.mapping_df = pd.read_csv(mapping_csv)

    def __len__(self):
        return len(self.mapping_df)

    def __getitem__(self, idx):
        file_name = self.mapping_df.iloc[idx, 0]  # Assuming the file names are in the first column
        score = self.mapping_df.iloc[idx, 1]  # Assuming the scores are in the second column

        # Load the data from the CSV file
        data = pd.read_csv(os.path.join(self.data_folder, file_name))

        return data, score 

def custom_collate(batch):
        data_batch, score_batch = zip(*batch)
        # Assuming your data is a Pandas DataFrame, convert it to a NumPy array
        data_batch = [data.values for data in data_batch]
        return data_batch, score_batch


In [3]:
#creating dataset

from torch.utils.data import DataLoader

data_folder = "Kevin"
mapping_csv = "name2score.csv"

dataset = BrainReaderDataset(data_folder, mapping_csv)

#creating a dataloader
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)



In [4]:
# Assuming you have already created the DataLoader
for batch_idx, (data, score) in enumerate(dataloader):
    print(f"Batch {batch_idx + 1}:")
    print("Data:")
    print(data)
    print("Scores:")
    print(score)
    print("\n")
    if batch_idx == 0:
        break  # Print only the first batch for brevity

Batch 1:
Data:
[array([[ 0.22306471,  0.01826768,  0.31127413,  0.51975448,  0.38724612,
         0.16065071,  0.50499703,  0.69787012,  0.25239507, -0.01086647,
         0.39430647,  0.72750869,  0.35234995,  0.07764651,  0.45651813,
         0.82099597],
       [ 0.3467488 ,  0.13531894,  0.46733831,  0.76072662,  0.3751211 ,
         0.24013495,  0.4819111 ,  0.85588533,  0.22733843,  0.03582695,
         0.25993264,  0.6712    ,  0.33707079,  0.08823416,  0.297738  ,
         0.61363583],
       [ 0.32222318,  0.09137257,  0.26243299,  0.62317033,  0.41988284,
         0.18708757,  0.29872326,  0.74848245,  0.33493978,  0.01802063,
         0.20922819,  0.55003198,  0.38027203,  0.06617612,  0.26366612,
         0.5683607 ],
       [ 0.33549889,  0.10693426,  0.27051463,  0.59823254,  0.33372205,
         0.19324684,  0.42153298,  0.77989896,  0.22005029,  0.03669691,
         0.33523158,  0.83535068,  0.35715246,  0.11676753,  0.45024645,
         0.93460382],
       [ 0.31216751,

In [6]:
import torch
import torch.nn as nn

class BinaryClassifier(nn.Module):
    def __init__(self, input_size):
        super(BinaryClassifier, self).__init__()
        
        # Define layers
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Instantiate the model
input_size = 9 * 16  # Assuming input is a 10x16 array
model = BinaryClassifier(input_size)

# Print the model architecture
print(model)


BinaryClassifier(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=144, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# This block is to test out a classifer with the dataloader. NOTE: This does not split the data
# Assuming you have the BinaryClassifier model and DataLoader already defined
input_size = 9 * 16  # Assuming input is a 10x16 array
model = BinaryClassifier(input_size)
data_folder = "Kevin"
mapping_csv = "name2score.csv"
dataset = BrainReaderDataset(data_folder, mapping_csv)
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0.0
    correct_predictions = 0

    for batch_idx, (data, labels) in enumerate(dataloader):
        optimizer.zero_grad()  # Zero the gradients
        print(torch.Tensor(data).shape)
        # Forward pass
        outputs = model(torch.Tensor(data))  # Assuming data is a NumPy array

        # Calculate loss
        labels = torch.Tensor(labels).view(-1, 1)  # Reshape labels to match the output shape
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Optimize
        optimizer.step()

        # Accumulate statistics
        total_loss += loss.item()
        predictions = (outputs > 0.5).float()  # Convert probabilities to binary predictions
        correct_predictions += torch.sum(predictions == labels).item()

    # Print statistics for the epoch
    average_loss = total_loss / len(dataloader)
    accuracy = correct_predictions / len(dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}")


torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 1/10, Loss: 0.6491, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 2/10, Loss: 0.7035, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 3/10, Loss: 0.5699, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 4/10, Loss: 0.4907, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 5/10, Loss: 0.7322, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 6/10, Loss: 0.7190, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([16, 9, 16])
torch.Size([1, 9, 16])
Epoch 7/10, Loss: 0.5213, Accuracy: 0.6939
torch.Size([16, 9, 16])
torch.Size

In [25]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import numpy as np
from sklearn.model_selection import train_test_split


# Define data folder and mapping CSV
data_folder = "Kevin"
mapping_csv = "name2score.csv"

# Create dataset instance
dataset = BrainReaderDataset(data_folder, mapping_csv)

# Split dataset into training and testing sets
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create DataLoader instances for training and testing
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

# Define input size based on the shape of your data
input_size =  9 * 16
#train_dataloader.dataset[0][0].shape[1]

# Instantiate the binary classifier model
model = BinaryClassifier(input_size)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    total_loss = 0.0

    # Iterate over batches of data
    for data_batch, score_batch in train_dataloader:
        optimizer.zero_grad()  # Zero the gradients
        data_batch = torch.tensor(data_batch, dtype=torch.float32)
        outputs = model(data_batch)  # Forward pass
        labels = torch.tensor(score_batch, dtype=torch.float32).view(-1, 1)
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        total_loss += loss.item() * len(data_batch)  # Accumulate the total loss

    # Calculate the average loss for the epoch
    average_loss = total_loss / len(train_dataloader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}")

# Evaluation on test set
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct_predictions = 0

# Iterate over batches of data
for data_batch, score_batch in test_dataloader:
    data_batch = torch.tensor(data_batch, dtype=torch.float32)
    outputs = model(data_batch)  # Forward pass
    labels = torch.tensor(score_batch, dtype=torch.float32).view(-1, 1)
    test_loss += criterion(outputs, labels).item() * len(data_batch)  # Compute the total loss
    predictions = (outputs > 0.5).float()  # Convert probabilities to binary predictions
    correct_predictions += torch.sum(predictions == labels).item()  # Count correct predictions

# Calculate average test loss and accuracy
average_test_loss = test_loss / len(test_dataloader.dataset)
accuracy = correct_predictions / len(test_dataloader.dataset)
print(f"Test Loss: {average_test_loss:.4f}, Accuracy: {accuracy:.4f}")


Epoch 1/10, Loss: 0.6746
Epoch 2/10, Loss: 0.6361
Epoch 3/10, Loss: 0.6089
Epoch 4/10, Loss: 0.6046
Epoch 5/10, Loss: 0.6014
Epoch 6/10, Loss: 0.6048
Epoch 7/10, Loss: 0.6047
Epoch 8/10, Loss: 0.6036
Epoch 9/10, Loss: 0.6014
Epoch 10/10, Loss: 0.5945
Test Loss: 0.5651, Accuracy: 0.7000
