In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the project folders read and written by the
# later cells all live under "/content/drive/My Drive/AI4ALL PROJECT".
drive.mount('/content/drive')

In [None]:
import os
import pandas as pd

# Source: one directory per subfolder, each holding the raw CSV recordings
folder_path = "/content/drive/My Drive/AI4ALL PROJECT/GAMEEMO"

# Destination: a single flat folder that receives every cleaned CSV
clean_folder = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"
os.makedirs(clean_folder, exist_ok=True)  # no-op when the folder is already there

for entry in os.listdir(folder_path):
    entry_dir = os.path.join(folder_path, entry)

    # Only directories are searched; loose files directly under the source are ignored
    if not os.path.isdir(entry_dir):
        continue

    for file in os.listdir(entry_dir):
        # Anything that is not a CSV is skipped
        if not file.endswith('.csv'):
            continue

        try:
            # Read the file and discard columns that contain no data at all
            df = pd.read_csv(os.path.join(entry_dir, file)).dropna(axis=1, how='all')

            # Remove the remaining 'Unnamed...' columns, then every row with a missing value
            unnamed_cols = [name for name in df.columns if name.startswith('Unnamed')]
            df = df.drop(columns=unnamed_cols, errors='ignore').dropna()

            # Write the result next to the other cleaned files, prefixed with "cleaned_"
            df.to_csv(os.path.join(clean_folder, f"cleaned_{file}"), index=False)

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest
            print(f"Error processing {file}: {e}")


In [None]:
import os
import pandas as pd
import re

# Channel column names grouped by the region column they are averaged into
brain_regions = {
    "Frontal": ['AF3', 'AF4', 'F3', 'F4', 'F7', 'F8'],
    "Temporal": ['T7', 'T8'],
    "Parietal": ['P7', 'P8'],
    "Occipital": ['O1', 'O2'],
    "Fronto-central": ['FC5', 'FC6']
}

# Folder holding the cleaned CSVs; every file in it is rewritten in place below
folder_path = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"

for file in os.listdir(folder_path):
    # Only CSV files are processed
    if not file.endswith('.csv'):
        continue

    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)

    # Participant (S<digits>) and game (G<digits>) IDs come from the file name;
    # both fall back to "Unknown" when the name does not contain that pattern
    match = re.search(r'(S\d+)_?(G\d+)', file)
    participant, game = match.groups() if match else ("Unknown", "Unknown")

    # One new column per region: the row-wise mean of whichever of its channels
    # exist in this file. A region with none of its channels present is skipped.
    for region, channels in brain_regions.items():
        present = [ch for ch in channels if ch in df.columns]
        if present:
            df[region] = df[present].mean(axis=1)

    # Same ID value on every row of the file
    df['Participant'] = participant
    df['Game'] = game

    # Overwrite the cleaned file with the augmented frame
    df.to_csv(file_path, index=False)

    print(f"{file} updated and saved.")


In [None]:
import os
import pandas as pd

# Step 1: Load Google Sheet as CSV
# The sheet is fetched through its "/export?format=csv" URL, built from sheet_id, and
# parsed by pandas into label_df. Code further down this cell reads the 'pdf_file'
# and 'dominant_emotion' columns of label_df.
# NOTE(review): this needs network access, and presumably the sheet must be readable
# without signing in — confirm the sharing settings.
sheet_id = "1lse2fQnwI-cZrOQcE2iWBCxMI94L3Mc41fyNmUVbVhU"
sheet_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
label_df = pd.read_csv(sheet_url)

# Step 2: Normalize the filenames to match your .csv files
def normalize_filename(pdf_name):
    """Turn a PDF name from the label sheet into the CSV file name it should match.

    Surrounding whitespace is trimmed, every '(' and ')' is removed, and
    '.pdf' is replaced with '.csv'. For example, '(S01)G1.pdf' becomes
    'S01G1.csv'.

    Args:
        pdf_name: A value from the sheet's 'pdf_file' column.

    Returns:
        The normalized name as a string. A non-string value (for example the
        float NaN pandas produces for a blank cell) is returned unchanged
        instead of raising, so one empty row cannot abort the whole
        ``.apply`` call.
    """
    # Blank sheet cells arrive as NaN (a float), which has no .replace()
    if not isinstance(pdf_name, str):
        return pdf_name

    # Hand-typed cells often carry stray spaces that would prevent a match
    name = pdf_name.strip()
    return name.replace('(', '').replace(')', '').replace('.pdf', '.csv')

# Map each normalized file name from the sheet to its dominant emotion
label_df['normalized'] = label_df['pdf_file'].apply(normalize_filename)
emotion_mapping = dict(zip(label_df['normalized'], label_df['dominant_emotion']))

# Step 3: Path to already cleaned CSVs
clean_folder = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"

# The cleaning cell saves each file as f"cleaned_{file}", so a name in this folder
# may carry this prefix while the sheet-derived keys may not.
cleaned_prefix = "cleaned_"

# Step 4: Add 'Emotion' column to each file
for file in os.listdir(clean_folder):
    if not file.endswith('.csv'):
        continue

    # Resolve the label before reading the CSV, so an unlabeled file costs nothing.
    # The exact file name is tried first; the name without the "cleaned_" prefix
    # is only a fallback.
    label_key = file
    if label_key not in emotion_mapping and file.startswith(cleaned_prefix):
        label_key = file[len(cleaned_prefix):]

    if label_key not in emotion_mapping:
        # Say what is wrong instead of surfacing a bare KeyError
        print(f"Error processing {file}: no emotion label found in the sheet")
        continue

    file_path = os.path.join(clean_folder, file)

    try:
        df = pd.read_csv(file_path)
        df['Emotion'] = emotion_mapping[label_key]  # same label on every row
        df.to_csv(file_path, index=False)  # overwrite

    except Exception as e:
        # Best effort: report the failing file and continue with the others
        print(f"Error processing {file}: {e}")


In [None]:
# Step 1: Load the dataset
# - Load your cleaned CSV files

# Step 2: Clean the dataset (if not already cleaned)
# - Drop unnecessary columns (like IDs or indexes)
# - Remove rows with missing values or fill them in
# - Make sure all values are consistent and usable

# Step 3: Explore the dataset
# - Check number of rows and columns
# - Look at value counts for labels
# - Use visualizations to understand data distribution

# === CURRENT POSITION IN THE PLAN (resume from Step 4) ===

# Step 4: Preprocess the data
# - Encode text labels into numbers (label encoding)
# - Normalize or scale features for better model performance
# - Handle any categorical variables if needed

# Step 5: Split the data
# - Split your data into training and testing sets (e.g., 80% train, 20% test)

# Step 6: Choose and train your model
# - Select a machine learning algorithm (SVM)
# - Train the model using the training data

# Step 7: Evaluate the model
# - Predict using the test set
# - Calculate accuracy, precision, recall, and F1-score
# - Analyze the results to see how well your model performs

# Step 8: Save the model
# - Save your trained model so it can be used again later

# Step 9: Document your work
# - Add explanations and results
# - Prepare a final report or presentation if needed
