<a href="https://colab.research.google.com/github/marzia272/AI4ALL_Project/blob/marzia/AI4ALL_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📊 Data Handling
import pandas as pd                 # For dataframes and CSV operations
import numpy as np                 # For numerical operations
import os                          # For file and folder access

# 🧼 Data Preprocessing
from sklearn.impute import KNNImputer         # To fill missing values using K-nearest neighbors
from sklearn.preprocessing import StandardScaler  # To normalize EEG data
from sklearn.preprocessing import LabelEncoder     # To encode emotion labels

# 🤖 Machine Learning Models
from sklearn.svm import SVC                     # Support Vector Classifier
from sklearn.ensemble import RandomForestClassifier   # Random Forest for comparison
from sklearn.linear_model import LogisticRegression   # Logistic Regression baseline

# 🎯 Model Training and Evaluation
from sklearn.model_selection import train_test_split     # To split data into train and test sets
from sklearn.model_selection import cross_val_score      # For cross-validation
from sklearn.metrics import classification_report         # To show precision, recall, F1-score
from sklearn.metrics import confusion_matrix, accuracy_score  # To measure performance

# 📉 Dimensionality Reduction / Feature Selection (optional)
from sklearn.decomposition import PCA               # Principal Component Analysis for visualization
from sklearn.feature_selection import SelectKBest, f_classif  # Feature selection based on ANOVA

# 📈 Data Visualization
import matplotlib.pyplot as plt      # For plotting graphs
import seaborn as sns                # For heatmaps, better visual styles

# ⚠️ Suppress Warnings
import warnings
# Ignore every warning whose category is FutureWarning.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Ignore only RuntimeWarnings whose text begins with the log10 divide-by-zero message
# (`message` is a regular expression matched against the start of the warning text).
# NOTE(review): nothing in the visible cells calls log10 — presumably needed by a later cell; confirm.
warnings.filterwarnings('ignore', category=RuntimeWarning, message='divide by zero encountered in log10')


In [None]:
from google.colab import files
# Run the Colab upload helper and keep whatever it returns in `uploaded`.
# NOTE(review): `uploaded` is not read again in the visible cells; the extraction cell
# opens a hard-coded "GAMEEMO.zip", so presumably that is the file expected here — confirm.
uploaded = files.upload()


In [None]:
import zipfile
import os

# Destination folder and archive name (adjust both to match your upload)
extract_path = "Dataset"  # Replace with your desired folder
zip_path = "GAMEEMO.zip"  # Replace with actual name

# Make sure the destination exists; no error is raised if it is already there
os.makedirs(extract_path, exist_ok=True)

# Extract every member of the archive into the destination folder
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("✅ Unzipped to:", extract_path)


In [None]:
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the file inside the Kaggle dataset to load
file_path = "S01G1AllChannels.csv"

# Load the latest version of that file as a pandas DataFrame
df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "wajahat1064/emotion-recognition-using-eeg-and-computer-games",
  file_path,
  # Provide any additional arguments like
  # sql_query or pandas_kwargs. See the
  # documentation for more information:
  # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)

# Start the preview on its own line (same pattern as the missing-value report below);
# printing it right after the label shifts the column header out of line with the rows.
print("First 5 records:\n", df.head())

In [None]:
# Count missing values once; the same Series drives both the report and the column filter
missing_summary = df.isnull().sum()
print("Missing values per column:\n", missing_summary)

# Drop columns where at least half of the rows are missing (optional)
threshold = 0.5 * len(df)
df = df.loc[:, missing_summary < threshold]

# Use KNN imputer for remaining missing values.
# fit_transform returns a bare array, so hand the original row index back together
# with the column names; without `index=` the row labels are silently reset to 0..n-1.
imputer = KNNImputer(n_neighbors=5)
df_imputed = pd.DataFrame(
    imputer.fit_transform(df),
    index=df.index,
    columns=df.columns,
)

# Replace original with imputed
df = df_imputed


In [None]:
# Visualize distributions to find outliers (drawn from the data BEFORE any rows are removed)
plt.figure(figsize=(12, 6))
sns.boxplot(data=df)
plt.xticks(rotation=90)
plt.title("Boxplots to visualize outliers")
plt.show()

# Optionally: remove rows with outliers using IQR method
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

# A row is flagged when ANY column lies outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
outlier_rows = ((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)

# Keep the unflagged rows. The explicit .copy() makes the result an independent frame,
# so adding columns to `df` later does not raise SettingWithCopyWarning.
df = df[~outlier_rows].copy()


In [None]:
# List any columns that still hold text or categorical data
# NOTE(review): `df` was rebuilt from the KNNImputer output in an earlier cell, so this
# list is likely always empty by now — confirm whether the check should run before imputation.
non_numeric_cols = df.select_dtypes(include=['object', 'category']).columns
print("Non-numeric columns:", non_numeric_cols)

# If the list above is not empty, encode those columns, e.g. with an explicit mapping:
#   df['Emotion'] = df['Emotion'].map({'Happy': 0, 'Sad': 1, 'Angry': 2, ...})
# or one-hot encode them all at once:
#   df = pd.get_dummies(df, drop_first=True)


In [None]:
# Per-row summary features computed across the original columns only.
# The derived column names are excluded up front so that
#   (a) Signal_Std is not contaminated by the freshly added Signal_Mean column, and
#   (b) re-running this cell gives the same values instead of folding the
#       previously derived columns back into the statistics.
derived_cols = ['Signal_Mean', 'Signal_Std', 'Mean_to_Std_Ratio']
channel_cols = [col for col in df.columns if col not in derived_cols]

signal_mean = df[channel_cols].mean(axis=1)
signal_std = df[channel_cols].std(axis=1)

# Optional feature interaction: the small constant keeps the ratio finite when the std is 0.
# assign() returns a new frame, so this is safe even if `df` is a filtered slice.
df = df.assign(
    Signal_Mean=signal_mean,
    Signal_Std=signal_std,
    Mean_to_Std_Ratio=signal_mean / (signal_std + 1e-5),
)


In [None]:
# Final sanity check: report the dimensions, then show the first rows
prepared_shape = df.shape
preview = df.head()
print("Shape after all prep:", prepared_shape)
print(preview)
