<a href="https://colab.research.google.com/github/tanuja1708/EEG-emotions/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
from google.colab import drive
drive.mount('/content/drive')

# Navigate to the correct directory
csv_file = "/content/processed_eeg_data.csv"


Mounted at /content/drive


In [11]:
import numpy as np
import pandas as pd
import scipy.signal as signal
import pywt


In [24]:
import numpy as np
import pandas as pd
import scipy.signal as signal
import pywt
from scipy import stats # Import the stats module from scipy

def extract_time_features(eeg_signal):
    """Extracts time-domain features from an EEG signal."""
    mean = np.mean(eeg_signal)
    std = np.std(eeg_signal)
    kurtosis = stats.kurtosis(eeg_signal) # Use stats.kurtosis
    skewness = stats.skew(eeg_signal) # Use stats.skew

    print(f"Time Features: Mean={mean}, Std={std}, Kurtosis={kurtosis}, Skewness={skewness}")

    return mean, std, kurtosis, skewness

def extract_frequency_features(eeg_signal, sampling_rate=256):
    """Extracts frequency-domain features using FFT."""
    nperseg = min(256, len(eeg_signal))  # Adjust dynamically
    freqs, psd = signal.welch(eeg_signal, fs=sampling_rate, nperseg=nperseg)

    # Check that power spectral density is not zero for the frequency bands
    delta_range = (freqs >= 0.5) & (freqs < 4)
    theta_range = (freqs >= 4) & (freqs < 8)
    alpha_range = (freqs >= 8) & (freqs < 14)
    beta_range = (freqs >= 14) & (freqs < 30)
    gamma_range = (freqs >= 30) & (freqs < 40)

    delta_power = np.sum(psd[delta_range]) if np.any(delta_range) else 0
    theta_power = np.sum(psd[theta_range]) if np.any(theta_range) else 0
    alpha_power = np.sum(psd[alpha_range]) if np.any(alpha_range) else 0
    beta_power = np.sum(psd[beta_range]) if np.any(beta_range) else 0
    gamma_power = np.sum(psd[gamma_range]) if np.any(gamma_range) else 0

    print(f"Frequency Features: Delta={delta_power}, Theta={theta_power}, Alpha={alpha_power}, Beta={beta_power}, Gamma={gamma_power}")

    return delta_power, theta_power, alpha_power, beta_power, gamma_power


def extract_wavelet_features(eeg_signal, wavelet='db4'):
    """Extracts wavelet transform features with dynamic level adjustment."""
    max_level = pywt.dwt_max_level(len(eeg_signal), pywt.Wavelet(wavelet))
    level = min(3, max_level)  # Ensure level is not too high
    coeffs = pywt.wavedec(eeg_signal, wavelet, level=level)

    wavelet_features = [np.mean(abs(c)) for c in coeffs]

    print(f"Wavelet Features: {wavelet_features}")

    return wavelet_features

def extract_features_from_csv(csv_file):
    """Extracts all features from the EEG dataset."""
    df = pd.read_csv(csv_file)
    feature_list = []

    for index, row in df.iterrows():
        print(f"\nProcessing Row {index+1}/{len(df)}...")
        eeg_signal = np.array(row)

        time_features = extract_time_features(eeg_signal)
        freq_features = extract_frequency_features(eeg_signal)
        wavelet_features = extract_wavelet_features(eeg_signal)

        features = list(time_features) + list(freq_features) + wavelet_features
        feature_list.append(features)

    feature_columns = ["mean", "std", "kurtosis", "skewness", "delta_power", "theta_power", "alpha_power", "beta_power", "gamma_power"] + [f"wavelet_{i}" for i in range(len(wavelet_features))]

    feature_df = pd.DataFrame(feature_list, columns=feature_columns)
    feature_df.to_csv("features.csv", index=False)
    print("\n✅ Feature extraction complete! Saved to features.csv")

# Run feature extraction
extract_features_from_csv("processed_eeg_data.csv")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Time Features: Mean=nan, Std=nan, Kurtosis=nan, Skewness=nan
Frequency Features: Delta=0, Theta=0, Alpha=nan, Beta=nan, Gamma=nan
Wavelet Features: [nan, nan]

Processing Row 594202/595200...
Time Features: Mean=nan, Std=nan, Kurtosis=nan, Skewness=nan
Frequency Features: Delta=0, Theta=0, Alpha=nan, Beta=nan, Gamma=nan
Wavelet Features: [nan, nan]

Processing Row 594203/595200...
Time Features: Mean=nan, Std=nan, Kurtosis=nan, Skewness=nan
Frequency Features: Delta=0, Theta=0, Alpha=nan, Beta=nan, Gamma=nan
Wavelet Features: [nan, nan]

Processing Row 594204/595200...
Time Features: Mean=-0.7539651487744856, Std=7.940466037436444, Kurtosis=2.852533086350146, Skewness=-1.8226687866579052
Frequency Features: Delta=0, Theta=0, Alpha=0.2493307746417163, Beta=0.030187159969093706, Gamma=0.11146550934044663
Wavelet Features: [6.228910840054778, 6.057003603277989]

Processing Row 594205/595200...
Time Features: Mean=-0.77319669

In [25]:
import pandas as pd
features_df = pd.read_csv("features.csv")
print(features_df.head())  # Check if features are loaded correctly


       mean        std  kurtosis  skewness  delta_power  theta_power  \
0  4.038891  22.657806  2.783158  1.680400            0            0   
1  4.219708  22.952227  2.739531  1.760415            0            0   
2  3.934278  22.820650  2.903568  1.808757            0            0   
3  3.265196  22.386151  3.142232  1.783666            0            0   
4  2.748455  21.861870  2.596507  1.566632            0            0   

   alpha_power  beta_power  gamma_power  wavelet_0  wavelet_1  
0     0.858203    7.227299    11.251215  16.359229  15.880550  
1     1.283106    8.021003    10.323559  16.923806  14.828284  
2     1.409273    7.983557     9.292759  16.549160  14.808020  
3     1.343231    7.180577     8.344166  14.744352  15.851768  
4     1.347622    6.269381     7.542285  14.483288  16.344409  


In [39]:
import pandas as pd

# Load extracted features
features_df = pd.read_csv("features.csv")

def assign_emotion_label(row):
    delta, theta, alpha, beta, gamma = row['delta_power'], row['theta_power'], row['alpha_power'], row['beta_power'], row.get('gamma_power', 0)

    if beta > alpha and gamma > theta:
        return 'Happy'
    elif delta > alpha and theta > beta:
        return 'Sad'
    elif alpha > theta and alpha > beta:
        return 'Calm'
    elif gamma > beta and gamma > alpha:
        return 'Angry'
    elif theta > alpha and gamma > beta:
        return 'Anxious'
    elif beta > delta and alpha > theta:
        return 'Excited'
    else:
        return 'Neutral'

# Apply labeling function
features_df['label'] = features_df.apply(assign_emotion_label, axis=1)

# Save labeled dataset
features_df.to_csv("labeled_eeg_data.csv", index=False)
print("✅ Labeled dataset saved as labeled_eeg_data.csv")


✅ Labeled dataset saved as labeled_eeg_data.csv


In [41]:
print(features_df['label'].value_counts())


label
Happy      247571
Neutral    178684
Calm       168945
Name: count, dtype: int64
