# NeuroLab: EEG Data Generation and Exploration

This notebook demonstrates how to generate synthetic EEG data for mental state classification.

**Mental States:**

- 0: Relaxed (high alpha, low beta)
- 1: Focused (high beta, moderate alpha)
- 2: Stressed (very high beta, low alpha, elevated gamma)

**Author:** NeuroLab Team  
**License:** MIT

## 1. Setup and Imports

In [None]:
# Install required packages (uncomment if needed on Kaggle)
# !pip install numpy pandas matplotlib seaborn scikit-learn

from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*";
# older releases only provide "seaborn-darkgrid". Use whichever name this
# installation offers. If neither is present, fall through to the modern name
# so that plt.style.use() reports the problem instead of silently skipping it.
_preferred_styles = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
_plot_style = next(
    (name for name in _preferred_styles if name in plt.style.available),
    _preferred_styles[0],
)
plt.style.use(_plot_style)
sns.set_palette("husl")

print("✓ Imports successful")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")

## 2. Data Generation Functions

In [None]:
# Uniform sampling range (low, high) of each band's baseline power, per state.
# Band order matters: the random draws for a sample are made in this order.
_BAND_RANGES = {
    # Relaxed state: high alpha rhythm
    'relaxed': {
        'alpha': (15, 35),
        'beta': (3, 12),
        'theta': (5, 15),
        'delta': (2, 8),
        'gamma': (1, 5),
    },
    # Focused state: elevated beta
    'focused': {
        'alpha': (8, 20),
        'beta': (15, 35),
        'theta': (2, 8),
        'delta': (1, 5),
        'gamma': (5, 15),
    },
    # Stressed state: very high beta
    'stressed': {
        'alpha': (3, 12),
        'beta': (25, 50),
        'theta': (8, 18),
        'delta': (3, 10),
        'gamma': (12, 30),
    },
}

# Standard deviation of the zero-mean Gaussian variation added to each band.
_NOISE_STD = {'alpha': 2, 'beta': 3, 'theta': 2, 'delta': 2, 'gamma': 2}

# Integer class label stored under the 'state' key of every sample.
_STATE_LABELS = {'relaxed': 0, 'focused': 1, 'stressed': 2}

# Lower clamp applied after adding noise so band powers stay positive.
_MIN_BAND_POWER = 0.1


def generate_realistic_band_powers(state, num_samples=1000, rng=None):
    """
    Generate realistic frequency band power values for different mental states.

    Each sample draws a baseline power per band from a state-specific uniform
    range, adds zero-mean Gaussian variation, and clamps the result to a
    minimum of 0.1.

    Args:
        state: Mental state ('relaxed', 'focused', or 'stressed')
        num_samples: Number of samples to generate
        rng: Optional random source exposing ``uniform(low, high)`` and
            ``normal(loc, scale)``, e.g. ``np.random.default_rng(seed)`` or
            ``np.random.RandomState(seed)``. Defaults to NumPy's global
            ``np.random`` state, so ``np.random.seed()`` still controls the
            output when no generator is passed.

    Returns:
        List of sample dictionaries with the keys 'alpha', 'beta', 'theta',
        'delta', 'gamma' (band powers) and 'state' (0 = relaxed,
        1 = focused, 2 = stressed).

    Raises:
        ValueError: If ``state`` is not one of the supported mental states.
    """
    if state not in _BAND_RANGES:
        # Previously an unknown state crashed with UnboundLocalError on the
        # first sample; fail early with an actionable message instead.
        raise ValueError(
            f"Unknown state {state!r}; expected one of {sorted(_BAND_RANGES)}"
        )

    if rng is None:
        rng = np.random  # legacy global state, same source as before

    ranges = _BAND_RANGES[state]
    label = _STATE_LABELS[state]

    samples = []
    for _ in range(num_samples):
        # Baseline draws first (alpha, beta, theta, delta, gamma) ...
        baseline = {band: rng.uniform(low, high) for band, (low, high) in ranges.items()}

        # ... then natural variation in the same band order, clamped to stay positive.
        sample = {
            band: max(_MIN_BAND_POWER, baseline[band] + rng.normal(0, std))
            for band, std in _NOISE_STD.items()
        }
        sample['state'] = label
        samples.append(sample)

    return samples


print("✓ Data generation functions defined")

## 3. Generate Training Data

In [None]:
# Generate samples for each state
samples_per_state = 5000
random_seed = 42  # shared by sample generation and the shuffle below

# generate_realistic_band_powers draws from NumPy's global random state.
# Seed it here so that re-running this cell reproduces the same dataset;
# without this only the shuffle order was fixed, not the values.
np.random.seed(random_seed)

print(f"Generating {samples_per_state} samples per state...")
print("="*60)

all_samples = []
for state_key in ('relaxed', 'focused', 'stressed'):
    print(f"Generating '{state_key}' state samples...")
    all_samples.extend(generate_realistic_band_powers(state_key, samples_per_state))

# Convert to DataFrame and shuffle so the three states are interleaved
df = pd.DataFrame(all_samples)
df = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)

# Sanity check: every requested sample made it into the frame.
assert len(df) == 3 * samples_per_state, "unexpected number of generated samples"

print("="*60)
print(f"✓ Total samples generated: {len(df)}")
print(f"\nState distribution:")
print(df['state'].value_counts().sort_index())

## 4. Data Exploration

In [None]:
# Preview the first ten rows of the shuffled dataset
print("First 10 samples:")
df.iloc[:10]

In [None]:
# Descriptive statistics for every column of the dataset
df.describe()

In [None]:
# Mean band power per mental state, with readable row labels
state_names = {0: 'Relaxed', 1: 'Focused', 2: 'Stressed'}

summary = (
    df.groupby('state')[['alpha', 'beta', 'theta', 'delta', 'gamma']]
    .mean()
    .rename(index=state_names)  # numeric state codes -> display names
)
summary

## 5. Visualizations

In [None]:
# Overlaid histograms: one panel per frequency band, one colour per state
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('EEG Frequency Band Distributions by Mental State', fontsize=16, fontweight='bold')

bands = ['alpha', 'beta', 'theta', 'delta', 'gamma']
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1']
state_labels = ['Relaxed', 'Focused', 'Stressed']

# axes.flat walks the 2x3 grid row by row; zip stops after the five bands,
# so the sixth panel is never drawn on.
for ax, band in zip(axes.flat, bands):
    for state, (label, color) in enumerate(zip(state_labels, colors)):
        data = df.loc[df['state'] == state, band]
        ax.hist(data, bins=50, alpha=0.6, label=label, color=color)

    ax.set_title(f'{band.capitalize()} Band', fontsize=12, fontweight='bold')
    ax.set_xlabel('Power (μV²)')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Five bands on a six-panel grid: drop the empty last panel.
fig.delaxes(axes[1, 2])

plt.tight_layout()
plt.show()

In [None]:
# Box plots: one panel per frequency band, one box per mental state
fig, axes = plt.subplots(1, 5, figsize=(20, 4))
fig.suptitle('EEG Frequency Bands Comparison', fontsize=16, fontweight='bold')

# Build the labelled frame once: it is the same for every band, so there is
# no need to copy the whole dataset on each loop iteration.
df_plot = df.assign(state_name=df['state'].map(state_names))

# Pin the x-axis order to the state codes (0, 1, 2). Without `order`, the
# category order is inferred from the shuffled rows of the data.
state_order = [state_names[code] for code in sorted(state_names)]

for idx, band in enumerate(bands):
    sns.boxplot(
        data=df_plot,
        x='state_name',
        y=band,
        order=state_order,
        ax=axes[idx],
        palette='Set2',
    )
    axes[idx].set_title(f'{band.capitalize()}', fontweight='bold')
    axes[idx].set_xlabel('Mental State')
    axes[idx].set_ylabel('Power (μV²)')

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between the five frequency bands, drawn as a heatmap
corr_fig, corr_ax = plt.subplots(figsize=(10, 8))

correlation = df[['alpha', 'beta', 'theta', 'delta', 'gamma']].corr()

sns.heatmap(
    correlation,
    annot=True,        # print the coefficient inside each cell
    cmap='coolwarm',
    center=0,          # centre the colour map on zero correlation
    square=True,
    ax=corr_ax,
)
corr_ax.set_title('Correlation Matrix of EEG Frequency Bands', fontsize=14, fontweight='bold')

corr_fig.tight_layout()
plt.show()

## 6. Feature Engineering

In [None]:
# Derived features: band ratios, total power, and relative band shares

# Ratios against beta power
df['alpha_beta_ratio'] = df['alpha'].div(df['beta'])
df['theta_beta_ratio'] = df['theta'].div(df['beta'])

# Total power across the five bands
df['total_power'] = df['alpha'] + df['beta'] + df['theta'] + df['delta'] + df['gamma']

# Share of total power carried by alpha and beta, in percent
df['alpha_percentage'] = df['alpha'].div(df['total_power']).mul(100)
df['beta_percentage'] = df['beta'].div(df['total_power']).mul(100)

print("✓ Additional features created")
df[['alpha_beta_ratio', 'theta_beta_ratio', 'total_power']].head()

## 7. Save Dataset

In [None]:
# Write the full dataset (all columns, no index column) to a CSV file
output_file = 'eeg_mental_states_dataset.csv'
df.to_csv(output_file, index=False)

# The reported feature count is the number of columns in the frame.
n_samples, n_features = df.shape
print(f"✓ Dataset saved to: {output_file}")
print(f"  Total samples: {n_samples}")
print(f"  Features: {n_features}")