In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!cp -r "/content/drive/MyDrive/IITG_assignments/internship/internship" "/content"

In [3]:
!apt-get install tree

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  tree
0 upgraded, 1 newly installed, 0 to remove and 35 not upgraded.
Need to get 47.9 kB of archives.
After this operation, 116 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tree amd64 2.0.2-1 [47.9 kB]
Fetched 47.9 kB in 0s (411 kB/s)
Selecting previously unselected package tree.
(Reading database ... 126284 files and directories currently installed.)
Preparing to unpack .../tree_2.0.2-1_amd64.deb ...
Unpacking tree (2.0.2-1) ...
Setting up tree (2.0.2-1) ...
Processing triggers for man-db (2.10.2-1) ...


In [4]:
!tree /content/internship/

[01;34m/content/internship/[0m
├── [01;34mData[0m
│   ├── [01;34mAP01[0m
│   │   ├── [00mFlow - 30-05-2024.txt[0m
│   │   ├── [00mFlow Events - 30-05-2024.txt[0m
│   │   ├── [00mSleep profile - 30-05-2024.txt[0m
│   │   ├── [00mSPO2 - 30-05-2024.txt[0m
│   │   └── [00mThorac - 30-05-2024.txt[0m
│   ├── [01;34mAP02[0m
│   │   ├── [00mFlow  - 30.05.2024.txt[0m
│   │   ├── [00mFlow Events  - 30.05.2024.txt[0m
│   │   ├── [00mSleep profile  - 30.05.2024.txt[0m
│   │   ├── [00mSPO2  - 30.05.2024.txt[0m
│   │   └── [00mThorac  - 30.05.2024.txt[0m
│   ├── [01;34mAP03[0m
│   │   ├── [00mFlow - 29_05_2024.txt[0m
│   │   ├── [00mFlow Events - 29_05_2024.txt[0m
│   │   ├── [00mSleep profile - 29_05_2024.txt[0m
│   │   ├── [00mSPO2 - 29_05_2024.txt[0m
│   │   └── [00mThorac - 29_05_2024.txt[0m
│   ├── [01;34mAP04[0m
│   │   ├── [00mFlow Events - 29.05.2024.txt[0m
│   │   ├── [00mFlow Signal - 29.05.2024.txt[0m
│   │   ├── [00mSleep profile - 29.05.202

In [5]:
import os
import re
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Unique Opportunity for Raise! [Bonus 5 marks]


In [7]:
from datetime import timedelta


def create_sleep_stage_dataset(processed_data_path):
    """
    Creates a dataset for sleep stage classification with:
    - 30-second windows with 50% overlap (15s step)
    - Sleep stage labels from Sleep_profile.csv
    - Same respiratory signals as features
    - Handles all data validation and error cases
    """
    processed_data_path = Path(processed_data_path)
    dataset_path = processed_data_path.parent / "Sleep_Stage_Dataset"
    dataset_path.mkdir(exist_ok=True)

    # Initialize combined dataset
    all_windows = []

    # Process each subject from AP01 to AP05
    for subject_id in [f"AP{i:02d}" for i in range(1, 6)]:
        subject_dir = processed_data_path / subject_id
        if not subject_dir.exists():
            print(f"\nSkipping {subject_id} - folder not found")
            continue

        print(f"\nProcessing {subject_id}...")
        try:
            # Load required files with validation
            def load_csv(file):
                path = subject_dir / file
                if not path.exists():
                    raise FileNotFoundError(f"Missing file: {path}")
                return pd.read_csv(path)

            flow = load_csv("filtered_Flow.csv")
            thorac = load_csv("filtered_Thorac.csv")
            sleep_profile = load_csv("Sleep_profile.csv")

            # Convert timestamps with error handling
            def convert_timestamps(df):
                if 'DateTime' in df.columns:
                    df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
                elif 'Original_Timestamp' in df.columns:
                    df['DateTime'] = pd.to_datetime(df['Original_Timestamp'],
                                                  format='%d.%m.%Y %H:%M:%S,%f',
                                                  errors='coerce')
                return df.dropna(subset=['DateTime'])

            flow = convert_timestamps(flow)
            thorac = convert_timestamps(thorac)
            sleep_profile = convert_timestamps(sleep_profile)

            # Create 30-second windows with 50% overlap (15s step)
            min_time = max(
                flow['DateTime'].min(),
                thorac['DateTime'].min(),
                sleep_profile['DateTime'].min()
            )
            max_time = min(
                flow['DateTime'].max(),
                thorac['DateTime'].max(),
                sleep_profile['DateTime'].max()
            )

            window_size = timedelta(seconds=30)
            step_size = timedelta(seconds=15)
            current_time = min_time

            # Initialize subject-specific data collector
            subject_windows = []

            while current_time + window_size <= max_time:
                window_start = current_time
                window_end = current_time + window_size

                # Get sleep stage for this window
                window_sleep = sleep_profile[
                    (sleep_profile['DateTime'] >= window_start) &
                    (sleep_profile['DateTime'] < window_end)
                ]

                # Use the most frequent sleep stage in the window
                if len(window_sleep) > 0:
                    sleep_stage = window_sleep['Sleep_Stage'].mode()[0]
                else:
                    sleep_stage = "Unknown"

                # Get signal data
                window_flow = flow[
                    (flow['DateTime'] >= window_start) &
                    (flow['DateTime'] < window_end)
                ]
                window_thorac = thorac[
                    (thorac['DateTime'] >= window_start) &
                    (thorac['DateTime'] < window_end)
                ]

                # Only include complete windows
                if len(window_flow) > 0 and len(window_thorac) > 0:
                    # Calculate various features from the signals
                    flow_values = window_flow['Filtered_Flow_Value'].values
                    thorac_values = window_thorac['Filtered_Thoracic_Value'].values

                    subject_windows.append({
                        'subject': subject_id,
                        'window_start': window_start,
                        'window_end': window_end,
                        'flow_mean': np.mean(flow_values),
                        'flow_std': np.std(flow_values),
                        'flow_min': np.min(flow_values),
                        'flow_max': np.max(flow_values),
                        'thoracic_mean': np.mean(thorac_values),
                        'thoracic_std': np.std(thorac_values),
                        'thoracic_min': np.min(thorac_values),
                        'thoracic_max': np.max(thorac_values),
                        'sleep_stage': sleep_stage,
                        'signal_quality': len(window_flow)  # Number of samples as proxy for quality
                    })

                current_time += step_size

            # Add subject data to combined dataset
            if subject_windows:
                all_windows.extend(subject_windows)
                print(f"Processed {len(subject_windows)} windows for {subject_id}")
                print(f"Sleep stage distribution: {pd.Series([x['sleep_stage'] for x in subject_windows]).value_counts().to_dict()}")
            else:
                print(f"No valid windows created for {subject_id}")

        except Exception as e:
            print(f"Error processing {subject_id}: {str(e)}")
            continue

    # Save combined dataset
    if all_windows:
        sleep_stage_df = pd.DataFrame(all_windows)

        # Map sleep stages to standard categories
        stage_mapping = {
            'Wake': 'Wake',
            'N1': 'N1',
            'N2': 'N2',
            'N3': 'N3',
            'REM': 'REM',
            'A': 'Artifact',  # Assuming 'A' stands for artifact
            'Unknown': 'Unknown'
        }
        sleep_stage_df['sleep_stage'] = sleep_stage_df['sleep_stage'].map(stage_mapping)

        # Save the dataset
        output_path = dataset_path / "sleep_stage_classification_dataset.csv"
        sleep_stage_df.to_csv(output_path, index=False)

        print("\nFinal Statistics:")
        print(f"Total subjects processed: {sleep_stage_df['subject'].nunique()}")
        print(f"Total windows created: {len(sleep_stage_df)}")
        print("\nSleep stage distribution across all subjects:")
        print(sleep_stage_df['sleep_stage'].value_counts())

        print(f"\nSaved dataset to: {output_path}")

        return sleep_stage_df
    else:
        print("\nNo valid data processed for any subject")
        return None

processed_data_path = "/content/internship/Processed_Data"
sleep_stage_df = create_sleep_stage_dataset(processed_data_path)


Processing AP01...
Processed 1821 windows for AP01
Sleep stage distribution: {'Wake': 1009, 'N2': 356, 'N3': 206, 'N1': 174, 'REM': 76}

Processing AP02...
Processed 1768 windows for AP02
Sleep stage distribution: {'N2': 712, 'Wake': 348, 'N3': 310, 'N1': 268, 'REM': 112, 'A': 18}

Processing AP03...
Processed 1695 windows for AP03
Sleep stage distribution: {'Wake': 1133, 'N1': 196, 'N2': 192, 'N3': 96, 'REM': 78}

Processing AP04...
Processed 1930 windows for AP04
Sleep stage distribution: {'N2': 592, 'Wake': 540, 'N1': 370, 'N3': 230, 'REM': 198}

Processing AP05...
Processed 1580 windows for AP05
Sleep stage distribution: {'N2': 590, 'N1': 312, 'Wake': 240, 'N3': 224, 'REM': 186, 'A': 28}

Final Statistics:
Total subjects processed: 5
Total windows created: 8794

Sleep stage distribution across all subjects:
sleep_stage
Wake        3270
N2          2442
N1          1320
N3          1066
REM          650
Artifact      46
Name: count, dtype: int64

Saved dataset to: /content/internsh

In [9]:
!cp -r "/content/internship/Sleep_Stage_Dataset/" "/content/drive/MyDrive/IITG_assignments/internship/internship"

In [10]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, TimeDistributed
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt



# Load the sleep stage dataset
sleep_stage_df = pd.read_csv('/content/internship/Sleep_Stage_Dataset/sleep_stage_classification_dataset.csv')

# Convert datetime columns
sleep_stage_df['window_start'] = pd.to_datetime(sleep_stage_df['window_start'])
sleep_stage_df['window_end'] = pd.to_datetime(sleep_stage_df['window_end'])

# Preprocessing
# Filter out 'Unknown' and 'Artifact' stages
sleep_stage_df = sleep_stage_df[~sleep_stage_df['sleep_stage'].isin(['Unknown', 'Artifact'])]

# Encode sleep stages
stage_order = ['Wake', 'N1', 'N2', 'N3', 'REM']
label_encoder = LabelEncoder()
label_encoder.fit(stage_order)
sleep_stage_df['stage_encoded'] = label_encoder.transform(sleep_stage_df['sleep_stage'])

# Features and labels
numeric_features = ['flow_mean', 'flow_std', 'flow_min', 'flow_max',
                   'thoracic_mean', 'thoracic_std', 'thoracic_min', 'thoracic_max']
X = sleep_stage_df[numeric_features]
y = sleep_stage_df['stage_encoded']
groups = sleep_stage_df['subject'].values

# Standardize numerical features
scaler = StandardScaler()
X[numeric_features] = scaler.fit_transform(X[numeric_features])

# Convert to numpy arrays
X = X.values.astype('float32')
y = y.values

# Reshape data for 1D CNN and Conv-LSTM (samples, timesteps, features)
X_reshaped = X.reshape(X.shape[0], 1, X.shape[1])

# Define Leave-One-Subject-Out cross-validator
logo = LeaveOneGroupOut()

# Initialize dictionaries to store results
results_cnn = {'accuracy': [], 'precision': [], 'recall': [], 'sensitivity': [], 'specificity': []}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numeric_features] = scaler.fit_transform(X[numeric_features])


In [11]:
# Define model creation functions
def create_1d_cnn(input_shape, num_classes):
    model = Sequential([
        Conv1D(64, kernel_size=1, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=1),
        Conv1D(128, kernel_size=1, activation='relu'),
        MaxPooling1D(pool_size=1),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    num_classes = cm.shape[0]

    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'sensitivity': 0,
        'specificity': 0
    }

    # Calculate sensitivity (recall for each class)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    metrics['sensitivity'] = np.mean(recalls)

    # Calculate specificity
    specificities = []
    for i in range(num_classes):
        tn = np.sum(np.delete(np.delete(cm, i, axis=0), i, axis=1))
        fp = np.sum(cm[:, i]) - cm[i, i]
        specificities.append(tn / (tn + fp))
    metrics['specificity'] = np.mean(specificities)

    return metrics, cm

In [12]:
# Perform Leave-One-Subject-Out cross-validation
for fold, (train_idx, test_idx) in enumerate(logo.split(X, y, groups)):
    print(f"\n=== Fold {fold+1} - Testing on subject {groups[test_idx[0]]} ===")

    # Split data
    X_train, X_test = X_reshaped[train_idx], X_reshaped[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Get input shape
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = len(np.unique(y))

    # Train and evaluate 1D CNN
    print("\nTraining 1D CNN...")
    cnn_model = create_1d_cnn(input_shape, num_classes)
    cnn_history = cnn_model.fit(X_train, y_train,
                               epochs=20,
                               batch_size=32,
                               validation_split=0.2,
                               verbose=1)

    # Evaluate 1D CNN
    cnn_pred = np.argmax(cnn_model.predict(X_test), axis=1)

    # Calculate metrics
    cnn_metrics, cnn_cm = calculate_metrics(y_test, cnn_pred)

    # Store results
    results_cnn['accuracy'].append(cnn_metrics['accuracy'])
    results_cnn['precision'].append(cnn_metrics['precision'])
    results_cnn['recall'].append(cnn_metrics['recall'])
    results_cnn['sensitivity'].append(cnn_metrics['sensitivity'])
    results_cnn['specificity'].append(cnn_metrics['specificity'])

    # Print metrics
    print(f"\n1D CNN Metrics for fold {fold+1}:")
    print(f"Accuracy: {cnn_metrics['accuracy']:.4f}")
    print(f"Precision: {cnn_metrics['precision']:.4f}")
    print(f"Recall: {cnn_metrics['recall']:.4f}")
    print(f"Sensitivity: {cnn_metrics['sensitivity']:.4f}")
    print(f"Specificity: {cnn_metrics['specificity']:.4f}")
    print("Confusion Matrix:")
    print(cnn_cm)




=== Fold 1 - Testing on subject AP01 ===

Training 1D CNN...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.4224 - loss: 1.3986 - val_accuracy: 0.1140 - val_loss: 3.6041
Epoch 2/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5033 - loss: 1.2220 - val_accuracy: 0.1248 - val_loss: 4.3590
Epoch 3/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5375 - loss: 1.1649 - val_accuracy: 0.1140 - val_loss: 5.4096
Epoch 4/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.5619 - loss: 1.1233 - val_accuracy: 0.1169 - val_loss: 5.5956
Epoch 5/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.5669 - loss: 1.0824 - val_accuracy: 0.1212 - val_loss: 5.8108
Epoch 6/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5857 - loss: 1.0397 - val_accuracy: 0.1176 - val_loss: 6.0973
Epoch 7/20
[1m174/174[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.4875 - loss: 1.3361 - val_accuracy: 0.0786 - val_loss: 2.2980
Epoch 2/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5437 - loss: 1.2000 - val_accuracy: 0.0914 - val_loss: 2.5771
Epoch 3/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5576 - loss: 1.1288 - val_accuracy: 0.1936 - val_loss: 2.4775
Epoch 4/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5808 - loss: 1.0825 - val_accuracy: 0.1986 - val_loss: 2.4957
Epoch 5/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5697 - loss: 1.0771 - val_accuracy: 0.1421 - val_loss: 2.7427
Epoch 6/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5853 - loss: 1.0444 - val_accuracy: 0.1507 - val_loss: 3.1654
Epoch 7/20
[1m175/175[0m [32m━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.3847 - loss: 1.4385 - val_accuracy: 0.0787 - val_loss: 2.2327
Epoch 2/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4678 - loss: 1.3316 - val_accuracy: 0.0808 - val_loss: 2.3020
Epoch 3/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4851 - loss: 1.2434 - val_accuracy: 0.1276 - val_loss: 2.1795
Epoch 4/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4987 - loss: 1.2269 - val_accuracy: 0.1006 - val_loss: 2.9100
Epoch 5/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5175 - loss: 1.1718 - val_accuracy: 0.0999 - val_loss: 3.1965
Epoch 6/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5239 - loss: 1.1541 - val_accuracy: 0.1169 - val_loss: 3.4517
Epoch 7/20
[1m177/177[0m [32m━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.4559 - loss: 1.3754 - val_accuracy: 0.0814 - val_loss: 2.3182
Epoch 2/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5714 - loss: 1.1327 - val_accuracy: 0.0990 - val_loss: 2.7799
Epoch 3/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6105 - loss: 1.0440 - val_accuracy: 0.1026 - val_loss: 3.0548
Epoch 4/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6206 - loss: 0.9999 - val_accuracy: 0.1540 - val_loss: 2.5709
Epoch 5/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6327 - loss: 0.9809 - val_accuracy: 0.1452 - val_loss: 2.7467
Epoch 6/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6418 - loss: 0.9451 - val_accuracy: 0.1452 - val_loss: 2.8995
Epoch 7/20
[1m171/171[0m [32m━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.4982 - loss: 1.3281 - val_accuracy: 0.1556 - val_loss: 2.0682
Epoch 2/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.5754 - loss: 1.1608 - val_accuracy: 0.1583 - val_loss: 2.0716
Epoch 3/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6091 - loss: 1.0677 - val_accuracy: 0.1625 - val_loss: 2.2860
Epoch 4/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6164 - loss: 1.0315 - val_accuracy: 0.1722 - val_loss: 2.1146
Epoch 5/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6309 - loss: 0.9817 - val_accuracy: 0.1722 - val_loss: 2.3140
Epoch 6/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6388 - loss: 0.9694 - val_accuracy: 0.1736 - val_loss: 2.3846
Epoch 7/20
[1m180/180[0m [32m━━━━━━━

In [13]:

# Calculate and print aggregated results
def print_aggregated_results(results, model_name):
    print(f"\n=== Aggregated Results for {model_name} ===")
    for metric in results:
        values = results[metric]
        print(f"{metric.capitalize()}: Mean = {np.mean(values):.4f}, Std = {np.std(values):.4f}")

print_aggregated_results(results_cnn, "1D CNN")



=== Aggregated Results for 1D CNN ===
Accuracy: Mean = 0.3305, Std = 0.1495
Precision: Mean = 0.3004, Std = 0.1218
Recall: Mean = 0.3305, Std = 0.1495
Sensitivity: Mean = 0.2211, Std = 0.0399
Specificity: Mean = 0.8028, Std = 0.0186


In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, TimeDistributed, ConvLSTM1D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

In [15]:
# Load the sleep stage dataset
sleep_stage_df = pd.read_csv('/content/internship/Sleep_Stage_Dataset/sleep_stage_classification_dataset.csv')

# Convert datetime columns
sleep_stage_df['window_start'] = pd.to_datetime(sleep_stage_df['window_start'])
sleep_stage_df['window_end'] = pd.to_datetime(sleep_stage_df['window_end'])

# Preprocessing
# Filter out 'Unknown' and 'Artifact' stages
sleep_stage_df = sleep_stage_df[~sleep_stage_df['sleep_stage'].isin(['Unknown', 'Artifact'])]

# Encode sleep stages
stage_order = ['Wake', 'N1', 'N2', 'N3', 'REM']
label_encoder = LabelEncoder()
label_encoder.fit(stage_order)
sleep_stage_df['stage_encoded'] = label_encoder.transform(sleep_stage_df['sleep_stage'])

# Features and labels
numeric_features = ['flow_mean', 'flow_std', 'flow_min', 'flow_max',
                   'thoracic_mean', 'thoracic_std', 'thoracic_min', 'thoracic_max']
X = sleep_stage_df[numeric_features]
y = sleep_stage_df['stage_encoded']
groups = sleep_stage_df['subject'].values

# Standardize numerical features
scaler = StandardScaler()
X[numeric_features] = scaler.fit_transform(X[numeric_features])

# Convert to numpy arrays
X = X.values.astype('float32')
y = y.values

# Reshape data for ConvLSTM (samples, timesteps, features, channels)
# We'll create 5 timesteps by stacking consecutive samples
n_timesteps = 5
n_features = X.shape[1]
n_samples = X.shape[0] - n_timesteps + 1

X_reshaped = np.zeros((n_samples, n_timesteps, n_features))
y_reshaped = np.zeros(n_samples)

for i in range(n_samples):
    X_reshaped[i] = X[i:i+n_timesteps]
    # Use the label from the last timestep
    y_reshaped[i] = y[i+n_timesteps-1]

# Update groups to match new shape
groups = groups[n_timesteps-1:]

# Define Leave-One-Subject-Out cross-validator
logo = LeaveOneGroupOut()

# Initialize dictionaries to store results
results_conv_lstm = {'accuracy': [], 'precision': [], 'recall': [], 'sensitivity': [], 'specificity': []}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numeric_features] = scaler.fit_transform(X[numeric_features])


In [16]:
# Define model creation functions
def create_conv_lstm(input_shape, num_classes):
    model = Sequential([
        # ConvLSTM1D layer - input shape: (timesteps, features, channels)
        ConvLSTM1D(64, kernel_size=1, activation='tanh',
                  recurrent_activation='hard_sigmoid',
                  return_sequences=True,
                  input_shape=input_shape),
        # TimeDistributed wrapper for 1D operations
        TimeDistributed(Conv1D(128, kernel_size=1, activation='relu')),
        TimeDistributed(MaxPooling1D(pool_size=1)),
        TimeDistributed(Flatten()),
        LSTM(128, return_sequences=False),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])
    return model

def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    num_classes = cm.shape[0]

    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'sensitivity': 0,
        'specificity': 0
    }

    # Calculate sensitivity (recall for each class)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    metrics['sensitivity'] = np.mean(recalls)

    # Calculate specificity
    specificities = []
    for i in range(num_classes):
        tn = np.sum(np.delete(np.delete(cm, i, axis=0), i, axis=1))
        fp = np.sum(cm[:, i]) - cm[i, i]
        specificities.append(tn / (tn + fp))
    metrics['specificity'] = np.mean(specificities)

    return metrics, cm

In [17]:
# Perform Leave-One-Subject-Out cross-validation
for fold, (train_idx, test_idx) in enumerate(logo.split(X_reshaped, y_reshaped, groups)):
    print(f"\n=== Fold {fold+1} - Testing on subject {groups[test_idx[0]]} ===")

    # Split data
    X_train, X_test = X_reshaped[train_idx], X_reshaped[test_idx]
    y_train, y_test = y_reshaped[train_idx], y_reshaped[test_idx]

    # Get input shape
    input_shape = (X_train.shape[1], X_train.shape[2], 1)  # Adding channel dimension
    num_classes = len(np.unique(y))

    # Train and evaluate ConvLSTM
    print("\nTraining ConvLSTM...")
    conv_lstm_model = create_conv_lstm(input_shape, num_classes)
    conv_lstm_history = conv_lstm_model.fit(X_train, y_train,
                                          epochs=20,
                                          batch_size=32,
                                          validation_split=0.2,
                                          verbose=1)

    # Evaluate ConvLSTM
    conv_lstm_pred = np.argmax(conv_lstm_model.predict(X_test), axis=1)

    # Calculate metrics
    conv_lstm_metrics, conv_lstm_cm = calculate_metrics(y_test, conv_lstm_pred)

    # Store results
    results_conv_lstm['accuracy'].append(conv_lstm_metrics['accuracy'])
    results_conv_lstm['precision'].append(conv_lstm_metrics['precision'])
    results_conv_lstm['recall'].append(conv_lstm_metrics['recall'])
    results_conv_lstm['sensitivity'].append(conv_lstm_metrics['sensitivity'])
    results_conv_lstm['specificity'].append(conv_lstm_metrics['specificity'])

    # Print metrics
    print(f"\nConvLSTM Metrics for fold {fold+1}:")
    print(f"Accuracy: {conv_lstm_metrics['accuracy']:.4f}")
    print(f"Precision: {conv_lstm_metrics['precision']:.4f}")
    print(f"Recall: {conv_lstm_metrics['recall']:.4f}")
    print(f"Sensitivity: {conv_lstm_metrics['sensitivity']:.4f}")
    print(f"Specificity: {conv_lstm_metrics['specificity']:.4f}")
    print("Confusion Matrix:")
    print(conv_lstm_cm)


=== Fold 1 - Testing on subject AP01 ===

Training ConvLSTM...


  super().__init__(**kwargs)


Epoch 1/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 112ms/step - accuracy: 0.4416 - loss: 1.4030 - val_accuracy: 0.0794 - val_loss: 6.6149
Epoch 2/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 103ms/step - accuracy: 0.5207 - loss: 1.1513 - val_accuracy: 0.1299 - val_loss: 6.6672
Epoch 3/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 103ms/step - accuracy: 0.5720 - loss: 1.0480 - val_accuracy: 0.1248 - val_loss: 8.6564
Epoch 4/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.5895 - loss: 0.9896 - val_accuracy: 0.1017 - val_loss: 9.7806
Epoch 5/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 110ms/step - accuracy: 0.6291 - loss: 0.9032 - val_accuracy: 0.1140 - val_loss: 10.1480
Epoch 6/20
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 121ms/step - accuracy: 0.6443 - loss: 0.8623 - val_accuracy: 0.1147 - val_loss: 11.3815
Epoch 7/

  super().__init__(**kwargs)


[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 110ms/step - accuracy: 0.5091 - loss: 1.3171 - val_accuracy: 0.0786 - val_loss: 2.2423
Epoch 2/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 100ms/step - accuracy: 0.5718 - loss: 1.1138 - val_accuracy: 0.0929 - val_loss: 2.7456
Epoch 3/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 103ms/step - accuracy: 0.6147 - loss: 0.9945 - val_accuracy: 0.1651 - val_loss: 3.7474
Epoch 4/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 102ms/step - accuracy: 0.6145 - loss: 0.9633 - val_accuracy: 0.1287 - val_loss: 4.7354
Epoch 5/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 103ms/step - accuracy: 0.6366 - loss: 0.9159 - val_accuracy: 0.1315 - val_loss: 4.9728
Epoch 6/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 108ms/step - accuracy: 0.6585 - loss: 0.8593 - val_accuracy: 0.1458 - val_loss: 5.4008
Epoch 7/20
[1m175/17

  super().__init__(**kwargs)


[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 105ms/step - accuracy: 0.4113 - loss: 1.4352 - val_accuracy: 0.0780 - val_loss: 2.7485
Epoch 2/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 105ms/step - accuracy: 0.4826 - loss: 1.2540 - val_accuracy: 0.0738 - val_loss: 4.0378
Epoch 3/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.5437 - loss: 1.1121 - val_accuracy: 0.0745 - val_loss: 4.7799
Epoch 4/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 98ms/step - accuracy: 0.5778 - loss: 1.0205 - val_accuracy: 0.0858 - val_loss: 5.6284
Epoch 5/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 100ms/step - accuracy: 0.6152 - loss: 0.9402 - val_accuracy: 0.0851 - val_loss: 6.0633
Epoch 6/20
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.6430 - loss: 0.9014 - val_accuracy: 0.0844 - val_loss: 6.0657
Epoch 7/20
[1m177/177

  super().__init__(**kwargs)


[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 101ms/step - accuracy: 0.5177 - loss: 1.3109 - val_accuracy: 0.0822 - val_loss: 3.7751
Epoch 2/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.6259 - loss: 1.0143 - val_accuracy: 0.1247 - val_loss: 3.8775
Epoch 3/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 100ms/step - accuracy: 0.6507 - loss: 0.9084 - val_accuracy: 0.0983 - val_loss: 4.2551
Epoch 4/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 97ms/step - accuracy: 0.6783 - loss: 0.8409 - val_accuracy: 0.1321 - val_loss: 5.0838
Epoch 5/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 95ms/step - accuracy: 0.6796 - loss: 0.8108 - val_accuracy: 0.1387 - val_loss: 5.5022
Epoch 6/20
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 101ms/step - accuracy: 0.6956 - loss: 0.7750 - val_accuracy: 0.1592 - val_loss: 5.9710
Epoch 7/20
[1m171/171

  super().__init__(**kwargs)


Epoch 1/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 105ms/step - accuracy: 0.5077 - loss: 1.3301 - val_accuracy: 0.1571 - val_loss: 1.9001
Epoch 2/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 104ms/step - accuracy: 0.5927 - loss: 1.0763 - val_accuracy: 0.1543 - val_loss: 2.2808
Epoch 3/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.6485 - loss: 0.9402 - val_accuracy: 0.1536 - val_loss: 2.4368
Epoch 4/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 97ms/step - accuracy: 0.6667 - loss: 0.8705 - val_accuracy: 0.1591 - val_loss: 2.4632
Epoch 5/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 116ms/step - accuracy: 0.6834 - loss: 0.8259 - val_accuracy: 0.1536 - val_loss: 2.4816
Epoch 6/20
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 111ms/step - accuracy: 0.6887 - loss: 0.8105 - val_accuracy: 0.1584 - val_loss: 2.8693
Epoch 7/20


In [18]:
# Calculate and print aggregated results
def print_aggregated_results(results, model_name):
    print(f"\n=== Aggregated Results for {model_name} ===")
    for metric in results:
        values = results[metric]
        print(f"{metric.capitalize()}: Mean = {np.mean(values):.4f}, Std = {np.std(values):.4f}")

print_aggregated_results(results_conv_lstm, "ConvLSTM")


=== Aggregated Results for ConvLSTM ===
Accuracy: Mean = 0.3232, Std = 0.1084
Precision: Mean = 0.3310, Std = 0.0952
Recall: Mean = 0.3232, Std = 0.1084
Sensitivity: Mean = 0.2111, Std = 0.0283
Specificity: Mean = 0.8031, Std = 0.0135
