# 1. Importing dependencies

In [None]:
# Install into the running kernel's environment (%pip), not whichever `pip`
# happens to be first on the shell PATH.
%pip install -q lightgbm
# Pinned to the release recorded in this cell's captured output.
%pip install -q dataprep==0.4.5

Collecting dataprep
  Downloading dataprep-0.4.5-py3-none-any.whl.metadata (14 kB)
Collecting bokeh<3,>=2 (from dataprep)
  Downloading bokeh-2.4.3-py3-none-any.whl.metadata (14 kB)
Collecting flask<3,>=2 (from dataprep)
  Downloading flask-2.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting flask_cors<4.0.0,>=3.0.10 (from dataprep)
  Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting jinja2<3.1,>=3.0 (from dataprep)
  Downloading Jinja2-3.0.3-py3-none-any.whl.metadata (3.5 kB)
Collecting jsonpath-ng<2.0,>=1.5 (from dataprep)
  Downloading jsonpath_ng-1.7.0-py3-none-any.whl.metadata (18 kB)
Collecting metaphone<0.7,>=0.6 (from dataprep)
  Downloading Metaphone-0.6.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pandas<2.0,>=1.1 (from dataprep)
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting pydantic<2.0,>=1.6 (from dataprep)
  Downloading pydantic-1.10.19-cp310-

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.metrics import mean_absolute_error as mae
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
import random
import os
def seed_everything(seed=42):
    """Seed Python's `random`, NumPy's legacy global RNG and PYTHONHASHSEED.

    seed: value applied to every generator (default 42).
    """
    # The environment variable is exported for child processes; the hash seed
    # of the already-running interpreter is fixed at start-up.
    os.environ.update(PYTHONHASHSEED=str(seed))
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(seed=42)

# 2. Loading Data

In [None]:
# Loading data
# Both files are addressed by relative path, so they must be present in the
# kernel's current working directory; otherwise read_csv raises
# FileNotFoundError and every later cell that uses `train`/`test` fails.
train = pd.read_csv('Train.csv')
test = pd.read_csv('Test.csv')


FileNotFoundError: [Errno 2] No such file or directory: 'Train.csv'

In [None]:
# Preview train dataset
train.head()

In [None]:
# Preview test dataset
test.head()

# 3. Data Exploration and Visualization

In [None]:
def ECG_analysis(Data, ID, n_channels=4, n_timesteps=128):
    """Plot the raw signal of one recording, one figure per channel.

    Data: DataFrame with columns named 'channel_<c>_timestep_<t>'.
    ID: positional row index (used with .iloc) of the recording to draw.
    n_channels: number of leading channels to plot (default 4, as before).
    n_timesteps: samples per channel (default 128, as before).
    """
    timesteps = range(n_timesteps)
    for j in range(n_channels):
        channels = [f'channel_{j}_timestep_{i}' for i in timesteps]
        fig, ax = plt.subplots(figsize=(20, 5))
        ax.plot(timesteps, Data[channels].iloc[ID])
        # Label the figure with the row that is actually drawn; the title was
        # previously hard-coded to "user 1865" regardless of ID.
        ax.set_title(f'channel : {j} for row {ID}')
        ax.set_xlabel('timestep')
ECG_analysis(train, 0)

## Distribution plots and comments

In [None]:
# Add a coarse 'low'/'high' label per target: valence counts as 'low' when it
# is <= 1 and arousal when it is <= 3; everything above the cutoff is 'high'.
for _target, _cutoff in (('valence', 1), ('arousal', 3)):
    train[f'{_target} is'] = np.where(train[_target] <= _cutoff, 'low', 'high')

# 3. Statistical summaries

In [None]:
# Train statistical summary
train.describe(include = 'all')

# 4. Missing values

In [None]:
def missing_columns(dataframe):
    """
    Summarise the columns of `dataframe` that contain missing values.

    dataframe: the DataFrame to inspect.

    Returns a DataFrame indexed by column name, sorted by descending number
    of missing values, with the columns 'Missing Count', 'Missing Count Ratio'
    (fraction of rows) and 'Missing Count %' (percentage rounded to one
    decimal). Columns with no missing value are omitted, so the result is
    empty when nothing is missing.
    """
    n_rows = len(dataframe)

    # number of nulls per column, worst offenders first
    missing_values = dataframe.isnull().sum().sort_values(ascending=False)

    # Share of rows that are null. A frame without rows has nothing missing;
    # dividing by zero there would give NaN ratios that slip through the
    # "!= 0" filter and report every column as missing.
    if n_rows:
        missing_ratio = missing_values / n_rows
        missing_values_pct = 100 * missing_values / n_rows
    else:
        missing_ratio = missing_values.astype(float)
        missing_values_pct = missing_values.astype(float)

    # assemble the three measures side by side
    concat_values = pd.concat(
        [missing_values, missing_ratio, missing_values_pct.round(1)], axis=1
    )
    concat_values.columns = ['Missing Count', 'Missing Count Ratio', 'Missing Count %']

    # keep only the columns that actually have missing values
    return concat_values[concat_values['Missing Count'] != 0]

In [None]:
missing_columns(train).head(40).index

Index([], dtype='object')

In [None]:
missing_columns(test).head(5)

Unnamed: 0,Missing Count,Missing Count Ratio,Missing Count %


# 5. Pre-processing & Feature Engineering

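## Feature Extraction
We will use the EEG signals to generate power bands and then other indices such as ASI and BLI.
Note: the code cell below currently summarises each channel only with its standard deviation and deciles; the band-power features described here are not computed in it.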

[4-8]Hz: theta band

[8-13]Hz: alpha band

[13-30]Hz: beta band


[25-45]Hz: gamma band

In [None]:
def calc_total(n, DataFrame):
    """Replace channel `n`'s raw timestep columns with summary statistics.

    n: channel number used in the 'channel_<n>_timestep_<t>' column names.
    DataFrame: frame holding the 128 timestep columns of that channel.

    The row-wise standard deviation and the nine deciles (q1..q9, i.e. the
    10%..90% quantiles) are written into `DataFrame` itself as
    'std_channel_<n>' and 'q<k>_channel_<n>'. The returned frame is a new
    object with those columns and without the raw timestep columns.
    """
    channels = [f'channel_{n}_timestep_{step}' for step in range(128)]
    suffix = f'_channel_{n}'

    DataFrame['std' + suffix] = DataFrame[channels].std(axis=1)

    deciles = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    for rank, level in enumerate(deciles, start=1):
        DataFrame[f'q{rank}{suffix}'] = DataFrame[channels].quantile(q=level, axis=1)

    return DataFrame.drop(columns=channels)


# Summarise channels 0-13 in both splits. calc_total returns a frame without
# that channel's raw timestep columns, so each result is rebound to the name.
# NOTE(review): not re-runnable on the processed frames - the channel columns
# (and, below, 'ID') no longer exist after the first pass.
for i in range(0,14):
    train=calc_total(i,train)
    test=calc_total(i,test)

# Remove the 'ID' column from both frames.
train = train.drop(['ID'],axis='columns')
test = test.drop(['ID'],axis='columns')

In [None]:
train.head()

Unnamed: 0,valence,arousal,valence is,arousal is,std_channel_0,q1_channel_0,q2_channel_0,q3_channel_0,q4_channel_0,q5_channel_0,...,std_channel_13,q1_channel_13,q2_channel_13,q3_channel_13,q4_channel_13,q5_channel_13,q6_channel_13,q7_channel_13,q8_channel_13,q9_channel_13
0,5.0,6.0,high,high,1.079372,-0.921184,-0.670085,-0.218395,0.092659,0.358199,...,2.294949,-2.70946,-1.826675,-1.186146,-0.701064,-0.093284,0.564866,1.368542,2.367223,2.926463
1,1.0,7.0,low,high,3.02642,-3.34327,-1.695489,-0.886067,-0.277202,0.374734,...,7.532506,-9.4409,-4.639735,-2.966572,-1.383055,-0.094339,1.141791,3.608065,7.021057,9.983664
2,1.0,6.0,low,high,7.322093,-8.57733,-6.206063,-3.287634,-2.064329,-0.652447,...,11.27392,-14.965158,-8.756317,-5.022458,-3.402868,-0.195952,2.510057,4.368473,9.393966,15.455546
3,2.0,8.0,high,high,5.285854,-6.00223,-3.864026,-1.861933,-1.013372,0.61918,...,4.942462,-5.513996,-4.614695,-2.48469,-1.331437,-0.7729,0.227793,2.449084,4.551273,6.453034
4,1.0,7.0,low,high,5.761645,-6.110167,-3.68283,-2.404383,-1.202972,-0.388403,...,5.1353,-7.140975,-4.605986,-3.229812,-1.537471,0.276957,1.322975,2.779864,4.333154,6.885472


In [None]:
# Write the current train/test frames to CSV in the working directory,
# leaving out the pandas index column.
train.to_csv('trainUpdated.csv',index=False)
test.to_csv('testUpdated.csv',index=False)


# 6. Model Building

In [None]:
# Targets and features. The 'valence is' / 'arousal is' helper labels created
# during exploration are strings derived from the targets: they cannot be
# normalised, would leak the answer into the features, and do not exist in
# `test`, so they are removed from X when present.
TARGET_COLUMNS = ['valence', 'arousal']
LABEL_HELPER_COLUMNS = ['valence is', 'arousal is']

scaler = Normalizer()
y_pred = []
y = train[TARGET_COLUMNS]
X = (
    train
    .drop(columns=TARGET_COLUMNS)
    .drop(columns=LABEL_HELPER_COLUMNS, errors='ignore')
)

# Normalizer rescales each row independently and learns nothing from the data,
# so the test set only needs transform().
X_transformed = scaler.fit_transform(X)
X = pd.DataFrame(X_transformed, columns=X.columns)
test_df_transformed = scaler.transform(test)
test = pd.DataFrame(test_df_transformed, columns=test.columns)

# 15-fold cross-validation of a 1-nearest-neighbour regressor (one per target).
scores_knn = []
kfold = KFold(n_splits=15, shuffle=True, random_state=42)

for i, (train_index, test_index) in enumerate(kfold.split(X, y), start=1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    knn = MultiOutputRegressor(KNeighborsRegressor(n_neighbors=1, weights='distance', p=1))
    knn.fit(X_train, y_train)
    # value of the estimator's own score() on the held-out fold
    mean_accuracy = knn.score(X_test, y_test)
    print(mean_accuracy)
    preds = knn.predict(X_test)
    fold_mae = mae(y_test, preds)  # computed once, reused for print and list
    print("MAE Test (" + str(i) + ") = " + str(fold_mae))
    scores_knn.append(fold_mae)
    # keep this fold's predictions for the real test set
    y_pred.append(knn.predict(test))

# average over however many folds actually ran instead of a literal 15
print('MAE Average : ', sum(scores_knn) / len(scores_knn))

plt.figure(figsize=(20, 5))
plt.plot(range(len(scores_knn)), scores_knn)
plt.xlabel('fold')
plt.ylabel('MAE')
plt.title('MAE for each fold')


In [None]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
# 15-fold cross-validation of an RBF-kernel SVR wrapped so that one regressor
# is fitted per target column.
scores_svm = []
kfold = KFold(n_splits=15, shuffle=True, random_state=42)

i = 1
for train_index, test_index in kfold.split(X, y):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    svm = MultiOutputRegressor(SVR(kernel='rbf', C=1e3, gamma=0.1))
    svm.fit(X_train, y_train)

    # value of the estimator's own score() on the held-out fold
    mean_accuracy = svm.score(X_test, y_test)
    print(mean_accuracy)

    preds = svm.predict(X_test)
    fold_mae = mae(y_test, preds)
    print(f"MAE Test ({i}) = {fold_mae}")
    scores_svm.append(fold_mae)

    # collect this fold's predictions for the real test set
    y_pred.append(svm.predict(test))
    i += 1

print('MAE Average : ', sum(scores_svm) / 15)

plt.figure(figsize=(20, 5))
plt.plot(range(15), scores_svm)
plt.title('MAE for each fold')

In [None]:
# 3-fold cross-validation of a random forest wrapped so that one forest is
# fitted per target column. The same `rf` object is refitted on every fold, so
# after the loop it holds the model trained on the last fold's training split.
rf = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
scores_rf = []
kfold = KFold(n_splits=3, shuffle=True, random_state=42)
for i, (train_index, test_index) in enumerate(kfold.split(X, y), start=1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    rf.fit(X_train, y_train)
    # Score the forest itself; this line used to call knn.score and therefore
    # reported the KNN model left over from an earlier cell.
    mean_accuracy = rf.score(X_test, y_test)
    print(mean_accuracy)
    preds_rf = rf.predict(X_test)

    fold_mae = mae(y_test, preds_rf)  # computed once, reused below
    print("MAE Test (RandomForest, " + str(i) + ") = " + str(fold_mae))
    scores_rf.append(fold_mae)

    # collect this fold's predictions for the real test set
    y_pred.append(rf.predict(test))

# average over the folds that ran instead of a literal 3
print('MAE Average (RandomForest): ', sum(scores_rf) / len(scores_rf))
plt.figure(figsize=(20,5))
plt.plot(range(len(scores_rf)), scores_rf)
plt.xlabel('fold')
plt.ylabel('MAE')
plt.title('MAE for each fold')

In [None]:
import streamlit as st
import pandas as pd
from joblib import load
from io import BytesIO

# Load model (assumes model is already saved as 'random_forest_model.pkl')
# NOTE(review): within this notebook that file is only written by the pickling
# cell further down, so on a fresh top-to-bottom run this load fails unless the
# file already exists on disk. Loading a pickle executes code stored in the
# file, so only point this at files you created yourself.
model = load("random_forest_model.pkl")

def process_eeg_data(df):
    """Predict valence/arousal for every row of `df` and build one report per row.

    df: feature table handed unchanged to the module-level `model.predict`.

    Returns a list of report strings produced by `assess_emotions`. A
    prediction that raises ValueError while being converted or assessed is
    reported with print() and left out of the list.
    """
    report = []
    for prediction in model.predict(df):
        try:
            valence, arousal = float(prediction[0]), float(prediction[1])
            report.append(assess_emotions(valence, arousal))
        except ValueError as e:
            print(f"Error unpacking prediction: {e}")
    return report

def assess_emotions(valence, arousal):
    """
    Build the combined report text for one valence/arousal pair.

    The pair is rated for Stress, Insomnia and Anxiety (in that order) and the
    formatted section of each condition is concatenated into a single string.
    """
    assessors = (
        ("Stress", stress_metrics),
        ("Insomnia", insomnia_metrics),
        ("Anxiety", anxiety_metrics),
    )
    # rate every condition first, then format them in the same order
    results = {label: assess(valence, arousal) for label, assess in assessors}
    return "".join(
        format_condition_report(condition, result)
        for condition, result in results.items()
    )

def format_condition_report(condition, result):
    """
    Render one condition's result as a markdown heading plus a coloured HTML banner.

    condition: display name of the condition.
    result: 4-item sequence (valence, arousal, severity, color); only the
        severity and the color appear in the output.
    """
    _valence, _arousal, severity, color = result
    icon = get_condition_icon(severity)
    return f"""
    ### {condition} {icon}

    <div style="background-color: {color}; padding: 10px; border-radius: 5px; color: white; font-weight: bold;">
        {severity} - {condition}
    </div>
    """

def get_condition_icon(severity):
    """
    Return the emoji shown next to a severity level.

    "SEVERE", "MODERATE" and "MILD" each have their own icon; every other
    value gets the check-mark icon.
    """
    icon_by_level = (
        ("SEVERE", "🔥"),
        ("MODERATE", "⚠️"),
        ("MILD", "💛"),
    )
    for level, icon in icon_by_level:
        if severity == level:
            return icon
    return "✅"

# Dummy functions for stress, insomnia, and anxiety metrics
def stress_metrics(valence, arousal):
    """Grade stress for a valence/arousal pair.

    Returns [valence, arousal, severity, color]. Pairs outside the
    "valence < 5 and arousal > 5" region are "GOOD"; inside it the pair is
    "SEVERE", "MODERATE" or "MILD" when it falls in the matching band and
    "NORMAL" otherwise.
    """
    if not (valence < 5 and arousal > 5):
        return [valence, arousal, "GOOD", "green"]

    if 0 <= valence <= 2 and 7 <= arousal <= 9:
        grade = ("SEVERE", "red")
    elif 2 < valence <= 4 and 6 <= arousal <= 7:
        grade = ("MODERATE", "orange")
    elif 4 < valence <= 5 and 5 <= arousal <= 6:
        grade = ("MILD", "yellow")
    else:
        grade = ("NORMAL", "green")
    return [valence, arousal, *grade]

def insomnia_metrics(valence, arousal):
    """Grade insomnia for a valence/arousal pair.

    Returns [valence, arousal, severity, color]. Pairs outside the
    "valence < 4 and arousal > 2" region are "GOOD"; inside it the pair is
    "SEVERE", "MODERATE" or "MILD" when it falls in the matching band and
    "NORMAL" otherwise.
    """
    in_risk_region = valence < 4 and arousal > 2
    if not in_risk_region:
        severity, color = "GOOD", "green"
    elif 0 <= valence <= 1.5 and 8.5 <= arousal <= 10:
        severity, color = "SEVERE", "red"
    elif 1.5 < valence <= 3 and 7.5 <= arousal <= 8.5:
        severity, color = "MODERATE", "orange"
    elif 3 < valence <= 4 and 6 <= arousal <= 7.5:
        severity, color = "MILD", "yellow"
    else:
        severity, color = "NORMAL", "green"
    return [valence, arousal, severity, color]

def anxiety_metrics(valence, arousal):
    """Grade anxiety for a valence/arousal pair.

    Returns [valence, arousal, severity, color]. The bands are checked from
    most to least severe; a pair that matches none of them is "GOOD".
    """
    # Severe: very low valence with high arousal
    if 0 <= valence <= 2 and 8 <= arousal <= 10:
        return [valence, arousal, "SEVERE", "red"]

    # Moderate: low valence with moderately high arousal
    if 2 < valence <= 3 and 7 <= arousal <= 8:
        return [valence, arousal, "MODERATE", "orange"]

    # Mild: somewhat low valence with moderate arousal
    if 3 < valence <= 4 and 6 <= arousal <= 7:
        return [valence, arousal, "MILD", "yellow"]

    # Normal: mid-range valence and arousal
    if 4 < valence <= 5 and 5 <= arousal <= 6:
        return [valence, arousal, "NORMAL", "green"]

    # Anything else is treated as a good emotional state
    return [valence, arousal, "GOOD", "green"]

# Streamlit UI: upload a table, show a preview, run the model on it and render
# one emotional-state report per row.
st.title('EEG Disease Detection and Emotional State Analysis')

st.write("""
    Upload your EEG data in CSV, Excel, or TXT format to determine the presence of diseases/disorders like stress, insomnia, or anxiety.
""")

# Upload file
uploaded_file = st.file_uploader("Choose a file (CSV, Excel, or TXT)", type=["csv", "xlsx", "txt"])

if uploaded_file is not None:
    try:
        # Determine the file type and read it accordingly
        file_extension = uploaded_file.name.split('.')[-1].lower()

        if file_extension == 'csv':
            df = pd.read_csv(uploaded_file)
        elif file_extension == 'xlsx':
            df = pd.read_excel(uploaded_file)
        elif file_extension == 'txt':
            # Whitespace-delimited UTF-8 text. sep=r'\s+' is the documented
            # equivalent of the deprecated delim_whitespace=True, and the
            # upload can be read directly without a decode/encode round-trip.
            df = pd.read_csv(uploaded_file, sep=r'\s+', encoding='utf-8')
        else:
            # Fail with a clear message instead of a NameError on `df` below.
            raise ValueError(f"Unsupported file type: .{file_extension}")

        # Show a preview of the data
        st.subheader('Preview of Uploaded Data')
        st.write(df.head())

        # Process the data and get the report
        report = process_eeg_data(df)

        st.subheader("Disease/Disorder Report")
        for entry in report:
            # entries contain HTML built by format_condition_report
            st.markdown(entry, unsafe_allow_html=True)

    except Exception as e:
        # Surface any read/predict failure in the page rather than crashing it.
        st.error(f"Error processing the file: {e}")


In [None]:
import pickle

# Persist the three fitted estimators, one pickle file per model.
model_files = {
    "KNN": ('knn_model.pkl', knn),
    "SVM": ('svm_model.pkl', svm),
    "Random Forest": ('random_forest_model.pkl', rf),
}
for model_path, fitted_model in model_files.values():
    with open(model_path, 'wb') as model_file:
        pickle.dump(fitted_model, model_file)

# Choose the best model based on average MAE
average_mae_knn = sum(scores_knn) / 15
average_mae_svm = sum(scores_svm) / 15
average_mae_rf = sum(scores_rf) / 3

average_mae_by_model = {
    "KNN": average_mae_knn,
    "SVM": average_mae_svm,
    "Random Forest": average_mae_rf,
}
# lowest average MAE wins; on a tie the earlier entry is kept
best_model_name = min(average_mae_by_model, key=average_mae_by_model.get)

print("Best Model: ", best_model_name)

# Load the best model back from the file written above
with open(model_files[best_model_name][0], 'rb') as model_file:
    best_model = pickle.load(model_file)



# 7. Predictions

In [None]:
# Make predictions on the test data using the best model
# (`test` is the normalised feature frame rebuilt in the model-building cell,
# and `best_model` is the estimator reloaded from its pickle file).
best_model_predictions = best_model.predict(test)
print("Predictions using the best model:")
# Prints the complete prediction array.
print(best_model_predictions)


In [None]:
def generate_detailed_report(model, df):
    """Classify the stress level of the first prediction `model` makes for `df`.

    model: object whose predict(df) returns a sequence of (valence, arousal)
        pairs.
    df: input passed unchanged to model.predict; only the first prediction is
        used.

    Returns [valence, arousal, severity]. severity is "GOOD" when the pair is
    outside the "valence < 5 and arousal > 5" stress region, otherwise one of
    "SEVERE", "MODERATE", "LIGHT" or "NORMAL".
    """
    prediction = model.predict(df)[0]
    print(prediction)
    valence, arousal = prediction[0], prediction[1]

    # The text report and background colour that used to be assembled here were
    # never returned (the colour assignments sat after the return statements),
    # so only the classification itself is kept.
    if not (valence < 5 and arousal > 5):
        return [valence, arousal, "GOOD"]

    if 0 <= valence <= 2 and 7 <= arousal <= 9:
        severity = "SEVERE"
    elif 2 < valence <= 4 and 6 <= arousal <= 7:
        severity = "MODERATE"
    elif 4 < valence <= 5 and 5 <= arousal <= 6:
        severity = "LIGHT"
    else:
        severity = "NORMAL"
    return [valence, arousal, severity]

