In [1]:
# Basic imports
import os

import numpy as np
import pandas as pd

from dataframe.csv_utils import (
    load_data_from_csv,
    get_filtered_data,
)

# sklearn imports
from sklearn.decomposition import PCA  # Principal Component Analysis
from sklearn.manifold import TSNE  # T-Distributed Stochastic Neighbor Embedding

# plotly imports
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode

# Relative paths of the cleaned per-subject data folders.
CLEANED_BETA_SUBJECT = [
    "../CleandDataV1/" + subject_id
    for subject_id in ("2017", "2018", "2020", "2024", "2025")
]

# Block names in sorted order: four "a_" blocks, four "b_" blocks, then the two
# stand-alone blocks.
SORTED_BLOCK_NAMES = [
    "a_hvha", "a_hvla", "a_nvha", "a_nvla",
    "b_hvha", "b_hvla", "b_nvha", "b_nvla",
    "medi", "wandering",
]

# Coarse condition label per entry: 4 x "audio", 4 x "breath", then one each of
# "meditation" and "wandering" (ten entries, presumably aligned position by
# position with SORTED_BLOCK_NAMES -- confirm).
FOUR_CONDITIONS = [
    *(["audio"] * 4),
    *(["breath"] * 4),
    "meditation",
    "wandering",
]

In [2]:
"""
    load features from csv
"""
# Read the feature table from the CSV directory named below.
dir_name = "eeg_features1"
result = load_data_from_csv(dir_name)

# How many consecutive rows share one FOUR_CONDITIONS entry. This was a bare 13
# inside the loop; presumably the number of trials per block -- confirm.
ROWS_PER_BLOCK = 13

# Per-subject label column: every FOUR_CONDITIONS entry repeated ROWS_PER_BLOCK
# times, in order. get_umap tiles this list once per subject.
all_blocks = [
    condition
    for condition in FOUR_CONDITIONS
    for _ in range(ROWS_PER_BLOCK)
]

In [3]:
# Initialise plotly's notebook mode so that figures produced by later cells
# render inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it, which would require network access when viewing -- confirm.
init_notebook_mode(connected=True)

In [25]:
from umap import UMAP

import plotly.express as px
from plotly.offline import plot
from plotly.subplots import make_subplots

def get_filter_pattern(feature: str='ALL') -> str:
    """Build the regex string that get_umap hands to get_filtered_data.

    'ALL' yields a pattern accepting any name that does not end in "sdf";
    any other value is inserted verbatim (it is not regex-escaped) and
    anchored to the end of the name.
    """
    # Alternative patterns kept for reference:
    #   everything outside the named bands:
    #       ".*(?<!GAMMA|BETA2|BETA1|ALPHA|THETA|DELTA)$"
    #   prefix match that excludes BETA2:
    #       f"^{feature}.*(?<!BETA2)$"
    tail = '(?<!sdf)' if feature == 'ALL' else feature
    return f".*{tail}$"

def get_umap(feature: str, result: pd.DataFrame, subjects: list = None):
    """Project the selected EEG features to 2-D with UMAP and attach condition labels.

    Parameters
    ----------
    feature : str
        Feature selector, turned into a regex by ``get_filter_pattern``.
    result : pd.DataFrame
        Feature table, passed through to ``get_filtered_data``.
    subjects : list, optional
        Subject identifiers passed to ``get_filtered_data``. ``None`` is
        treated as an empty list; what an empty list selects is decided by
        ``get_filtered_data``.

    Returns
    -------
    pd.DataFrame
        The filtered rows (index reset) with an added ``condition`` column and
        the two UMAP coordinates in ``C1_2d`` / ``C2_2d``.

    Raises
    ------
    ValueError
        If the number of filtered rows is not a whole multiple of
        ``len(all_blocks)``, or does not match the number of requested subjects.
    """
    # Avoid a shared mutable default; callers that pass a list are unaffected.
    subjects = [] if subjects is None else subjects

    pattern = get_filter_pattern(feature)
    normalized_eeg_features, filtered_result, _ = get_filtered_data(result, subjects, pattern)
    print(normalized_eeg_features.shape)

    # Work out how many times the per-subject label list has to be tiled from the
    # rows actually returned, and fail with a readable message before the
    # expensive UMAP fit rather than with a pandas length-mismatch error after it.
    n_rows = len(filtered_result)
    if not all_blocks or n_rows % len(all_blocks) != 0:
        raise ValueError(
            f"{n_rows} filtered rows cannot be labelled with blocks of "
            f"{len(all_blocks)} condition labels"
        )
    repeats = n_rows // len(all_blocks)
    if len(subjects) > 0 and repeats != len(subjects):
        raise ValueError(
            f"expected {len(all_blocks)} rows for each of the {len(subjects)} "
            f"requested subjects, but got {n_rows} rows"
        )

    # Run UMAP (fixed seed, so the projection is repeatable)
    umap2d = UMAP(n_components=2, init="random", random_state=0)
    proj_2d = pd.DataFrame(
        umap2d.fit_transform(normalized_eeg_features),
        columns=['C1_2d', 'C2_2d'],
    )

    # Concatenate the UMAP points and the original data row by row
    filtered_result = filtered_result.reset_index()
    filtered_result['condition'] = all_blocks * repeats
    return pd.concat([filtered_result, proj_2d], axis=1, join="inner")

def save_result(feature: str, subjects: list, plotX: pd.DataFrame):
    """Draw one UMAP scatter per subject on a two-column grid, save it as PNG and show it.

    Parameters
    ----------
    feature : str
        Feature name; used in the figure title and in the output file name.
    subjects : list
        Subject identifiers, compared against ``plotX["Subject"]``.
    plotX : pd.DataFrame
        Frame providing the ``Subject``, ``condition``, ``C1_2d`` and ``C2_2d``
        columns.

    Raises
    ------
    ValueError
        If ``subjects`` is empty (there would be no subplot to draw).

    The image is written to ``results/<feature>_<comma-joined subjects>.png``;
    the ``results`` directory is created when it does not exist yet.
    """
    if len(subjects) == 0:
        raise ValueError("save_result needs at least one subject to plot")

    title = f'EEG {feature} (average spectral power per trial)'

    n_cols = 2
    n_rows = (len(subjects) + n_cols - 1) // n_cols  # ceiling division

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        # Subject IDs may be ints; subplot titles are text.
        subplot_titles=[str(subj) for subj in subjects],
        horizontal_spacing=0.1,
        vertical_spacing=0.05,
    )

    for i, subj in enumerate(subjects):
        scatter = px.scatter(
            plotX[plotX["Subject"] == subj],
            x='C1_2d',
            y='C2_2d',
            color="condition",
        )
        grid_row, grid_col = divmod(i, n_cols)
        for trace in scatter.data:
            # Only the first subplot contributes legend entries, so each
            # condition is listed once instead of once per subject.
            trace.update(showlegend=(i == 0))
            # add_trace replaces the deprecated append_trace.
            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    fig.update_layout(height=1000, width=1000, title_text=title, margin=dict(r=0, b=0, l=0))

    name = ','.join(str(s) for s in subjects)

    # write_image does not create missing directories.
    os.makedirs("results", exist_ok=True)
    fig.write_image(f"results/{feature}_{name}.png")
    fig.show()

In [26]:
# Subject batches keyed by ID range. The IDs are strings here, while `subjects`
# below holds ints; nothing in this cell reads the dict.
name_to_batch = {
    '2017-2028': ['2017', '2018', '2024', '2025', '2026', '2028'],
    '2029-2036': ['2029', '2031', '2032', '2033', '2035', '2036'],
    '2039-2045': ['2039', '2040', '2041', '2042', '2043', '2044', '2045'],
}

# Embed two subjects with the 'ALL' feature pattern, then plot and save the result.
feature = 'ALL'
subjects = [2017, 2018]

plotX = get_umap(feature=feature, result=result, subjects=subjects)
save_result(feature=feature, subjects=subjects, plotX=plotX)

(260, 768) 768 260
(260, 768)


In [27]:
# Same embedding drawn with plotly express facets: one panel per subject,
# wrapped after every panel (facet_col_wrap=1), coloured by condition.
fig = px.scatter(data_frame=plotX, x='C1_2d', y='C2_2d', color="condition", facet_col="Subject", facet_col_wrap=1)
fig.show()

In [None]:
# reference https://www.mikulskibartosz.name/pca-how-to-choose-the-number-of-components/
# The feature matrix only exists as a local inside get_umap, so rebuild it here
# from the same inputs; on a fresh kernel this cell would otherwise raise a
# NameError. `label_list` is kept at notebook scope for the cells further down.
eeg_features_full, filtered_result, label_list = get_filtered_data(
    result, subjects, get_filter_pattern(feature)
)

# A float n_components keeps as many components as are needed to explain 95% of
# the variance. Reading from eeg_features_full (rather than overwriting the
# input) makes the cell safe to re-run.
reduced_data = PCA(n_components=0.95).fit_transform(eeg_features_full)
normalized_eeg_features = pd.DataFrame(reduced_data)
normalized_eeg_features.head()

In [1]:
# from sklearn.mixture import GaussianMixture
# import matplotlib.pyplot as plt

# gmm = GaussianMixture(n_components=2).fit(normalized_eeg_features)
# clusters = gmm.predict(normalized_eeg_features)

In [None]:
from sklearn.cluster import AgglomerativeClustering
import time

print("Compute unstructured hierarchical clustering...")

# Cluster on the feature columns only. Another cell stores the labels back into
# normalized_eeg_features as "Cluster"; without this drop, re-running the cell
# would feed the previous labels in as a feature.
cluster_input = normalized_eeg_features.drop(columns=["Cluster"], errors="ignore")

# perf_counter is the clock meant for measuring elapsed intervals; time.time()
# follows the wall clock and can jump.
st = time.perf_counter()
ward = AgglomerativeClustering(n_clusters=4, linkage="ward").fit(cluster_input)
elapsed_time = time.perf_counter() - st

clusters = ward.labels_
print(f"Elapsed time: {elapsed_time:.2f}s")
print(f"Number of points: {clusters.size}")

In [None]:
from dataframe.visualization import pca_2d

# `title` is only ever assigned inside save_result, so it does not exist at
# notebook scope; define the heading here with the same wording save_result uses.
# NOTE(review): adjust the text if this plot should carry a different heading.
title = f"EEG {feature} (average spectral power per trial)"

# Add the cluster vector to our DataFrame, X
# (in-place on purpose: later cells read and drop the "Cluster" column)
normalized_eeg_features["Cluster"] = clusters
# normalized_eeg_features['Block'] = all_blocks

pca_2d(
    normalized_eeg_features,
    4,
    [
        "rgba(255, 128, 255, 0.8)",
        "rgba(255, 128, 2, 0.8)",
        "rgba(0, 255, 200, 0.8)",
        "rgba(0, 128, 200, 0.8)",
    ],
    title,
    False,
    mode="markers",
    textfont=dict(size=10),
)

In [None]:
from plotly.subplots import make_subplots


def get_label_str(label_list: list) -> list:
    """Translate integer label codes into their names.

    Codes index into ("nvla", "nvha", "hvla", "hvha"), i.e. 0 -> "nvla" ...
    3 -> "hvha"; the result keeps the order of ``label_list``.
    """
    names_by_code = ["nvla", "nvha", "hvla", "hvha"]
    return list(map(names_by_code.__getitem__, label_list))


# Pie-slice colour for every block name and for every bare label name; the pie
# chart loop looks colours up here by the label shown on each slice.
colors_map = dict(
    # "a_" blocks
    a_hvha="rgb(177, 127, 38)",
    a_hvla="rgb(205, 152, 36)",
    a_nvha="rgb(99, 79, 37)",
    a_nvla="rgb(124, 103, 37)",
    # "b_" blocks
    b_hvha="rgb(33, 75, 99)",
    b_hvla="rgb(79, 129, 102)",
    b_nvha="rgb(151, 179, 100)",
    b_nvla="rgb(175, 49, 35)",
    # stand-alone blocks
    medi="rgb(18, 36, 37)",
    wandering="rgb(56, 75, 126)",
    # bare labels as produced by get_label_str
    nvla="rgb(175, 49, 35)",
    nvha="rgb(99, 79, 37)",
    hvla="rgb(205, 152, 36)",
    hvha="rgb(33, 75, 99)",
)

# 2 x 2 grid of subplots; pie charts need the 'domain' subplot type
specs = [[{"type": "domain"}, {"type": "domain"}] for _ in range(2)]
fig = make_subplots(rows=2, cols=2, specs=specs)

# NOTE(review): `label_list` and `title` are read below but not assigned in this
# cell; both have to exist at notebook scope before it is run.

# One "label" column holding the label name of every row, joined column-wise
# onto the clustered feature frame.
label_df = pd.DataFrame(get_label_str(label_list)).set_axis(["label"], axis=1)
plotX = pd.concat([normalized_eeg_features, label_df], axis=1, join="inner")

num_cluster = 4
groupBy = "label"
# One pie per cluster showing how its rows split across the labels
for i in range(num_cluster):
    members = plotX[plotX["Cluster"] == i]
    c = members.groupby([groupBy]).agg({groupBy: "count"})
    labels = c.index
    values = c[groupBy].tolist()
    colors = [colors_map[label_name] for label_name in labels]

    # Fill the grid row by row: clusters 0,1 on the first row, 2,3 on the second
    grid_row, grid_col = divmod(i, 2)
    pie = go.Pie(
        labels=labels,
        values=values,
        title_text=f"Cluster {i}",
        textinfo="label+percent",
        marker_colors=colors,
    )
    fig.add_trace(pie, grid_row + 1, grid_col + 1)

# Figure heading; the legend is hidden because every slice carries its own label
fig.update_layout(title_text=title, showlegend=False)

fig = go.Figure(fig)
fig.show()

In [None]:
# Clustering
# kmeans = KMeans(init="k-means++", n_clusters=4, n_init=2)
# kmeans.fit(normalized_eeg_features)
# #Find which cluster each data-point belongs to
# clusters = kmeans.predict(normalized_eeg_features)


# from sklearn.cluster import DBSCAN
# from sklearn import metrics

# db = DBSCAN(eps=7, min_samples=10).fit(normalized_eeg_features)
# clusters = db.labels_

# # Number of clusters in labels, ignoring noise if present.
# n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
# n_noise_ = list(clusters).count(-1)

# print("Estimated number of clusters: %d" % n_clusters_)
# print("Estimated number of noise points: %d" % n_noise_)
# clusters

In [None]:
# visualization via t-SNE
# Set our perplexity
perplexity = 50
# t-SNE is stochastic: pin the seed so the embeddings can be reproduced
# (0 is the same seed the UMAP call in get_umap uses).
tsne_seed = 0

# The cluster labels are not features; strip them once and reuse the result for
# all three embeddings instead of dropping the column three times.
tsne_input = normalized_eeg_features.drop(["Cluster"], axis=1)

# T-SNE with one dimension
tsne_1d = TSNE(n_components=1, perplexity=perplexity, random_state=tsne_seed)

# T-SNE with two dimensions
tsne_2d = TSNE(n_components=2, perplexity=perplexity, random_state=tsne_seed)

# T-SNE with three dimensions
tsne_3d = TSNE(n_components=3, perplexity=perplexity, random_state=tsne_seed)

# Column names follow the pattern "TC<k>_<n>d": the k-th component of the
# n-dimensional t-SNE embedding.
# This DataFrame holds a single dimension, built by T-SNE
s_1d = pd.DataFrame(tsne_1d.fit_transform(tsne_input), columns=["TC1_1d"])

# This DataFrame contains two dimensions, built by T-SNE
s_2d = pd.DataFrame(tsne_2d.fit_transform(tsne_input), columns=["TC1_2d", "TC2_2d"])

# And this DataFrame contains three dimensions, built by T-SNE
s_3d = pd.DataFrame(
    tsne_3d.fit_transform(tsne_input),
    columns=["TC1_3d", "TC2_3d", "TC3_3d"],
)

method = "t-SNE"