In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the feature table from features_df_final_with_rel.csv (path is relative to the notebook's working directory)
features_df_norm = pd.read_csv("features_df_final_with_rel.csv")

# Function to plot a polar bar chart for a given feature with correction color coding
def plot_feature(df, feature_name, correction_col='correction_info'):
    """Show a polar bar chart of one feature, coloured by correction category.

    Rows with a NaN feature value are dropped, the remaining rows are sorted
    by the feature, and each row becomes one bar. Bar heights are the feature
    values divided by their maximum. A new 8x8 inch polar figure is created
    and displayed with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding ``feature_name`` and ``correction_col``.
    feature_name : str
        Column whose values set the bar heights; also used as the title.
    correction_col : str, default 'correction_info'
        Column whose distinct values each get one colour and one legend entry.

    Raises
    ------
    ValueError
        If either column is missing, or if no non-NaN feature value is left
        to plot.
    """
    if feature_name not in df.columns:
        raise ValueError(f"Feature '{feature_name}' not found in DataFrame.")
    if correction_col not in df.columns:
        raise ValueError(f"Correction info column '{correction_col}' not found in DataFrame.")

    # A single NaN would make np.max return NaN and blank out every bar,
    # so missing feature values are removed before anything is computed.
    df_sorted = df.dropna(subset=[feature_name]).sort_values(by=feature_name)
    if df_sorted.empty:
        raise ValueError(f"Feature '{feature_name}' has no non-NaN values to plot.")

    feature_values = df_sorted[feature_name].values
    correction_info = df_sorted[correction_col].values

    # One bar per row, spread evenly over the full circle
    n_bars = len(feature_values)
    theta = np.linspace(0.0, 2 * np.pi, n_bars, endpoint=False)
    width = 2 * np.pi / n_bars

    # Normalise by the maximum; an all-zero feature would otherwise be 0/0
    peak = np.max(feature_values)
    radii = feature_values / peak if peak != 0 else np.zeros(n_bars)

    # One colour per distinct correction value (np.unique returns them sorted)
    unique_corrections = np.unique(correction_info)
    color_map = plt.get_cmap('tab20', len(unique_corrections))
    color_dict = {correction: color_map(i) for i, correction in enumerate(unique_corrections)}
    bar_colors = [color_dict[correction] for correction in correction_info]

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    ax.bar(theta, radii, width=width, bottom=0.0, color=bar_colors, alpha=0.5)
    ax.set_title(feature_name, fontsize=14)

    # Proxy line handles, because the bars themselves carry no labels
    handles = [plt.Line2D([0], [0], color=color_dict[correction], lw=4) for correction in unique_corrections]
    ax.legend(handles, unique_corrections, title='Correction Info', bbox_to_anchor=(1.1, 1.1))

    plt.show()

# Example usage: draw the polar chart for one feature column of the table loaded above
feature_to_plot = 'envelope_change_integral_norm'  # Change this to plot different features
plot_feature(features_df_norm, feature_to_plot)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the feature table from features_df_final_with_rel.csv (path is relative to the notebook's working directory)
features_df_norm = pd.read_csv("features_df_final_with_rel.csv")

# Function to plot a polar bar chart for a given feature with correction color coding
def plot_feature(ax, df, feature_name, correction_col='correction_info'):
    """Draw a polar bar chart of one feature on ``ax``, coloured by correction.

    Rows with a NaN feature value are dropped, the rest are sorted by the
    feature, and each row becomes one bar whose height is the feature value
    divided by the maximum. Grid lines, a title, eight angular tick labels and
    a legend are added to ``ax``. The figure is not shown here.

    Parameters
    ----------
    ax : matplotlib polar Axes
        Axes to draw on.
    df : pandas.DataFrame
        Table holding ``feature_name`` and ``correction_col``.
    feature_name : str
        Column whose values set the bar heights; also used as the title.
    correction_col : str, default 'correction_info'
        Column whose distinct values each get one colour and one legend entry.

    Returns
    -------
    None
        Returns early, leaving ``ax`` untouched, when no non-NaN feature
        value is available.

    Raises
    ------
    ValueError
        If either column is missing from ``df``.
    """
    if feature_name not in df.columns:
        raise ValueError(f"Feature '{feature_name}' not found in DataFrame.")
    if correction_col not in df.columns:
        raise ValueError(f"Correction info column '{correction_col}' not found in DataFrame.")

    # Dropping NaNs first covers both "empty input" and "nothing left to plot"
    df_sorted = df.dropna(subset=[feature_name]).sort_values(by=feature_name)
    if df_sorted.empty:
        return

    feature_values = df_sorted[feature_name].values
    correction_info = df_sorted[correction_col].values

    # One bar per row, spread evenly over the full circle
    n_bars = len(feature_values)
    theta = np.linspace(0.0, 2 * np.pi, n_bars, endpoint=False)
    width = 2 * np.pi / n_bars

    # Normalise by the maximum; an all-zero feature would otherwise be 0/0
    peak = np.max(feature_values)
    radii = feature_values / peak if peak != 0 else np.zeros(n_bars)

    # One colour per distinct correction value (np.unique returns them sorted)
    unique_corrections = np.unique(correction_info)
    color_map = plt.get_cmap('tab20', len(unique_corrections))
    color_dict = {correction: color_map(i) for i, correction in enumerate(unique_corrections)}
    bar_colors = [color_dict[correction] for correction in correction_info]

    # Bars without edges so neighbouring bars blend into coloured sectors
    ax.bar(theta, radii, width=width, bottom=0.0, color=bar_colors, alpha=0.7, edgecolor='none')

    ax.yaxis.grid(True, linestyle='--', color='grey', alpha=0.5)
    ax.xaxis.grid(True, linestyle='--', color='grey', alpha=0.5)

    ax.set_title(feature_name, fontsize=16, fontweight='bold')

    # Eight angular ticks; round before int() so float error in the
    # radian-to-degree conversion cannot turn e.g. 135 into 134
    theta_ticks = np.linspace(0, 2 * np.pi, num=8, endpoint=False)
    ax.set_xticks(theta_ticks)
    ax.set_xticklabels([f'{int(round(np.degrees(tick)))}°' for tick in theta_ticks], fontsize=10, color='black')

    # Proxy line handles, because the bars themselves carry no labels
    handles = [plt.Line2D([0], [0], color=color_dict[correction], lw=4) for correction in unique_corrections]
    ax.legend(handles, unique_corrections, title='Correction Info', bbox_to_anchor=(1.1, 1.1), fontsize=10)

# Function to plot a grid of polar bar charts for each modality
def plot_grid_for_modalities(df, feature_name, modality_col='modality', correction_col='correction_info'):
    """Show one polar feature chart per modality in a square subplot grid.

    The grid is the smallest square that fits every distinct non-missing
    value of ``modality_col``; surplus panels are removed. Each panel is drawn
    by ``plot_feature(ax, modality_df, feature_name, correction_col)`` and
    titled with the feature name and the modality. The figure is displayed
    with ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature, modality and correction columns.
    feature_name : str
        Feature column forwarded to ``plot_feature``.
    modality_col : str, default 'modality'
        Column whose distinct values each get one panel.
    correction_col : str, default 'correction_info'
        Correction column forwarded to ``plot_feature``.

    Returns
    -------
    None
        Returns without creating a figure when there is no modality to plot.
    """
    # Missing modality labels can never match a row filter, so they are not given a panel
    unique_modalities = df[modality_col].dropna().unique()
    n_modalities = len(unique_modalities)
    if n_modalities == 0:
        # A 0x0 subplot grid is invalid; there is simply nothing to draw
        return

    # Smallest square grid that holds every modality (1x1, 2x2, 3x3, ...)
    grid_size = int(np.ceil(np.sqrt(n_modalities)))

    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise hand back a bare Axes without .flatten()
    fig, axs = plt.subplots(
        grid_size, grid_size,
        figsize=(grid_size*8, grid_size*8),
        subplot_kw=dict(polar=True),
        squeeze=False,
    )
    axs = axs.flatten()

    for ax, modality in zip(axs, unique_modalities):
        modality_df = df[df[modality_col] == modality]
        plot_feature(ax, modality_df, feature_name, correction_col)
        # Overrides the title set inside plot_feature so the modality is visible
        ax.set_title(f"{feature_name}\n{modality}", fontsize=14, fontweight='bold')

    # Remove the panels of the square grid that no modality occupies
    for unused_ax in axs[n_modalities:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Example usage: one polar panel per modality for the chosen feature column
feature_to_plot = 'flux_Gmean_rel'  # Change this to plot different features
plot_grid_for_modalities(features_df_norm, feature_to_plot)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the feature table from features_df_final_with_rel.csv (path is relative to the notebook's working directory)
features_df_norm = pd.read_csv("features_df_final_with_rel.csv")

# Function to plot a polar bar chart for a given feature with horizontal half bar color coding
def plot_feature(ax, df, feature_name, correction_col='correction_info', concept_col='concept_id'):
    """Draw a polar chart on ``ax`` whose bars are split into two colour halves.

    Rows with a NaN feature value are dropped, the rest are sorted by the
    feature, and each row becomes one bar whose total height is the feature
    value divided by the maximum. The inner half of each bar is coloured by
    ``concept_col`` and the outer half by ``correction_col``. Grid lines, a
    title, eight angular tick labels and a combined legend are added to
    ``ax``. The figure is not shown here.

    Parameters
    ----------
    ax : matplotlib polar Axes
        Axes to draw on.
    df : pandas.DataFrame
        Table holding the feature, correction and concept columns.
    feature_name : str
        Column whose values set the bar heights; also used as the title.
    correction_col : str, default 'correction_info'
        Column that colours the outer half of each bar.
    concept_col : str, default 'concept_id'
        Column that colours the inner half of each bar.

    Returns
    -------
    None
        Returns early, leaving ``ax`` untouched, when no non-NaN feature
        value is available.

    Raises
    ------
    ValueError
        If any of the three columns is missing from ``df``.
    """
    if feature_name not in df.columns:
        raise ValueError(f"Feature '{feature_name}' not found in DataFrame.")
    if correction_col not in df.columns:
        raise ValueError(f"Correction info column '{correction_col}' not found in DataFrame.")
    if concept_col not in df.columns:
        raise ValueError(f"Concept column '{concept_col}' not found in DataFrame.")

    # Dropping NaNs first covers both "empty input" and "nothing left to plot"
    df_sorted = df.dropna(subset=[feature_name]).sort_values(by=feature_name)
    if df_sorted.empty:
        return

    feature_values = df_sorted[feature_name].values
    correction_info = df_sorted[correction_col].values
    concept_ids = df_sorted[concept_col].values

    # One bar per row, spread evenly over the full circle
    n_bars = len(feature_values)
    theta = np.linspace(0.0, 2 * np.pi, n_bars, endpoint=False)
    width = 2 * np.pi / n_bars

    # Normalise by the maximum; an all-zero feature would otherwise be 0/0
    peak = np.max(feature_values)
    radii = feature_values / peak if peak != 0 else np.zeros(n_bars)
    half_radii = radii / 2

    # Separate colour maps for the two categorical columns
    unique_corrections = np.unique(correction_info)
    unique_concepts = np.unique(concept_ids)
    correction_color_map = plt.get_cmap('tab20', len(unique_corrections))
    concept_color_map = plt.get_cmap('tab10', len(unique_concepts))
    correction_color_dict = {correction: correction_color_map(i) for i, correction in enumerate(unique_corrections)}
    concept_color_dict = {concept: concept_color_map(i) for i, concept in enumerate(unique_concepts)}

    correction_bar_colors = [correction_color_dict[correction] for correction in correction_info]
    concept_bar_colors = [concept_color_dict[concept] for concept in concept_ids]

    # Two vectorised calls instead of 2*N single-bar calls:
    # inner half (concept colour) from 0, outer half (correction colour) stacked on top
    ax.bar(theta, half_radii, width=width, bottom=0.0, color=concept_bar_colors, alpha=0.7, edgecolor='none')
    ax.bar(theta, half_radii, width=width, bottom=half_radii, color=correction_bar_colors, alpha=0.7, edgecolor='none')

    ax.yaxis.grid(True, linestyle='--', color='grey', alpha=0.5)
    ax.xaxis.grid(True, linestyle='--', color='grey', alpha=0.5)

    ax.set_title(feature_name, fontsize=16, fontweight='bold')

    # Eight angular ticks; round before int() so float error in the
    # radian-to-degree conversion cannot turn e.g. 135 into 134
    theta_ticks = np.linspace(0, 2 * np.pi, num=8, endpoint=False)
    ax.set_xticks(theta_ticks)
    ax.set_xticklabels([f'{int(round(np.degrees(tick)))}°' for tick in theta_ticks], fontsize=10, color='black')

    # Single legend: correction entries first, then concept entries
    correction_handles = [plt.Line2D([0], [0], color=correction_color_dict[correction], lw=4) for correction in unique_corrections]
    concept_handles = [plt.Line2D([0], [0], color=concept_color_dict[concept], lw=4) for concept in unique_concepts]
    ax.legend(correction_handles + concept_handles,
              list(unique_corrections) + list(unique_concepts),
              title='Correction Info / Concept ID',
              bbox_to_anchor=(1.1, 1.1), fontsize=10)

# Function to plot a grid of polar bar charts for each modality
def plot_grid_for_modalities(df, feature_name, modality_col='modality', correction_col='correction_info', concept_col='concept_id'):
    """Show one split-bar polar chart per modality in a square subplot grid.

    The grid is the smallest square that fits every distinct non-missing
    value of ``modality_col``; surplus panels are removed. Each panel is drawn
    by ``plot_feature(ax, modality_df, feature_name, correction_col,
    concept_col)`` and titled with the feature name and the modality. The
    figure is displayed with ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature, modality, correction and concept columns.
    feature_name : str
        Feature column forwarded to ``plot_feature``.
    modality_col : str, default 'modality'
        Column whose distinct values each get one panel.
    correction_col : str, default 'correction_info'
        Correction column forwarded to ``plot_feature``.
    concept_col : str, default 'concept_id'
        Concept column forwarded to ``plot_feature``.

    Returns
    -------
    None
        Returns without creating a figure when there is no modality to plot.
    """
    # Missing modality labels can never match a row filter, so they are not given a panel
    unique_modalities = df[modality_col].dropna().unique()
    n_modalities = len(unique_modalities)
    if n_modalities == 0:
        # A 0x0 subplot grid is invalid; there is simply nothing to draw
        return

    # Smallest square grid that holds every modality (1x1, 2x2, 3x3, ...)
    grid_size = int(np.ceil(np.sqrt(n_modalities)))

    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise hand back a bare Axes without .flatten()
    fig, axs = plt.subplots(
        grid_size, grid_size,
        figsize=(grid_size*8, grid_size*8),
        subplot_kw=dict(polar=True),
        squeeze=False,
    )
    axs = axs.flatten()

    for ax, modality in zip(axs, unique_modalities):
        modality_df = df[df[modality_col] == modality]
        plot_feature(ax, modality_df, feature_name, correction_col, concept_col)
        # Overrides the title set inside plot_feature so the modality is visible
        ax.set_title(f"{feature_name}\n{modality}", fontsize=14, fontweight='bold')

    # Remove the panels of the square grid that no modality occupies
    for unused_ax in axs[n_modalities:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Example usage: one split-bar polar panel per modality for the chosen feature column
feature_to_plot = 'arm_torque_sum_change_integral_rel'  # Change this to plot different features
plot_grid_for_modalities(features_df_norm, feature_to_plot)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


# Read the feature table used for the per-concept box plot.
# NOTE(review): `pd` is not imported in this cell; it relies on an earlier cell's import.
df = pd.read_csv("features_df_final.csv")

# Keep only the three concepts compared in this figure: water, zoet and ver
subdf = df[df["concept"].isin(["water", "zoet", "ver"])]

# Set the aesthetic style of the plots
sns.set(style="whitegrid")

# Custom color palette
palette = sns.color_palette("Set2")

plt.figure(figsize=(10, 6))

# Box per concept, split by correction_info
sns.boxplot(
    data=subdf,
    x="concept",
    y="arm_torque_sum_change_integral",
    hue="correction_info",
    palette=palette,
    boxprops=dict(alpha=.5, linewidth=1.5),  # Semi-transparent boxes with thicker lines
    whiskerprops=dict(linewidth=1.5),  # Thicker whiskers
    capprops=dict(linewidth=1.5),  # Thicker caps
    #medianprops=dict(linewidth=2, color="red"),  # Emphasize the median line
    showfliers=False  # Outliers are hidden here; the strip plot below shows every point anyway
)

# Individual observations on top of the boxes, dodged to line up with them
sns.stripplot(
    data=subdf,
    x="concept",
    y="arm_torque_sum_change_integral",
    hue="correction_info",
    palette=palette,
    dodge=True,
    marker="o",
    size=15,  # Larger markers
    alpha=0.7,  # Semi-transparent markers
    linewidth=0.5,  # Thin line around markers
    legend=False  # Avoid a second set of legend entries for the same hue
)

# Title and labels now describe what is actually plotted: the arm torque
# change integral per concept (they previously said "COPc Peak Mean" / "Modality")
plt.title("Arm Torque Change Integral by Concept and Correction Info", fontsize=16, weight='bold')
plt.xlabel("Concept", fontsize=14)
plt.ylabel("Torque change integral (arm)", fontsize=14)

# Improve the legend
plt.legend(title="Correction Info", title_fontsize='13', fontsize='11', loc='upper right')

# Minimalist layout adjustment
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Load the feature table from features_df_final_with_rel.csv (path is relative to the notebook's working directory)
df_rel = pd.read_csv("features_df_final_with_rel.csv")

def plot_feature(df, feature_name, modality_to_plot, correction_col='correction_info', concept_col='concept_id'):
    """Build an interactive polar bar chart of one feature for one modality.

    Rows whose ``'modality'`` equals ``modality_to_plot`` are kept, rows with
    a NaN feature value are dropped, and the rest are sorted by the feature.
    Each row becomes one bar whose radius is the feature value divided by the
    maximum and whose colour depends on ``correction_col``. The hover text
    shows the raw feature value, the correction, the concept and the
    ``'answer_prev_sim'`` / ``'answer_fol_sim'`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the feature, correction and concept columns plus the
        hard-coded columns 'modality', 'answer_fol_sim' and 'answer_prev_sim'.
    feature_name : str
        Column whose values set the bar radii; also used as the title.
    modality_to_plot : object
        Value of the 'modality' column to keep.
    correction_col : str, default 'correction_info'
        Column whose distinct values each get one colour.
    concept_col : str, default 'concept_id'
        Column shown as "Concept ID" in the hover text.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; the caller decides when to show it.

    Raises
    ------
    ValueError
        If ``feature_name`` or ``correction_col`` is missing, if ``df`` is
        empty, or if no non-NaN feature value exists for the modality.
    KeyError
        If 'modality', ``concept_col``, 'answer_fol_sim' or 'answer_prev_sim'
        is missing (raised by the pandas column lookup).
    """
    if feature_name not in df.columns:
        raise ValueError(f"Feature '{feature_name}' not found in DataFrame.")
    if correction_col not in df.columns:
        raise ValueError(f"Correction info column '{correction_col}' not found in DataFrame.")

    if df.empty:
        raise ValueError("No data available for the selected feature and modality.")

    # Restrict to the requested modality
    subdf = df[df['modality'] == modality_to_plot]

    # A single NaN would make np.max return NaN and blank out every bar,
    # so missing feature values are removed before sorting
    df_sorted = subdf.dropna(subset=[feature_name]).sort_values(by=feature_name)
    feature_values = df_sorted[feature_name].values
    correction_info = df_sorted[correction_col].values
    concept_ids = df_sorted[concept_col].values
    answer_fol_sim = df_sorted['answer_fol_sim'].values
    answer_prev_sim = df_sorted['answer_prev_sim'].values

    if len(feature_values) == 0:
        raise ValueError("No feature values available to plot.")

    # One bar per row, spread evenly over 360 degrees
    n_bars = len(feature_values)
    theta = np.linspace(0.0, 360.0, n_bars, endpoint=False)

    # Normalise by the maximum; an all-zero feature would otherwise be 0/0
    peak = np.max(feature_values)
    radii = feature_values / peak if peak != 0 else np.zeros(n_bars)

    # Cycle through the qualitative palette if there are more corrections than colours
    unique_corrections = np.unique(correction_info)
    color_map = px.colors.qualitative.T10
    color_dict = {correction: color_map[i % len(color_map)] for i, correction in enumerate(unique_corrections)}
    bar_colors = [color_dict[correction] for correction in correction_info]

    # Hover text per bar; loop names are distinct from the arrays they iterate over
    hover_text = [
        f"{feature_name}: {value:.2f}<br>Correction: {correction}<br>Concept ID: {concept}<br>Distance of previous answer: {answer_prev}<br>Distance of following answer: {answer_fol}"
        for value, correction, concept, answer_prev, answer_fol
        in zip(feature_values, correction_info, concept_ids, answer_prev_sim, answer_fol_sim)
    ]

    fig = go.Figure()

    fig.add_trace(go.Barpolar(
        r=radii,
        theta=theta,
        width=[360.0 / n_bars] * n_bars,
        marker_color=bar_colors,
        opacity=0.8,
        hoverinfo='text',
        text=hover_text
    ))

    fig.update_layout(
        title=feature_name,
        polar=dict(
            radialaxis=dict(range=[0, 1], visible=True)
        ),
        showlegend=False
    )

    return fig

# def plot_feature_for_modality(df, feature_name, modality, correction_col='correction_info'):
#     modality_df = df[df['modality'] == modality]
    
#     # Debugging: print the shape and a sample of the filtered DataFrame
#     print(f"Modality: {modality}")
#     print(f"Number of rows for modality '{modality}': {modality_df.shape[0]}")
#     print(modality_df.head())
    
#     return plot_feature(modality_df, feature_name, correction_col)

# Example usage: build the interactive polar chart for one feature and one modality, then display it
feature_to_plot = 'arm_torque_sum_change_integral'  # Change this to plot different features
modality_to_plot = 'gebaren'  # Change this to specify which modality to plot

# plot_feature returns the figure without showing it, so it is displayed explicitly here
fig = plot_feature(df_rel, feature_to_plot, modality_to_plot)
fig.show()


In [None]:
# DONE // bbmv_arms                              
# DONE // intermittency_rwrist
# DONE // RIGHT_WRIST_acceleration_ipi_avg
# DONE (but might need to adjust conversion // acceleration_mean_peak
# DONE peak_n
# DONE (but convert) RIGHT_WRIST_speed_peak_mean
# DONE also integral

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns

#features_df = pd.read_csv("features_df_exp1_final.csv")
# delete rows where correction is c1 and c2
#features_df = features_df[features_df['concept'] != 'c1']
#features_df = features_df[features_df['concept'] != 'c2']

# Read the feature table. The folder prefix comes from `datafolder`, which
# this cell expects to exist already (NOTE(review): it is not set in this cell).
datapath = datafolder + "features_df_final.csv"
df = pd.read_csv(datapath)

# Plot from a copy so the loaded table itself is never modified
sampledf = df.copy()

# Column to plot and the caption used for its y-axis
vartoplot = 'arm_torque_sum_change_integral'
label = 'Torque change integral (arm)'

# Swap the modality labels for their English names with a single mapping
sampledf['modality'] = sampledf['modality'].replace(
    {'gebaren': 'Gesture', 'geluiden': 'Vocalization', 'combinatie': 'Combined'}
)

# Make correction_info an ordered categorical: c0_only < c0 < c1 < c2.
# reorder_categories raises if the column's values are not exactly these four.
sampledf['correction_info'] = (
    sampledf['correction_info']
    .astype('category')
    .cat.reorder_categories(['c0_only', 'c0', 'c1', 'c2'], ordered=True)
)

# Leave the vocalization rows out of this figure
sampledf = sampledf[sampledf['modality'].ne('Vocalization')]

# One colour per correction_info category, in category order
colors = ['teal', 'darkorange', '#556B2F', 'navy']

# Optional outlier removal with Tukey's rule (disabled):
# Q1 = sampledf[vartoplot].quantile(0.25)
# Q3 = sampledf[vartoplot].quantile(0.75)
# IQR = Q3 - Q1
# sampledf = sampledf[(sampledf[vartoplot] > (Q1 - 1.5 * IQR)) & (sampledf[vartoplot] < (Q3 + 1.5 * IQR))]

# Global seaborn look and the canvas for this figure
sns.set(style="whitegrid", context="notebook")
plt.figure(figsize=(12, 7))

# Spectral palette sized to the number of modalities.
# NOTE(review): not passed to either plot below (both use `colors`).
box_palette = sns.color_palette("Spectral", len(sampledf['modality'].unique()))

# Boxes per modality, split by correction_info; fliersize=0 hides the default
# outlier markers because the strip plot draws every observation anyway
sns.boxplot(
    x='modality',
    y=vartoplot,
    hue='correction_info',
    data=sampledf,
    palette=colors,
    width=0.5,
    linewidth=1.5,
    fliersize=0
)

# Individual observations, dodged so they sit on their own box; jitter=0 keeps
# them in a straight vertical line, legend=False avoids duplicate entries
sns.stripplot(
    x='modality',
    y=vartoplot,
    hue='correction_info',
    data=sampledf,
    palette=colors,
    dodge=True,
    jitter=0,
    size=6,
    alpha=0.7,
    edgecolor='gray',
    linewidth=0.6,
    legend=False
)

# Only the y-axis gets a custom caption; title and x label keep their defaults
plt.ylabel(label, fontsize="25", labelpad=10)

# Large tick labels and a faint dashed grid
plt.xticks(fontsize=25)
plt.yticks(fontsize=25)
plt.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray', alpha=0.3)
plt.tight_layout()

# Write the figure next to the notebook before showing it
# (the file is written first, then the figure is displayed)
plotname = f'{vartoplot}_boxplot.png'
plt.savefig(plotname, dpi=300, transparent=True)

plt.show()









In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Select the table to correlate.
# NOTE(review): `features_df` is not defined in this cell; it must come from an
# earlier cell, so confirm it exists before a Restart & Run All.
# Alternative sources kept for reference:
#subdf_ges = features_df[features_df["modality"] == "gebaren"]
#df = pd.read_csv("features_df_final_with_rel.csv")
#subdf = df[df["modality"] != "geluiden"]  # would drop the "geluiden" modality
subdf = features_df

# The two columns whose relationship is plotted
feature1 = "answer_prev_dist"
feature2 = "COPc_integral"

# Keep only these two columns and drop rows where either is missing
subdf = subdf[[feature1, feature2]].dropna()

# Pearson correlation coefficient (the p-value is discarded)
corr_coef, _ = pearsonr(subdf[feature1], subdf[feature2])

sns.set(style="whitegrid")

# Scatter + regression line with marginal histograms. jointplot is a
# figure-level function that creates its own figure, so no plt.figure() call
# precedes it (that call only produced an additional empty figure).
g = sns.jointplot(x=subdf[feature1], y=subdf[feature2], kind='reg', height=8,
                  scatter_kws={'s': 50, 'alpha': 0.7}, marginal_kws=dict(bins=20, fill=True), color='navy')

# Leave head-room at the top in case a suptitle is enabled again
#g.fig.suptitle(f'Correlation Coefficient: {corr_coef:.2f}', fontsize=16)
g.fig.subplots_adjust(top=0.93)

# Report the correlation as text
print(f"Correlation coefficient between {feature1} and {feature2}: {corr_coef:.2f}")

# Human-readable axis captions.
# NOTE(review): feature1 is a "*_dist" column but the caption says "Similarity"; confirm the wording.
#g.set_axis_labels(feature1, feature2, fontsize=14)
g.set_axis_labels("Similarity of answer and target", "Change in COP integral", fontsize=14)
# get rid of axis labels
#g.set_axis_labels("", "")

# Save at high resolution with a transparent background, then display
plot_name = f"{feature1}_vs_{feature2}_jointplot.png"
g.savefig(plot_name, dpi=300, transparent=True)


plt.show()


