In [2]:
#Cargamos las librerías necesarias para el análisis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import time 
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import loadmat
from PIL import Image
from io import BytesIO
import os


## Building images dataset

In [None]:
def show_whats_written(df, output_path, file):
    # Filter the DataFrame
    filtered_df = df[(df.X != 683) & (df.Y != 384)]

    # Create a line plot for the clean data
    plt.plot(filtered_df['X'], filtered_df['Y'])

    # Add labels and a title
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title(f'X vs. Y Coordinates - Trial {trial}')

    file_name = file.split('.')[0]

    # Save the plot as an image
    image_filename = os.path.join(output_path, f'{file_name}.png')
    print(image_filename)
    plt.savefig(image_filename)

    plt.close()

    # Show the plot
    # plt.show()

data_groups = ["HC","Left_MAS","Right_MAS"]
data_groups = list(data_groups)

# Use os.walk to traverse all subdirectories and process MAT files
for group in data_groups: 
    # Define the path to your data group folder
    data_group_path = "../data/Francis_data/Francis_data/Baseline_fmri/"+group
    
    for root, _, files in os.walk(data_group_path):
        i=0
        for file in files:
            if file.endswith(".mat"):
                file_path = os.path.join(root, file)

                patient_folder = file_path.split('/')[5].split("\\")[1]

                # Load data from the MAT file
                data = loadmat(file_path)
                trial = int(data['S']['trial_cond'][0][0][i][0])
                print(trial)

                #X, Y Coordinates 
                thePoints = data['S']['thePoints'][0][0] 
                file_data = np.array([thePoints[:, 0], thePoints[:, 1]]).T
                df = pd.DataFrame(file_data)
                df.rename(columns={0: "X", 1: "Y"}, inplace=True)

                # Create output folder structure 
                output_folder = "..\output\\"+group+"\\"+patient_folder+"\\"+str(trial)
                os.makedirs(output_folder, exist_ok=True)

                # # Generate and save the plot
                show_whats_written(df, output_folder, file)

                i=i+1


# Features

## NCA y NCV

In [3]:

data_groups = [("HC", 1), ("Left_MAS", 2), ("Right_MAS", 3)]

# Create empty lists to store the data
data_list = []

# Use os.walk to traverse all subdirectories and process MAT files
for group in data_groups:
    # Define the path to your data group folder
    data_group_path = "../data/Francis_data/Francis_data/Baseline_fmri/" + group[0]

    for root, _, files in os.walk(data_group_path):
        i = 0
        for file in files:
            if file.endswith(".mat"):
                file_path = os.path.join(root, file)

                # Load data from the MAT file
                data = loadmat(file_path)
                event_type = int(data['S']['trial_cond'][0][0][i][0])

                # Tiempo
                timeSeries = data['S']['Ts'][0][0][0]
                Sbuttons = data['S']['Sbuttons']
                length = len(Sbuttons[0][0])
                rule = timeSeries[-1] / length
                currenTimeSeries = np.transpose(np.arange(start=0, stop=timeSeries[-1] + rule, step=rule))
                finishTime = timeSeries[-1]

                # X, Y Coordinates
                thePoints = data['S']['thePoints'][0][0]
                x = thePoints[:, 0]
                y = thePoints[:, 1]
                
                if len(x) != len(currenTimeSeries) or len(y) != len(currenTimeSeries):
                    currenTimeSeries = currenTimeSeries[1:]

                # Discarding common outliers

                # Create a mask to filter values where x=683 and y=384
                mask = (x != 683) & (y != 384)

                # Apply the mask to x and y
                filtered_x = x[mask]
                filtered_y = y[mask]

                # Index the values that were discarded
                discarded_indices = np.where(~mask)[0]

                # Discard corresponding values in currenTimeSeries
                filtered_time = np.delete(currenTimeSeries, discarded_indices)

                # Calculate velocity and acceleration
                time_diff = np.diff(filtered_time)
                x_diff = np.diff(filtered_x)
                y_diff = np.diff(filtered_y)
                velocity = np.sqrt(x_diff ** 2 + y_diff ** 2) / time_diff
                acceleration = np.diff(velocity) / time_diff[1:]

                # Count changes in velocity
                NCV = np.sum(np.diff(np.sign(velocity)) != 0)

                # Count changes in acceleration
                NCA = np.sum(np.diff(np.sign(acceleration)) != 0)

                # Append the data to data_list
                data_list.append([group[1], event_type, finishTime, NCV, NCA])

                i = i + 1

# Create a DataFrame from the data_list
df = pd.DataFrame(data_list, columns=["data_group", "event_type", "finishTime", "NCV", "NCA"])

# Display the DataFrame
print(df)



      data_group  event_type  finishTime  NCV  NCA
0              1           1    5.180044   19  107
1              1           3    4.820075   15  100
2              1           2    5.280308   23   67
3              1           4   13.329880   43  152
4              1           2    5.740754   17   92
...          ...         ...         ...  ...  ...
2675           3           1    3.216239   13   76
2676           3           4    3.920909   11   75
2677           3           1    3.630221    9   74
2678           3           3    3.777979   27   73
2679           3           1    4.139451   23  100

[2680 rows x 5 columns]


- `filtered_x` y `filtered_y` representan arreglos de coordenadas x y coordenadas y, respectivamente.
- `time_diff` es un arreglo que representa las diferencias de tiempo entre puntos de datos consecutivos.
- `velocity` es un arreglo que representa las velocidades calculadas en cada punto de datos.
- `acceleration` es un arreglo que representa las aceleraciones calculadas en cada punto de datos.

El código calcula la velocidad y la aceleración en función de los cambios en las coordenadas x y y, así como de las diferencias de tiempo correspondientes. Luego, cuenta los cambios en la velocidad (NCV) y los cambios en la aceleración (NCA) en función del signo de los valores calculados de velocidad y aceleración. Cuando el signo cambia, indica un cambio en la dirección o en la aceleración, por lo que se cuenta como un cambio.

In [15]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots

# Create the DataFrame as you've shown

# Get unique data_group and event_type combinations
data_groups = df["data_group"].unique()
event_types = df["event_type"].unique()

# Define figure size
fig = make_subplots(rows=len(data_groups), cols=len(event_types), shared_xaxes=True, shared_yaxes=True,
                    subplot_titles=["Data Group: " + str(dg) + ", Event Type: " + str(et) 
                                    for dg in data_groups for et in event_types])

# Initialize subplot indices
row = 1
col = 1

for data_group in data_groups:
    col = 1
    for event_type in event_types:
        subset = df[(df["data_group"] == data_group) & (df["event_type"] == event_type)]

        # Create a vertical box plot for NCA
        trace = px.box(subset, y="NCA", orientation="v", labels={"NCA": ""})
        fig.add_trace(trace.data[0], row=row, col=col)

        col += 1

    row += 1

# Define figure layout for NCA
fig.update_layout(title_text="Distribution of NCA by Data Group and Event Type")
# fig.update_xaxes(title_text="Data Group")
# fig.update_yaxes(title_text="NCA")

# Adjust figure size for NCA
fig.update_layout(height=800, width=1200)

# Show the figure for NCA
fig.show()

# Create a new figure for NCV
fig = make_subplots(rows=len(data_groups), cols=len(event_types), shared_xaxes=True, shared_yaxes=True,
                    subplot_titles=["Data Group: " + str(dg) + ", Event Type: " + str(et) 
                                    for dg in data_groups for et in event_types])

# Reset subplot indices
row = 1
col = 1

for data_group in data_groups:
    col = 1
    for event_type in event_types:
        subset = df[(df["data_group"] == data_group) & (df["event_type"] == event_type)]

        # Create a vertical box plot for NCV
        trace = px.box(subset, y="NCV", orientation="v", labels={"NCV": ""})
        fig.add_trace(trace.data[0], row=row, col=col)

        col += 1

    row += 1

# Define figure layout for NCV
fig.update_layout(title_text="Distribution of NCV by Data Group and Event Type")

# Adjust figure size for NCV
fig.update_layout(height=800, width=1200)

# Show the figure for NCV
fig.show()




In [16]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots

# Create the DataFrame as you've shown

# Get unique data_group and event_type combinations
data_groups = df["data_group"].unique()

for data_group in data_groups:
    subset = df[df["data_group"] == data_group]

    # Create a figure for NCA
    fig = px.box(subset, y="NCA", orientation="v", title=f"Distribution of NCA for Data Group: {data_group}")
    fig.update_xaxes(title="NCA")
    fig.update_yaxes(title="Frequency")

    # Show the figure for NCA
    fig.show()

    # Create a figure for NCV
    fig = px.box(subset, y="NCV", orientation="v", title=f"Distribution of NCV for Data Group: {data_group}")
    fig.update_xaxes(title="NCV")
    fig.update_yaxes(title="Frequency")

    # Show the figure for NCV
    fig.show()
