In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import glob
import os
import scipy.stats as stats
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from plotly.subplots import make_subplots
from scipy.interpolate import griddata


In [2]:
# Load accelerometer data
accel_df = pd.read_csv('all_accelerometer_data_pids_13.csv')

# Convert time to datetime
accel_df['time'] = pd.to_datetime(accel_df['time'], unit='ms')

In [3]:
tac_files = glob.glob("clean_tac/*.csv")
tac_data = {}

for file in tac_files:
    pid = os.path.basename(file).split(".")[0]  # Extract participant ID from filename
    df_tac = pd.read_csv(file)
    df_tac['pid'] = pid  # Add participant id as a new column
    tac_data[pid] = df_tac

# Combine all dataframes into a single dataframe
combined_df = pd.concat(tac_data.values(), ignore_index=True)

# Display the first few rows of the combined dataframe
print(combined_df.head())


    timestamp  TAC_Reading               pid
0  1493727820     0.000215  DK3500_clean_TAC
1  1493728019     0.001716  DK3500_clean_TAC
2  1493729841     0.001921  DK3500_clean_TAC
3  1493731667     0.000569  DK3500_clean_TAC
4  1493731994    -0.001321  DK3500_clean_TAC


In [4]:
phone_df = pd.read_csv('phone_types.csv')


In [5]:

# Define the folder path
raw_files = glob.glob('raw_tac/*.xlsx')

# List to store DataFrames
raw_list = []

for file in raw_files:
    try:
        # Read the Excel file with openpyxl engine
        raw_temp = pd.read_excel(file, engine="openpyxl", skiprows=0)

        # Drop completely empty columns
        raw_temp = raw_temp.dropna(axis=1, how='all')

        # Rename columns to remove "Unnamed" placeholders
        raw_temp.columns = [col if "Unnamed" not in col else f"col_{i}" for i, col in enumerate(raw_temp.columns)]

        # Append to the list
        raw_list.append(raw_temp)

    except Exception as e:
        print(f"⚠️ Error reading {file}: {e}")

# Merge all valid files into a single DataFrame
if raw_list:
    raw_df = pd.concat(raw_list, ignore_index=True)
    print(raw_df.head())  # Print first few rows
else:
    print("⚠️ No valid data files found.")


   TAC Level  IR Voltage  Temperature                Time                Date  \
0      0.000       1.203       77.932 2017-05-02 11:16:02 2017-05-02 11:16:02   
1      0.000       1.139       79.792 2017-05-02 11:46:25 2017-05-02 11:46:25   
2      0.000       1.139       81.464 2017-05-02 11:51:49 2017-05-02 11:51:49   
3      0.016       1.139       82.580 2017-05-02 12:22:13 2017-05-02 12:22:13   
4      0.048       1.139       84.996 2017-05-02 12:52:37 2017-05-02 12:52:37   

  col_0 col_1 col_2 col_3 col_4  
0   NaN   NaN   NaN   NaN   NaN  
1   NaN   NaN   NaN   NaN   NaN  
2   NaN   NaN   NaN   NaN   NaN  
3   NaN   NaN   NaN   NaN   NaN  
4   NaN   NaN   NaN   NaN   NaN  


In [6]:
# Drop columns that are completely empty or redundant
raw_df = raw_df.drop(columns=["col_0", "col_1", "col_2", "col_3", "col_4"], errors="ignore")

# If "Date" and "Time" are duplicated, drop one
if "Date" in raw_df.columns and "Time" in raw_df.columns:
    raw_df = raw_df.drop(columns=["Date"])

# Display cleaned DataFrame
print(raw_df.head())


   TAC Level  IR Voltage  Temperature                Time
0      0.000       1.203       77.932 2017-05-02 11:16:02
1      0.000       1.139       79.792 2017-05-02 11:46:25
2      0.000       1.139       81.464 2017-05-02 11:51:49
3      0.016       1.139       82.580 2017-05-02 12:22:13
4      0.048       1.139       84.996 2017-05-02 12:52:37


In [7]:
raw_df['Time'] = pd.to_datetime(raw_df['Time'])

# Create a new column with the full datetime as a string for hover display
raw_df['Full Date'] = raw_df['Time'].dt.strftime('%Y-%m-%d %H:%M:%S')

fig = px.scatter(
    raw_df,
    x='Time',  
    y='TAC Level',  
    color='Temperature',  
    color_continuous_scale='viridis',  
    hover_data={'Full Date': True, 'IR Voltage': True, 'TAC Level': True},  # Force full date in hover
    title='TAC Level Over Time with Temperature Intensity'
)

# Format x-axis to show only time (HH:MM)
fig.update_xaxes(
    tickformat='%H:%M',  # Show only time on x-axis
    dtick=3600 * 1000  # 1-hour interval in milliseconds
)

# Show the figure
fig.show()

In [3]:


# Scale x, y, z columns using MinMaxScaler
scaler = MinMaxScaler()
scaled_columns = scaler.fit_transform(accel_df[['x', 'y', 'z']])
accel_df[['x', 'y', 'z']] = scaled_columns

# Define grid
x_grid = np.linspace(accel_df['x'].min(), accel_df['x'].max(), 50)
y_grid = np.linspace(accel_df['y'].min(), accel_df['y'].max(), 50)
X, Y = np.meshgrid(x_grid, y_grid)

# Interpolate Z values using griddata
Z = griddata((accel_df['x'], accel_df['y']), accel_df['z'], (X, Y), method='cubic')

# Flatten the grid for line plot
X_flat = X.flatten()
Y_flat = Y.flatten()
Z_flat = Z.flatten()

# Create interactive 3D line plot
fig = go.Figure()

# Add 3D line plot with wider lines and customized color/opacity
fig.add_trace(go.Scatter3d(
    x=X_flat, 
    y=Y_flat, 
    z=Z_flat, 
    mode='lines',  # Lines connecting the data points
    line=dict(color=Z_flat, colorscale='Cividis', width=5),  
    opacity=0.8  # Reduced opacity for a smoother look
))

# Layout settings to make the plot wider and bigger
fig.update_layout(
    title="Enhanced 3D Line Plot of Accelerometer Data",
    scene=dict(
        xaxis_title="X",
        yaxis_title="Y",
        zaxis_title="Z"
    ),
    width=1000,  # Increase width of the plot
    height=700,  # Increase height of the plot
    showlegend=False  # Remove legend for cleaner visualization
)

fig.show()

In [4]:
fig = go.Figure()

fig.add_trace(go.Contour(
    x=x_grid, y=y_grid, z=Z, 
    colorscale='Cividis',
    contours=dict(start=0, end=1, size=0.05, showlabels=True),
    line_width=2
))

fig.update_layout(
    title="3D Contour Line Plot of Accelerometer Data",
    width=1000,
    height=700
)

fig.show()
