In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.express as px
import glob
import os

In [3]:
# Load accelerometer data
accel_df = pd.read_csv('all_accelerometer_data_pids_13.csv')

# Convert time to datetime
accel_df['time'] = pd.to_datetime(accel_df['time'], unit='ms')

In [4]:
tac_files = glob.glob("clean_tac/*.csv")
tac_data = {}

for file in tac_files:
    pid = os.path.basename(file).split(".")[0]  # Extract participant ID from filename
    df_tac = pd.read_csv(file)
    tac_data[pid] = df_tac


In [5]:
phone_df = pd.read_csv('phone_types.csv')


In [6]:

# Define the folder path
raw_files = glob.glob('raw_tac/*.xlsx')

# List to store DataFrames
raw_list = []

for file in raw_files:
    try:
        # Read the Excel file with openpyxl engine
        raw_temp = pd.read_excel(file, engine="openpyxl", skiprows=0)

        # Drop completely empty columns
        raw_temp = raw_temp.dropna(axis=1, how='all')

        # Rename columns to remove "Unnamed" placeholders
        raw_temp.columns = [col if "Unnamed" not in col else f"col_{i}" for i, col in enumerate(raw_temp.columns)]

        # Append to the list
        raw_list.append(raw_temp)

    except Exception as e:
        print(f"⚠️ Error reading {file}: {e}")

# Merge all valid files into a single DataFrame
if raw_list:
    raw_df = pd.concat(raw_list, ignore_index=True)
    print(raw_df.head())  # Print first few rows
else:
    print("⚠️ No valid data files found.")


   TAC Level  IR Voltage  Temperature                Time                Date  \
0      0.000       1.203       77.932 2017-05-02 11:16:02 2017-05-02 11:16:02   
1      0.000       1.139       79.792 2017-05-02 11:46:25 2017-05-02 11:46:25   
2      0.000       1.139       81.464 2017-05-02 11:51:49 2017-05-02 11:51:49   
3      0.016       1.139       82.580 2017-05-02 12:22:13 2017-05-02 12:22:13   
4      0.048       1.139       84.996 2017-05-02 12:52:37 2017-05-02 12:52:37   

  col_0 col_1 col_2 col_3 col_4  
0   NaN   NaN   NaN   NaN   NaN  
1   NaN   NaN   NaN   NaN   NaN  
2   NaN   NaN   NaN   NaN   NaN  
3   NaN   NaN   NaN   NaN   NaN  
4   NaN   NaN   NaN   NaN   NaN  


In [7]:
# Drop columns that are completely empty or redundant
raw_df = raw_df.drop(columns=["col_0", "col_1", "col_2", "col_3", "col_4"], errors="ignore")

# If "Date" and "Time" are duplicated, drop one
if "Date" in raw_df.columns and "Time" in raw_df.columns:
    raw_df = raw_df.drop(columns=["Date"])

# Display cleaned DataFrame
print(raw_df.head())


   TAC Level  IR Voltage  Temperature                Time
0      0.000       1.203       77.932 2017-05-02 11:16:02
1      0.000       1.139       79.792 2017-05-02 11:46:25
2      0.000       1.139       81.464 2017-05-02 11:51:49
3      0.016       1.139       82.580 2017-05-02 12:22:13
4      0.048       1.139       84.996 2017-05-02 12:52:37


In [8]:
raw_df['Time'] = pd.to_datetime(raw_df['Time'])

# Create a new column with the full datetime as a string for hover display
raw_df['Full Date'] = raw_df['Time'].dt.strftime('%Y-%m-%d %H:%M:%S')

fig = px.scatter(
    raw_df,
    x='Time',  
    y='TAC Level',  
    color='Temperature',  
    color_continuous_scale='viridis',  
    hover_data={'Full Date': True, 'IR Voltage': True, 'TAC Level': True},  # Force full date in hover
    title='TAC Level Over Time with Temperature Intensity'
)

# Format x-axis to show only time (HH:MM)
fig.update_xaxes(
    tickformat='%H:%M',  # Show only time on x-axis
    dtick=3600 * 1000  # 1-hour interval in milliseconds
)

# Show the figure
fig.show()

In [15]:
# Convert timestamps to datetime
accel_df['time'] = pd.to_datetime(accel_df['time'], unit='ms')
for pid, df in tac_data.items():
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    elif 'time' in df.columns:
        df.rename(columns={'time': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')



In [30]:
date_start = "2017-05-02 00:00:00"
date_end = "2017-05-04 23:59:59"
df_accel = accel_df[(accel_df['time'] >= date_start) & (accel_df['time'] <= date_end)]
for pid, df in tac_data.items():
    tac_data[pid] = df[(df['timestamp'] >= date_start) & (df['timestamp'] <= date_end)]

# Resample all TAC data to 1-hour intervals
for pid, df in tac_data.items():
    tac_data[pid] = df.set_index('timestamp').resample('1h').mean().reset_index()  

# Interactive plots for each participant
for pid, df in tac_data.items():
    fig = px.line(df, x='timestamp', y='TAC_Reading', title=f'TAC Levels for Participant {pid}', markers=True)
    fig.add_hline(y=0.08, line_dash='dash', annotation_text='Legal Limit')
    fig.show()


#

In [25]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.interpolate import griddata

# Drop NaN values from the dataset
clean_df = raw_df.dropna(subset=['Time', 'Temperature', 'TAC Level'])

# Convert time to numerical format (Unix timestamp in seconds)
clean_df['Time_Num'] = clean_df['Time'].astype('int64') // 10**9  

# Create a grid for the surface plot
time_grid, temp_grid = np.meshgrid(
    np.linspace(clean_df['Time_Num'].min(), clean_df['Time_Num'].max(), 50),
    np.linspace(clean_df['Temperature'].min(), clean_df['Temperature'].max(), 50)
)

# Interpolate TAC values (using nearest neighbor for better handling of sparse data)
tac_grid = griddata(
    (clean_df['Time_Num'], clean_df['Temperature']), 
    clean_df['TAC Level'], 
    (time_grid, temp_grid), 
    method='nearest'  # 'nearest' works better when there are missing points
)

# Create the surface plot
fig = go.Figure(data=[go.Surface(
    x=time_grid,  
    y=temp_grid,  
    z=tac_grid,  
    colorscale='viridis'
)])

fig.update_layout(
    title='3D Surface Plot: TAC Level Across Time and Temperature',
    scene=dict(
        xaxis_title='Time',
        yaxis_title='Temperature (°C)',
        zaxis_title='TAC Level (g/dl)'
    )
)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

