### Explore time stamps

To process the time series properly and to handle missing values, this notebook explores the time stamps of the recorded data.

In [None]:
# Imports

import os
import sys

# Add project root to Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)


import json
from typing import List

import matplotlib.pyplot as plt
import pandas as pd

from schema import ExperimentData
from utils import (get_data_path, get_screw_driving_static_data,
                   get_screw_driving_serial_data)

### Screw driving:

In [None]:
# Load one screw run and visualise which time stamps of the ideal sampling
# grid were actually recorded (green) and which are missing (red).

with open(get_screw_driving_serial_data("Ch_000001619516.json")) as file:
    screw_dict = json.load(file)

# Concatenate the time values of all tightening steps into one flat list
time_stamp = []

for step in screw_dict["tightening steps"]:
    time_stamp.extend(step["graph"]["time values"])

target_interval = 0.0012

# Round the quotient before truncating: a float division such as 0.3 / 0.1
# yields 2.9999999999999996, and a bare int() would drop the last ideal stamp.
ideal_count = int(round(max(time_stamp) / target_interval, 6)) + 1
ideal_time = [round(target_interval * r, 4) for r in range(ideal_count)]

missing_stamps = len(ideal_time) - len(time_stamp)

# A set gives O(1) membership tests; checking against the list would scan it
# once per ideal stamp.
recorded_stamps = set(time_stamp)
not_missing = ["green" if s in recorded_stamps else "red" for s in ideal_time]

plt.scatter(ideal_time, range(len(ideal_time)), color=not_missing)
plt.title(f"Recorded (green) vs. missing (red) time stamps - {missing_stamps} missing")
plt.xlabel("Ideal time stamp")
plt.ylabel("Index of ideal time stamp")
plt.show()

# most are missing towards the end, when torque changes quickly

In [None]:
# Iterate over all screw runs and plot the missing-stamp counts as a histogram
def get_screw_missing_stamps(pos: str, target_interval: float = 0.0012) -> List[int]:
    """Get missing time stamp counts for screw driving recordings.

    For every recording whose ``workpiece_location`` equals ``pos``, the time
    values of all tightening steps are concatenated and compared against the
    number of stamps an ideal grid ``0, target_interval, 2*target_interval, ...``
    would contain up to the largest recorded time value.

    Args:
        pos: Value of the ``workpiece_location`` column to select recordings by.
        target_interval: Expected spacing between two consecutive time stamps.

    Returns:
        One entry per recording that contains at least one time stamp:
        ideal stamp count minus recorded stamp count. Recordings without any
        time stamps are skipped.
    """
    # Load static data and keep only the recordings for the requested position
    static_data = pd.read_csv(get_screw_driving_static_data(), sep=";")
    file_names = static_data.loc[
        static_data.workpiece_location == pos, "file_name"
    ].tolist()

    missing_counts = []

    for file_name in file_names:
        with open(get_screw_driving_serial_data(file_name)) as file:
            screw_dict = json.load(file)

        # Flatten the time values of all tightening steps
        time_stamps = [
            value
            for step in screw_dict["tightening steps"]
            for value in step["graph"]["time values"]
        ]
        if not time_stamps:
            continue

        # Round before truncating so that quotients such as 2.9999999999999996
        # (float noise on an exact multiple) count as 3 instead of 2.
        ideal_count = int(round(max(time_stamps) / target_interval, 6)) + 1
        missing_counts.append(ideal_count - len(time_stamps))

    return missing_counts

def plot_missing_stamps(pos: str) -> None:
    """Show a 30-bin histogram of the missing time stamp counts of all screw runs at ``pos``."""
    counts_per_run = get_screw_missing_stamps(pos)

    # Draw on the current axes, exactly what the pyplot shortcuts target
    axes = plt.gca()
    axes.hist(counts_per_run, bins=30)
    axes.set_title(f"Missing Time Stamps - {pos} screw runs")
    axes.set_xlabel("Number of missing stamps")
    axes.set_ylabel("Count")

    plt.show()

In [None]:
# Histogram of missing time stamp counts for all screw runs whose
# workpiece_location is "right"
plot_missing_stamps("right")


### Injection molding:

In [None]:
def _read_lower_time_stamps(file_path) -> List[float]:
    """Read the time column (first ';'-separated field) of a lower-workpiece TXT file.

    Only lines after the first line containing "-start data-" are parsed;
    blank lines are ignored. Returns an empty list when the marker is absent.
    """
    with open(file_path, "r") as file:
        lines = file.readlines()

    # Data rows begin on the line following the "-start data-" marker
    data_start_idx = next(
        (i + 1 for i, line in enumerate(lines) if "-start data-" in line), None
    )
    if data_start_idx is None:
        return []

    return [
        float(line.strip().split(";")[0])
        for line in lines[data_start_idx:]
        if line.strip()
    ]


def get_injection_missing_stamps(workpiece_type: str) -> List[int]:
    """Get missing time stamp counts for injection molding recordings.

    Args:
        workpiece_type: ``"upper"`` (``*.csv`` files with a ``time`` column,
            0.01 interval) or ``"lower"`` (``*.txt`` files with a
            "-start data-" section, 0.03 interval).

    Returns:
        One entry per readable recording that contains at least one time
        stamp: ideal stamp count (up to the largest recorded time) minus the
        recorded stamp count. Files that cannot be processed are reported with
        a printed warning and skipped.

    Raises:
        ValueError: If ``workpiece_type`` is neither ``"upper"`` nor ``"lower"``.
    """
    # Validate first so an invalid type always surfaces as ValueError,
    # independent of how the path helper reacts to an unknown directory.
    if workpiece_type == "upper":
        pattern = "*.csv"
        target_interval = 0.01  # 10ms intervals
    elif workpiece_type == "lower":
        pattern = "*.txt"
        target_interval = 0.03  # 30ms
    else:
        raise ValueError(f"workpiece_type must be 'upper' or 'lower', got: {workpiece_type}")

    data_dir = get_data_path("injection_molding", f"{workpiece_type}_workpiece", "serial_data")
    files = list(data_dir.glob(pattern))

    missing_counts = []

    for file_path in files:
        try:
            if workpiece_type == "upper":
                time_stamps = pd.read_csv(file_path, index_col=0)["time"].tolist()
            else:
                time_stamps = _read_lower_time_stamps(file_path)

            if not time_stamps:
                continue  # nothing recorded / no data section

            # Round before truncating: e.g. 0.3 / 0.1 is 2.9999999999999996 in
            # floating point and a bare int() would undercount the ideal grid.
            ideal_count = int(round(max(time_stamps) / target_interval, 6)) + 1
            missing_counts.append(ideal_count - len(time_stamps))

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan
            print(f"Warning: Could not process {file_path.name}: {e}")
            continue

    return missing_counts


def plot_injection_missing_stamps(workpiece_type: str) -> None:
    """Plot histogram of missing time stamps for injection molding recordings.

    Args:
        workpiece_type: ``"upper"`` or ``"lower"``; forwarded to
            ``get_injection_missing_stamps``.
    """
    missing = get_injection_missing_stamps(workpiece_type)
    plt.hist(missing, bins=30)
    plt.title(f"Missing Time Stamps - {workpiece_type} workpiece (injection molding)")
    plt.xlabel("Number of missing stamps")
    plt.ylabel("Count")
    plt.show()

- UPPER:

In [None]:
# NOTE: get_injection_missing_stamps() takes only the workpiece type and sets its
# own interval for "upper" (0.01), so this notebook-level variable merely
# documents that value; it is not passed into the call below.
target_interval = 0.01 # (remember to round to avoid the floating point issues)

plot_injection_missing_stamps("upper")

- LOWER:

In [None]:
# NOTE: get_injection_missing_stamps() sets its own interval for "lower" (0.03),
# so this notebook-level variable merely documents that value; it is not passed
# into the call below.
target_interval = 0.03 # (?!) three times the 0.01 used for the upper workpiece - TODO confirm this is the real sampling interval

plot_injection_missing_stamps("lower")