In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import zip_longest
import os
import glob
import ast

In [2]:
input_folder = "Error, Hand Measurements/2x green box 165gr - correct calibration"
output_folder = "Error, Hand Measurements, cleaned/2x green box 165gr cleaned"
BATCH_SIZE = 5000  # files (measurements) per flush

os.makedirs(output_folder, exist_ok=True)  # ensure folder exists
final_csv = os.path.join(output_folder, "Hand_measurements_correct_cleaned.csv")  # <-- final file name

files = glob.glob(os.path.join(input_folder, "*.json"))

run_counter = 0
wrote_header = False
batch = []
columns_order = None  # will lock on first batch write
accepted_in_batch = 0

for file in files:
    try:
        with open(file, encoding="utf-8") as f:
            data = json.load(f)

        df = pd.json_normalize(
            data,
            record_path="raw_measurements",
            meta=["version","type","start_time","end_time","sampling_rate",
                  "weight_filter","zero_offset","expected_weight","robot_type","force_sensor_offset"],
            errors="ignore"
        )

        df["timestamp"] = df["timestamp"] - df["timestamp"].iloc[0]

        # your checks
        if len(df) < 400:               continue
        if "value" not in df.columns:   continue
        if df["value"].isna().all() or (df["value"] == 0).all():  continue

        # ids
        df["measurement_id"] = os.path.splitext(os.path.basename(file))[0]
        run_counter += 1
        df["run_index"] = run_counter

        batch.append(df)
        accepted_in_batch += 1

        # flush every BATCH_SIZE accepted files
        if accepted_in_batch >= BATCH_SIZE:
            out = pd.concat(batch, ignore_index=True)

            # lock column order on first write; enforce consistently thereafter
            if columns_order is None:
                columns_order = out.columns.tolist()
            else:
                out = out.reindex(columns=columns_order, fill_value=pd.NA)

            out.to_csv(final_csv, mode="a", index=False, encoding="utf-8",
                       header=not wrote_header)
            wrote_header = True

            # clear memory
            batch.clear()
            accepted_in_batch = 0
            del out
            gc.collect()

    except Exception:
        continue

# flush remainder
if batch:
    out = pd.concat(batch, ignore_index=True)
    if columns_order is None:
        columns_order = out.columns.tolist()
    else:
        out = out.reindex(columns=columns_order, fill_value=pd.NA)
    out.to_csv(final_csv, mode="a", index=False, encoding="utf-8",
               header=not wrote_header)

print(f"Done. Appended to {os.path.abspath(final_csv)}")

Done. Appended to C:\Users\piete\Documents\Smart Robotics PROJECT\Error, Hand Measurements, cleaned\2x green box 165gr cleaned\Hand_measurements_correct_cleaned.csv


In [3]:
df = pd.read_csv(r"Error, Hand Measurements, cleaned\2x green box 165gr cleaned\Hand_measurements_correct_cleaned.csv")

  df = pd.read_csv(r"Error, Hand Measurements, cleaned\2x green box 165gr cleaned\Hand_measurements_correct_cleaned.csv")


In [4]:
df

Unnamed: 0,timestamp,force_vector,value,robot_tcp.timestamp,robot_tcp.tcp_offset,robot_tcp.joint_angles,robot_tcp.velocity_angular,robot_tcp.velocity_linear,robot_tcp.flange,version,...,start_time,end_time,sampling_rate,weight_filter,zero_offset,expected_weight,robot_type,force_sensor_offset,measurement_id,run_index
0,0.0,"[0.866, 1.205, 28304.0]",0.349,1759831546.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16820229589939117, -0.7221682071685791, 0...",2,...,1759831546.3848302,1759831546.8858523,1000.0,,0.28500204918032784,0.16500000655651093,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-05-46-384668_measure_weight_FORC...,1
1,0.0010154247283935547,"[0.922, 1.238, 28523.0]",0.349,1759831546.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16830548644065857, -0.7219724059104919, 0...",2,...,1759831546.3848302,1759831546.8858523,1000.0,,0.28500204918032784,0.16500000655651093,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-05-46-384668_measure_weight_FORC...,1
2,0.0019974708557128906,"[0.986, 1.283, 29084.0]",0.35,1759831546.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16830548644065857, -0.7219724059104919, 0...",2,...,1759831546.3848302,1759831546.8858523,1000.0,,0.28500204918032784,0.16500000655651093,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-05-46-384668_measure_weight_FORC...,1
3,0.0030672550201416016,"[1.051, 1.334, 30009.0]",0.351,1759831546.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16843916475772858, -0.7217670679092407, 0...",2,...,1759831546.3848302,1759831546.8858523,1000.0,,0.28500204918032784,0.16500000655651093,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-05-46-384668_measure_weight_FORC...,1
4,0.004062652587890625,"[1.113, 1.387, 31161.0]",0.351,1759831546.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16843916475772858, -0.7217670679092407, 0...",2,...,1759831546.3848302,1759831546.8858523,1000.0,,0.28500204918032784,0.16500000655651093,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-05-46-384668_measure_weight_FORC...,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149434,0.497808,"[0.079, 0.419, 32123.0]",0.471,1759832865.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",2,...,1759832865.354132,1759832865.855006,1000.0,,0.126033,0.165,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-27-45-353927_measure_weight_FORC...,35
149435,0.498302,"[0.079, 0.419, 32123.0]",0.471,1759832865.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",2,...,1759832865.354132,1759832865.855006,1000.0,,0.126033,0.165,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-27-45-353927_measure_weight_FORC...,35
149436,0.498795,"[0.075, 0.417, 32126.0]",0.472,1759832865.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",2,...,1759832865.354132,1759832865.855006,1000.0,,0.126033,0.165,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-27-45-353927_measure_weight_FORC...,35
149437,0.499346,"[0.075, 0.417, 32126.0]",0.472,1759832865.0,"[[0.0, 0.0, 0.41], [1.0, 0.0, 0.0, 0.0]]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",2,...,1759832865.354132,1759832865.855006,1000.0,,0.126033,0.165,ur10e,"[[0.0, 0.0, 0.02], [1.0, 0.0, 0.0, 0.0]]",2025-10-07_12-27-45-353927_measure_weight_FORC...,35


In [5]:
df = df.drop('start_time', axis=1)
df = df.drop('end_time', axis=1)
df = df.drop('version', axis=1)
df = df.drop('timestamp', axis=1)
df = df.drop('type', axis=1)
df = df.drop('sampling_rate', axis=1)
df = df.drop('weight_filter', axis=1)
df = df.drop('robot_type', axis=1)
df = df.drop('measurement_id', axis=1)
df = df.drop('robot_tcp.timestamp', axis=1)
df = df.drop('force_sensor_offset', axis=1) # always 1, so contains no information.
df = df.drop('robot_tcp.tcp_offset', axis=1) # always 1, so contains no information.

df['value'] = pd.to_numeric(df['value'], errors='coerce')
df['zero_offset'] = pd.to_numeric(df['zero_offset'], errors='coerce')
df['force_sensor_value'] = df['value'] - df['zero_offset'] # does the zero_offset only impact the 'value' colummn? My assumption is yes, but not 100% sure.
df = df.drop('value', axis=1)
df = df.drop('zero_offset', axis=1)

In [6]:
df.keys()

Index(['force_vector', 'robot_tcp.joint_angles', 'robot_tcp.velocity_angular',
       'robot_tcp.velocity_linear', 'robot_tcp.flange', 'expected_weight',
       'run_index', 'force_sensor_value'],
      dtype='object')

In [7]:
df

Unnamed: 0,force_vector,robot_tcp.joint_angles,robot_tcp.velocity_angular,robot_tcp.velocity_linear,robot_tcp.flange,expected_weight,run_index,force_sensor_value
0,"[0.866, 1.205, 28304.0]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16820229589939117, -0.7221682071685791, 0...",0.16500000655651093,1,0.063998
1,"[0.922, 1.238, 28523.0]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.063998
2,"[0.986, 1.283, 29084.0]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.064998
3,"[1.051, 1.334, 30009.0]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998
4,"[1.113, 1.387, 31161.0]","[-1.5717976729022425, -1.7618028126158656, -1....","[-0.0018238269569752744, 0.005977372371598684,...","[-0.02149872902181782, 0.03205436845490284, 1....","[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998
...,...,...,...,...,...,...,...,...
149434,"[0.079, 0.419, 32123.0]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967
149435,"[0.079, 0.419, 32123.0]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967
149436,"[0.075, 0.417, 32126.0]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967
149437,"[0.075, 0.417, 32126.0]","[-2.615244452153341, -1.6014510593810023, -0.8...","[-1.1595742567563786e-05, 1.997372818185578e-0...","[3.1233930095163085e-06, 1.8133273872466e-06, ...","[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967


In [13]:
# for col in df.columns:
#     s = df[col]
#     if s.dtype == object and s.str.startswith('[').all() and ~s.str.contains(r'\[\[').all():
#         parts = s.str[1:-1].str.split(',', expand=True)        # vectorized split
#         parts = parts.apply(pd.to_numeric, errors='coerce')     # vectorized cast
#         parts.columns = [f"{col}_{i+1}" for i in range(parts.shape[1])]
#         df[parts.columns] = parts
#         df.drop(columns=[col], inplace=True)

for col in df.columns:
    if df[col].dtype != object:
        continue
    s = df[col].astype(str).str.strip()
    if not s.str.startswith('[').any() or s.str.startswith('[[').any():
        continue
    vals = s.apply(lambda x: ast.literal_eval(x) if x.startswith('[') else np.nan)
    vals = vals.apply(lambda x: x if isinstance(x, list) else np.nan)
    if vals.dropna().empty:
        continue
    maxlen = max(len(v) for v in vals.dropna())
    parts = pd.DataFrame(
        vals.apply(lambda v: (v + [np.nan]*(maxlen - len(v))) if isinstance(v, list) else [np.nan]*maxlen).tolist(),
        index=df.index
    )
    parts.columns = [f"{col}_{i+1}" for i in range(parts.shape[1])]
    parts = parts.apply(pd.to_numeric, errors='coerce')
    df = pd.concat([df.drop(columns=[col]), parts], axis=1)

In [14]:
df

Unnamed: 0,robot_tcp.flange,expected_weight,run_index,force_sensor_value,force_vector_1,force_vector_2,force_vector_3,robot_tcp.joint_angles_1,robot_tcp.joint_angles_2,robot_tcp.joint_angles_3,robot_tcp.joint_angles_4,robot_tcp.joint_angles_5,robot_tcp.joint_angles_6,robot_tcp.velocity_angular_1,robot_tcp.velocity_angular_2,robot_tcp.velocity_angular_3,robot_tcp.velocity_linear_1,robot_tcp.velocity_linear_2,robot_tcp.velocity_linear_3
0,"[[-0.16820229589939117, -0.7221682071685791, 0...",0.16500000655651093,1,0.063998,0.866,1.205,28304.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
1,"[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.063998,0.922,1.238,28523.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
2,"[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.064998,0.986,1.283,29084.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
3,"[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998,1.051,1.334,30009.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
4,"[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998,1.113,1.387,31161.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149434,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149435,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149436,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149437,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013


In [15]:
col = 'robot_tcp.flange'

s = df[col]

# Clean like before (flatten and remove brackets/spaces)
inner = (
    s.astype(str)
     .str.replace('],', ',', regex=False)
     .str.replace('[', '', regex=False)
     .str.replace(']', '', regex=False)
     .str.replace(' ', '', regex=False)
     .str.replace('\t', '', regex=False)
     .str.strip(',')
)

# Parse each row individually and count numeric tokens
counts = inner.map(lambda x: len(np.fromstring(x, sep=',')))
commas = inner.str.count(',')
expected = commas.mode()[0] + 1 if not commas.mode().empty else None

# Find bad rows
bad_mask = (expected is not None) & ((counts != expected) | (counts == 0))
bad_rows = df.index[bad_mask].tolist()

if bad_rows:
    print(f"⚠️ Found {len(bad_rows)} malformed rows in '{col}' (expected {expected} numbers per row):\n")
    for i in bad_rows[:10]:  # show up to 10 samples
        print(f"Row {i}: {df.at[i, col]}")
    df = df.drop(index=bad_rows).reset_index(drop=True)
else:
    print(f"✅ All rows in '{col}' parsed cleanly ({expected} numbers per row).")

  counts = inner.map(lambda x: len(np.fromstring(x, sep=',')))


⚠️ Found 4 malformed rows in 'robot_tcp.flange' (expected 7 numbers per row):

Row 29887: robot_tcp.flange
Row 59775: robot_tcp.flange
Row 89663: robot_tcp.flange
Row 119551: robot_tcp.flange


In [16]:
df

Unnamed: 0,robot_tcp.flange,expected_weight,run_index,force_sensor_value,force_vector_1,force_vector_2,force_vector_3,robot_tcp.joint_angles_1,robot_tcp.joint_angles_2,robot_tcp.joint_angles_3,robot_tcp.joint_angles_4,robot_tcp.joint_angles_5,robot_tcp.joint_angles_6,robot_tcp.velocity_angular_1,robot_tcp.velocity_angular_2,robot_tcp.velocity_angular_3,robot_tcp.velocity_linear_1,robot_tcp.velocity_linear_2,robot_tcp.velocity_linear_3
0,"[[-0.16820229589939117, -0.7221682071685791, 0...",0.16500000655651093,1,0.063998,0.866,1.205,28304.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
1,"[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.063998,0.922,1.238,28523.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
2,"[[-0.16830548644065857, -0.7219724059104919, 0...",0.16500000655651093,1,0.064998,0.986,1.283,29084.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
3,"[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998,1.051,1.334,30009.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
4,"[[-0.16843916475772858, -0.7217670679092407, 0...",0.16500000655651093,1,0.065998,1.113,1.387,31161.0,-1.571798,-1.761803,-1.916965,-1.053720,1.640832,-3.399368,-0.001824,0.005977,6.306471e-03,-0.021499,0.032054,1.313530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149430,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149431,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149432,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013
149433,"[[-0.5956849455833435, -0.144369438290596, 1.0...",0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,1.564768,-3.865908,-0.000012,0.000020,4.501060e-09,0.000003,0.000002,0.000013


In [17]:
col = "robot_tcp.flange"
chunk = 1_000_000

s = (df[col].astype(str)
       .str.replace(r'\],\s*\[', ',', regex=True)
       .str.replace(r'[\[\]\s]', '', regex=True)
       .str.replace(',,', ',', regex=False)
       .str.strip(','))

w = s.iloc[0].count(',') + 1
out = np.empty((len(df), w), float)

for start in range(0, len(df), chunk):
    end = min(start + chunk, len(df))
    block = s.iloc[start:end].to_numpy(copy=False)
    big = '\n'.join(block.tolist())
    arr = np.fromstring(big, sep=',', dtype=float)
    if arr.size != (end - start) * w:
        arr = np.concatenate([np.fromstring(x, sep=',', dtype=float) for x in block])
    out[start:end] = arr.reshape(end - start, w)

df[[f"{col}_{i+1}" for i in range(w)]] = out
df.drop(columns=[col], inplace=True)

  arr = np.fromstring(big, sep=',', dtype=float)


In [18]:
df

Unnamed: 0,expected_weight,run_index,force_sensor_value,force_vector_1,force_vector_2,force_vector_3,robot_tcp.joint_angles_1,robot_tcp.joint_angles_2,robot_tcp.joint_angles_3,robot_tcp.joint_angles_4,...,robot_tcp.velocity_linear_1,robot_tcp.velocity_linear_2,robot_tcp.velocity_linear_3,robot_tcp.flange_1,robot_tcp.flange_2,robot_tcp.flange_3,robot_tcp.flange_4,robot_tcp.flange_5,robot_tcp.flange_6,robot_tcp.flange_7
0,0.16500000655651093,1,0.063998,0.866,1.205,28304.0,-1.571798,-1.761803,-1.916965,-1.053720,...,-0.021499,0.032054,1.313530,-0.168202,-0.722168,0.404806,0.991402,0.124804,0.039019,-0.004766
1,0.16500000655651093,1,0.063998,0.922,1.238,28523.0,-1.571798,-1.761803,-1.916965,-1.053720,...,-0.021499,0.032054,1.313530,-0.168305,-0.721972,0.407424,0.991402,0.124797,0.039035,-0.004892
2,0.16500000655651093,1,0.064998,0.986,1.283,29084.0,-1.571798,-1.761803,-1.916965,-1.053720,...,-0.021499,0.032054,1.313530,-0.168305,-0.721972,0.407424,0.991402,0.124797,0.039035,-0.004892
3,0.16500000655651093,1,0.065998,1.051,1.334,30009.0,-1.571798,-1.761803,-1.916965,-1.053720,...,-0.021499,0.032054,1.313530,-0.168439,-0.721767,0.410069,0.991402,0.124795,0.039044,-0.004970
4,0.16500000655651093,1,0.065998,1.113,1.387,31161.0,-1.571798,-1.761803,-1.916965,-1.053720,...,-0.021499,0.032054,1.313530,-0.168439,-0.721767,0.410069,0.991402,0.124795,0.039044,-0.004970
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149430,0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,...,0.000003,0.000002,0.000013,-0.595685,-0.144369,1.029902,0.987445,-0.157961,-0.000040,-0.000021
149431,0.165,35,0.344967,0.079,0.419,32123.0,-2.615244,-1.601451,-0.871653,-2.239378,...,0.000003,0.000002,0.000013,-0.595685,-0.144369,1.029902,0.987445,-0.157961,-0.000040,-0.000021
149432,0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,...,0.000003,0.000002,0.000013,-0.595685,-0.144369,1.029902,0.987445,-0.157961,-0.000040,-0.000021
149433,0.165,35,0.345967,0.075,0.417,32126.0,-2.615244,-1.601451,-0.871653,-2.239378,...,0.000003,0.000002,0.000013,-0.595685,-0.144369,1.029902,0.987445,-0.157961,-0.000040,-0.000021


In [19]:
df = df.apply(pd.to_numeric, errors='coerce').replace([np.inf, -np.inf], np.nan)

print("All columns coerced to numeric.")
print(df.info())

All columns coerced to numeric.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149435 entries, 0 to 149434
Data columns (total 25 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   expected_weight               149435 non-null  float64
 1   run_index                     149435 non-null  int64  
 2   force_sensor_value            149435 non-null  float64
 3   force_vector_1                149435 non-null  float64
 4   force_vector_2                149435 non-null  float64
 5   force_vector_3                149435 non-null  float64
 6   robot_tcp.joint_angles_1      149435 non-null  float64
 7   robot_tcp.joint_angles_2      149435 non-null  float64
 8   robot_tcp.joint_angles_3      149435 non-null  float64
 9   robot_tcp.joint_angles_4      149435 non-null  float64
 10  robot_tcp.joint_angles_5      149435 non-null  float64
 11  robot_tcp.joint_angles_6      149435 non-null  float64
 12  robot_tcp.ve

In [20]:
df.to_csv('Hand_measurements_cleaned_and_structured.csv', index=False)

In [21]:
df.keys()

Index(['expected_weight', 'run_index', 'force_sensor_value', 'force_vector_1',
       'force_vector_2', 'force_vector_3', 'robot_tcp.joint_angles_1',
       'robot_tcp.joint_angles_2', 'robot_tcp.joint_angles_3',
       'robot_tcp.joint_angles_4', 'robot_tcp.joint_angles_5',
       'robot_tcp.joint_angles_6', 'robot_tcp.velocity_angular_1',
       'robot_tcp.velocity_angular_2', 'robot_tcp.velocity_angular_3',
       'robot_tcp.velocity_linear_1', 'robot_tcp.velocity_linear_2',
       'robot_tcp.velocity_linear_3', 'robot_tcp.flange_1',
       'robot_tcp.flange_2', 'robot_tcp.flange_3', 'robot_tcp.flange_4',
       'robot_tcp.flange_5', 'robot_tcp.flange_6', 'robot_tcp.flange_7'],
      dtype='object')