# File statistics

In [None]:
# @title Kinect Gesture Folder Scan & Summary (Colab-ready)
# 1) Mount Google Drive (optional if your data is in Drive)
from google.colab import drive
drive.mount('/content/drive')

import os
import glob
import pandas as pd
from collections import defaultdict, Counter

# 2) Prompt for the directory that holds every .txt gesture recording.
#    Typical locations:
#    - Google Drive:     /content/drive/MyDrive/kinect_data
#    - Colab workspace:  /content/kinect_data
DATA_DIR = input("Enter the full path to the folder that holds all Kinect .txt files: ").strip()

# Stop right away when the answer is not an existing directory.
if not os.path.isdir(DATA_DIR):
    raise NotADirectoryError(f"Path does not exist or is not a directory: {DATA_DIR}")

# 3) Collect .txt files at any depth below DATA_DIR, in sorted order.
txt_pattern = os.path.join(DATA_DIR, "**", "*.txt")
txt_files = glob.glob(txt_pattern, recursive=True)
txt_files.sort()

# --- (1) Report how many files were found ---
total_files = len(txt_files)
print(f"Total .txt gesture files found: {total_files}")

# Accumulators filled by the per-file scan further down in this cell.
gesture_counts = Counter()            # gesture -> number of files
person_counts = Counter()             # collectedPerson -> number of files
rows_per_gesture = defaultdict(list)  # gesture -> row count of each of its files

def parse_filename(fname: str):
    """
    Extract (gesture, person) from a file name shaped like
    ``gesture_collectedPerson[_anything].txt``.

    Only the base name is inspected; directories and the extension are ignored.
    The stem is split on underscores and empty or whitespace-only pieces are
    discarded, so doubled, leading or trailing underscores (for example
    ``afternoon__apurve_1.txt``) no longer produce an empty name.

    Returns:
        tuple[str, str]: ``(gesture, person)``. A component that cannot be
        found in the file name is reported as ``"UNKNOWN"``.
    """
    stem, _ext = os.path.splitext(os.path.basename(fname))
    # Drop empty tokens: "a__b".split("_") yields ["a", "", "b"], and the blank
    # middle token would otherwise be reported as the person.
    tokens = [tok for tok in (piece.strip() for piece in stem.split("_")) if tok]
    gesture = tokens[0] if len(tokens) >= 1 else "UNKNOWN"
    person = tokens[1] if len(tokens) >= 2 else "UNKNOWN"
    return gesture, person

def count_rows_fast(path: str) -> int:
    """
    Return how many lines the text file at ``path`` contains.

    The file is streamed line by line instead of being loaded whole. It is
    read as UTF-8 and undecodable bytes are ignored. Every line counts,
    including blank ones.
    """
    with open(path, "r", encoding="utf-8", errors="ignore") as handle:
        return sum(1 for _line in handle)

# 4) Aggregate per-file statistics
for fp in txt_files:
    gesture, person = parse_filename(fp)
    gesture_counts[gesture] += 1
    person_counts[person] += 1

    # Count rows in this file and store per gesture
    n_rows = count_rows_fast(fp)
    rows_per_gesture[gesture].append(n_rows)

# --- (2) Count how many gesture records per gesture category ---
gesture_df = pd.DataFrame(
    {"gesture": list(gesture_counts.keys()),
     "num_records": list(gesture_counts.values())}
).sort_values(["gesture"]).reset_index(drop=True)

# Save as requested
gesture_csv_path = os.path.join(DATA_DIR, "num_of_record_per_geture.csv")  # (kept exact filename as requested)
gesture_df.to_csv(gesture_csv_path, index=False)
print(f"Saved per-gesture counts to: {gesture_csv_path}")

# --- (3) Count how many files were recorded per collectedPerson name ---
person_df = pd.DataFrame(
    {"collectedPerson": list(person_counts.keys()),
     "num_records": list(person_counts.values())}
).sort_values(["collectedPerson"]).reset_index(drop=True)

person_csv_path = os.path.join(DATA_DIR, "person_collected.csv")
person_df.to_csv(person_csv_path, index=False)
print(f"Saved per-person counts to: {person_csv_path}")

# --- (4) For each gesture: min, max, average number of rows per file ---
# The column list is given explicitly so the frame still has a "gesture" column
# when no files were found; without it sort_values("gesture") raises KeyError
# on the column-less frame that pd.DataFrame([]) produces.
summary_columns = [
    "gesture",
    "min_number_of_rows_per_gesture_file",
    "max_number_of_rows_per_gesture_file",
    "average_number_of_rows_per_gesture_from_all",
]
summary_rows = [
    {
        "gesture": g,
        "min_number_of_rows_per_gesture_file": int(min(row_list)),
        "max_number_of_rows_per_gesture_file": int(max(row_list)),
        "average_number_of_rows_per_gesture_from_all": float(sum(row_list) / len(row_list)),
    }
    for g, row_list in rows_per_gesture.items()
    if row_list  # skip gestures without any recorded row counts
]

rows_summary_df = (
    pd.DataFrame(summary_rows, columns=summary_columns)
    .sort_values("gesture")
    .reset_index(drop=True)
)

# Display tables in Colab
print("\n=== Per-gesture counts ===")
display(gesture_df)

print("\n=== Per-person counts ===")
display(person_df)

print("\n=== Rows-per-gesture summary ===")
display(rows_summary_df)

# (Optional) also save the rows summary next to the others for convenience
rows_summary_csv = os.path.join(DATA_DIR, "rows_per_gesture_summary.csv")
rows_summary_df.to_csv(rows_summary_csv, index=False)
print(f"Saved rows-per-gesture summary to: {rows_summary_csv}")

Mounted at /content/drive
Enter the full path to the folder that holds all Kinect .txt files: /content/drive/MyDrive/0 ML - DL - AI - BD/Advanced Data Mining/Project/combined
Total .txt gesture files found: 2700
Saved per-gesture counts to: /content/drive/MyDrive/0 ML - DL - AI - BD/Advanced Data Mining/Project/combined/num_of_record_per_geture.csv
Saved per-person counts to: /content/drive/MyDrive/0 ML - DL - AI - BD/Advanced Data Mining/Project/combined/person_collected.csv

=== Per-gesture counts ===


Unnamed: 0,gesture,num_records
0,afternoon,90
1,baby,90
2,big,90
3,born,90
4,bye,90
5,calendar,90
6,child,90
7,cloud,90
8,come,90
9,daily,90



=== Per-person counts ===


Unnamed: 0,collectedPerson,num_records
0,,18
1,apurve,261
2,gautam,270
3,mahendra,270
4,parveen,270
5,pradeep,261
6,pramod,270
7,prateek,270
8,rajat,270
9,rajkumar,270



=== Rows-per-gesture summary ===


Unnamed: 0,gesture,min_number_of_rows_per_gesture_file,max_number_of_rows_per_gesture_file,average_number_of_rows_per_gesture_from_all
0,afternoon,47,110,74.844444
1,baby,43,122,84.055556
2,big,49,128,74.588889
3,born,27,134,74.888889
4,bye,46,116,73.344444
5,calendar,27,107,73.322222
6,child,32,174,80.055556
7,cloud,36,137,72.211111
8,come,35,119,71.444444
9,daily,43,189,81.544444


Saved rows-per-gesture summary to: /content/drive/MyDrive/0 ML - DL - AI - BD/Advanced Data Mining/Project/combined/rows_per_gesture_summary.csv


# Motion Changes

In [None]:
# === Verify Kinect v1 single-frame skeleton connectivity (Colab-ready) ===
# Goal: Upload ONE .txt file (rows=frames, 60 values per row: 20 joints × x,y,z).
#       Take ONLY THE FIRST ROW, interpret it as Kinect v1 joint order shown in the image,
#       and draw a single 3D skeleton with correct human-like connections.

import numpy as np
from google.colab import files
import plotly.graph_objects as go

print("Upload your Kinect .txt file (each row has 60 numbers: x,y,z for 20 joints).")
uploaded = files.upload()
# The upload result maps file name -> content. When nothing was uploaded it is
# empty, and next(iter(...)) would fail with a bare StopIteration and no hint.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the cell and choose a Kinect .txt file.")
fname = next(iter(uploaded))

# --- Load data and take ONLY THE FIRST ROW ---
raw = np.genfromtxt(fname, dtype=float)
# genfromtxt returns a 0-d array for a single value, 1-D for a single row and
# 2-D otherwise; atleast_2d normalises all three so row 0 can always be taken
# and malformed files reach the descriptive checks below.
first_row = np.atleast_2d(raw)[0]

assert first_row.size % 3 == 0, "Expected multiples of 3 columns (x,y,z per joint)."
n_joints = first_row.size // 3
assert n_joints == 20, f"Expected 20 joints for Kinect v1, found {n_joints}."

# --- Kinect v1 joint order: list position == joint index within a frame ---
JOINT_NAMES = [
    "HipCenter", "Spine", "ShoulderCenter", "Head",               #  0- 3
    "ShoulderLeft", "ElbowLeft", "WristLeft", "HandLeft",         #  4- 7
    "ShoulderRight", "ElbowRight", "WristRight", "HandRight",     #  8-11
    "HipLeft", "KneeLeft", "AnkleLeft", "FootLeft",               # 12-15
    "HipRight", "KneeRight", "AnkleRight", "FootRight",           # 16-19
]

# --- Connectivity matching a human figure (v1) ---
# Each chain lists joint indices along one limb; consecutive indices in a
# chain form one bone.
_BONE_CHAINS = [
    (0, 1, 2, 3),          # torso
    (2, 4, 5, 6, 7),       # left arm
    (2, 8, 9, 10, 11),     # right arm
    (0, 12, 13, 14, 15),   # left leg
    (0, 16, 17, 18, 19),   # right leg
]
EDGES = [
    (start, end)
    for chain in _BONE_CHAINS
    for start, end in zip(chain, chain[1:])
]

# --- Split the flat first row into one (x, y, z) triple per joint ---
xyz = first_row.reshape(n_joints, 3)
x, y, z = xyz.T

# --- Plotly 3D figure for this ONE frame (mapping check) ---
joint_labels = [f"{i}:{n}" for i,n in enumerate(JOINT_NAMES)]
scatter = go.Scatter3d(
    x=x, y=y, z=z,
    mode='markers+text',
    marker=dict(size=4),
    text=joint_labels,
    textposition="top center",
    name="Joints"
)

# One two-point line trace per bone listed in EDGES
bone_traces = [
    go.Scatter3d(
        x=[x[a], x[b]],
        y=[y[a], y[b]],
        z=[z[a], z[b]],
        mode='lines',
        line=dict(width=5),
        showlegend=False
    )
    for (a, b) in EDGES
]

# Axis ranges: per-axis extent widened by 5% of the largest extent
mins = xyz.min(axis=0)
maxs = xyz.max(axis=0)
extent = maxs - mins
pad = 0.05 * float(np.max(extent))
xmin, ymin, zmin = mins - pad
xmax, ymax, zmax = maxs + pad

scene_settings = dict(
    xaxis=dict(range=[xmin, xmax], title='X'),
    yaxis=dict(range=[ymin, ymax], title='Y'),
    zaxis=dict(range=[zmin, zmax], title='Z'),
    aspectmode='data'
)
fig = go.Figure(
    data=[scatter] + bone_traces,
    layout=go.Layout(
        title="Kinect v1 — Single-Frame Skeleton (Row 1 Only)",
        scene=scene_settings
    )
)

fig.show()

# --- Optional: list every joint's coordinates to cross-check the mapping ---
print("Joint index : name -> (x, y, z)")
for i, name in enumerate(JOINT_NAMES):
    jx, jy, jz = x[i], y[i], z[i]
    print(f"{i:2d} : {name:15s} -> ({jx:.6f}, {jy:.6f}, {jz:.6f})")


Upload your Kinect .txt file (each row has 60 numbers: x,y,z for 20 joints).


Saving afternoon_apurve_1.txt to afternoon_apurve_1.txt


Joint index : name -> (x, y, z)
 0 : HipCenter       -> (-0.377219, 0.676463, 2.477132)
 1 : Spine           -> (-0.367249, 0.493018, 2.566909)
 2 : ShoulderCenter  -> (-0.529421, 0.355840, 2.576270)
 3 : Head            -> (-0.194301, 0.431102, 2.561145)
 4 : ShoulderLeft    -> (-0.567583, 0.087563, 2.615399)
 5 : ElbowLeft       -> (-0.033342, 0.579121, 2.433389)
 6 : WristLeft       -> (-0.589427, -0.122288, 2.591342)
 7 : HandLeft        -> (-0.112757, 0.775669, 2.344967)
 8 : ShoulderRight   -> (-0.572328, -0.211278, 2.548121)
 9 : ElbowRight      -> (-0.178891, 0.813138, 2.315454)
10 : WristRight      -> (-0.349652, 0.147281, 2.577553)
11 : HandRight       -> (-0.346308, 0.078289, 2.574845)
12 : HipLeft         -> (-0.419842, -0.005263, 2.576520)
13 : KneeLeft        -> (-0.264351, 0.007698, 2.571115)
14 : AnkleLeft       -> (-0.458476, -0.460340, 2.673417)
15 : FootLeft        -> (-0.235985, -0.479609, 2.666875)
16 : HipRight        -> (-0.461032, -0.847880, 2.715237)
17 : KneeR

# Motion playback for both Microsoft (CSV) and .txt formats

In [None]:
# === Colab-ready: Kinect v1 3D Skeleton (animated) — CSV (Microsoft) OR TXT (60 cols) ===
# Supports TWO formats for a SINGLE uploaded file:
#   1) Microsoft CSV per frame: [ID?] + 20 × (x,y,z,conf)  → 81 or 80 columns
#   2) Plain TXT per frame: 60 columns = 20 joints × (x,y,z)
#
# Features
# - Robust CSV delimiter/encoding detection
# - TXT loader for whitespace-delimited numeric tables
# - Optional joint-name labels on points
# - Y-as-up camera, equal (cube) aspect, generous padding to avoid clipping
# - Optional Z↔Y axis swap if your data uses Z as vertical
#
# Usage
# 1) Set the knobs below if desired.
# 2) Run the cell, upload your .csv or .txt file when prompted.
# 3) Use Play/Pause/slider to browse frames.

# ------------------- knobs -------------------
# User-tunable settings read by the rest of this cell.
SHOW_LABELS  = False         # True: draw "index:JOINT_NAME" text next to each joint marker
USE_Z_AS_UP  = False        # True: swap the Y and Z coordinates before plotting (for data whose vertical axis is Z)
PLAYBACK_MS  = 40           # frame duration used by the Play button, in milliseconds
PAD_FACTOR   = 0.20         # padding added around the skeleton, as a fraction of the largest coordinate span (increase if labels clip)
# ---------------------------------------------

import io, csv
import numpy as np
import pandas as pd
from google.colab import files
import plotly.graph_objects as go

# ---------- Upload file ----------
print("Upload a Kinect file (.csv from Microsoft OR .txt with 60 columns per frame):")
uploaded = files.upload()
# The upload result maps file name -> raw bytes. When nothing was uploaded it
# is empty, and next(iter(...)) would fail with a bare StopIteration.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the cell and choose a .csv or .txt file.")
# Only the first uploaded file is used.
fname = next(iter(uploaded))
raw_bytes = uploaded[fname]

# ---------- Helpers for CSV detection ----------
def detect_encoding(b: bytes) -> str:
    """
    Return the first codec, tried in the order utf-8-sig, utf-8, latin-1,
    that decodes ``b`` without a UnicodeDecodeError.

    Falls back to "utf-8" if none of the candidates succeeds.
    """
    def _decodes_with(codec: str) -> bool:
        try:
            b.decode(codec)
        except UnicodeDecodeError:
            return False
        return True

    candidates = ("utf-8-sig", "utf-8", "latin-1")
    return next((codec for codec in candidates if _decodes_with(codec)), "utf-8")

def sniff_delimiter(sample_text: str) -> str:
    """
    Guess the column delimiter of ``sample_text``.

    csv.Sniffer is asked first, looking at the first 4096 characters and
    restricted to comma, semicolon, tab and space. If it raises, the candidate
    that occurs most often in the first line wins (ties go to the earlier
    candidate). An empty text or empty first line yields ",".
    """
    options = [",", ";", "\t", " "]
    try:
        return csv.Sniffer().sniff(sample_text[:4096], delimiters=options).delimiter
    except Exception:
        lines = sample_text.splitlines()
        head = lines[0] if lines else ""
        if not head:
            return ","
        return max(options, key=lambda d: head.count(d))

# ---------- Load file into xyz_all: (frames, 20, 3) ----------
def load_csv_to_xyz_all(raw_bytes: bytes) -> np.ndarray:
    """
    Parse a per-frame CSV of 20 joints x (x, y, z, conf) into joint positions.

    Each row is one frame. The trailing 80 numeric columns are read as
    20 groups of (x, y, z, conf); any columns before them (such as an ID) are
    ignored and the confidence value is dropped.

    Rows with no numeric cell at all (for example a header line) and columns
    that are entirely non-numeric are removed before gaps are filled, so they
    cannot be turned into fabricated frames or shift the column count.
    Remaining missing cells are forward- then backward-filled.

    Args:
        raw_bytes: Raw file content.

    Returns:
        Float array of shape (frames, 20, 3).

    Raises:
        ValueError: If no numeric row is found or fewer than 80 numeric
            columns remain.
    """
    enc = detect_encoding(raw_bytes)
    text = raw_bytes.decode(enc)
    sep = sniff_delimiter(text)

    # A sniffed single space is read as "any run of whitespace".
    df = pd.read_csv(
        io.StringIO(text),
        engine="python",
        header=None,
        sep=r"\s+" if sep == " " else sep,
    )

    # Clean up & coerce numeric
    df = df.dropna(axis=1, how="all")
    df = df.apply(pd.to_numeric, errors="coerce")
    # After coercion, header/text rows and text-only columns are all-NaN.
    # Remove them before filling so bfill cannot copy the next frame into them.
    df = df.dropna(axis=0, how="all").dropna(axis=1, how="all")
    if df.empty:
        raise ValueError("CSV: no numeric data rows found.")
    df = df.ffill().bfill()

    n_cols = df.shape[1]
    if n_cols < 80:
        raise ValueError(f"CSV: unexpected column count {n_cols} (expected 81 or 80).")

    # The trailing 80 columns hold the joint data; with 81 or more columns the
    # leading ones (ID / metadata) are discarded.
    values = df.to_numpy(dtype=float)[:, -80:]
    xyz_all = values.reshape(len(values), 20, 4)[:, :, :3].copy()  # (F,20,3), conf dropped
    return xyz_all

def load_txt_to_xyz_all(raw_bytes: bytes) -> np.ndarray:
    """
    Parse a whitespace-delimited numeric table into per-frame joint positions.

    Each row is one frame and its columns are read as consecutive (x, y, z)
    triples. The bytes are decoded as UTF-8 with undecodable bytes ignored.

    Returns:
        Float array of shape (frames, joints, 3), where joints = columns // 3.
        A warning is printed when that is not 20.

    Raises:
        ValueError: If the column count is not a multiple of 3.
    """
    decoded = raw_bytes.decode("utf-8", errors="ignore")
    # Regex separator: any run of whitespace separates two values.
    table = pd.read_csv(io.StringIO(decoded), header=None, sep=r"\s+")
    grid = np.atleast_2d(table.to_numpy(dtype=float))
    n_frames, n_cols = grid.shape
    n_joints, remainder = divmod(n_cols, 3)
    if remainder != 0:
        raise ValueError(f"TXT: columns must be multiple of 3, got {n_cols}.")
    if n_joints != 20:
        print(f"WARNING: TXT detected {n_joints} joints (expected 20). Proceeding anyway.")
    return grid.reshape(n_frames, n_joints, 3)

# Pick the loader from the file extension; for any other extension, peek at the
# first 8 KiB and treat the file as CSV if it contains a comma, semicolon or tab.
lower = fname.lower()
if lower.endswith(".csv"):
    loader = load_csv_to_xyz_all
elif lower.endswith(".txt"):
    loader = load_txt_to_xyz_all
else:
    sample_text = raw_bytes[:8192].decode("utf-8", errors="ignore")
    looks_delimited = any(d in sample_text for d in [",",";","\t"])
    loader = load_csv_to_xyz_all if looks_delimited else load_txt_to_xyz_all
xyz_all = loader(raw_bytes)

# ---------- Optional: swap Y↔Z if your data uses Z as vertical ----------
if USE_Z_AS_UP:
    # Reordering the last axis as (x, z, y) builds a new array with Y and Z exchanged.
    xyz_all = xyz_all[..., [0, 2, 1]]

# ---------- Compute bounds with generous padding ----------
flat_points = xyz_all.reshape(-1, 3)
mins = flat_points.min(axis=0)
maxs = flat_points.max(axis=0)
max_range = float(np.max(maxs - mins))
pad = PAD_FACTOR * max_range

# Same-sized cube around the centre on every axis: half the largest span plus padding.
center = (mins + maxs) / 2.0
cx, cy, cz = center
xmin, ymin, zmin = center - max_range/2 - pad
xmax, ymax, zmax = center + max_range/2 + pad

n_frames, n_joints = xyz_all.shape[0], xyz_all.shape[1]

# ---------- Kinect v1 names & connectivity ----------
if n_joints == 20:
    JOINT_NAMES = [
        "HipCenter","Spine","ShoulderCenter","Head",
        "ShoulderLeft","ElbowLeft","WristLeft","HandLeft",
        "ShoulderRight","ElbowRight","WristRight","HandRight",
        "HipLeft","KneeLeft","AnkleLeft","FootLeft",
        "HipRight","KneeRight","AnkleRight","FootRight"
    ]
    # Consecutive indices inside a chain form one bone.
    _bone_chains = [
        (0, 1, 2, 3),          # torso
        (2, 4, 5, 6, 7),       # left arm
        (2, 8, 9, 10, 11),     # right arm
        (0, 12, 13, 14, 15),   # left leg
        (0, 16, 17, 18, 19),   # right leg
    ]
    EDGES = [
        (a, b)
        for chain in _bone_chains
        for a, b in zip(chain, chain[1:])
    ]
else:
    # A TXT without exactly 20 joints: generic names and a simple chain
    # linking each joint to the next one.
    JOINT_NAMES = [f"J{i}" for i in range(n_joints)]
    EDGES = list(zip(range(n_joints - 1), range(1, n_joints)))

def joint_scatter(xyz, labels=False):
    """
    Build the marker trace for one frame's joints.

    Args:
        xyz: Indexed as xyz[:, 0], xyz[:, 1], xyz[:, 2] for the x, y and z
            coordinates of every joint.
        labels: When truthy, each marker also shows "index:name" text taken
            from the module-level JOINT_NAMES for the first n_joints joints.

    Returns:
        A go.Scatter3d trace named "Joints".
    """
    if labels:
        draw_mode = "markers+text"
        captions = [f"{i}:{JOINT_NAMES[i]}" for i in range(n_joints)]
    else:
        draw_mode = "markers"
        captions = None

    return go.Scatter3d(
        x=xyz[:,0],
        y=xyz[:,1],
        z=xyz[:,2],
        mode=draw_mode,
        marker=dict(size=4),
        text=captions,
        textfont=dict(size=9),
        textposition="top center",
        name="Joints"
    )

def bone_segments(xyz):
    """
    Build one two-point line trace per bone for a single frame.

    Bones come from the module-level EDGES; pairs that reference a joint index
    at or beyond n_joints are skipped.

    Args:
        xyz: Indexed as xyz[joint, axis] with axis 0, 1, 2 = x, y, z.

    Returns:
        list of go.Scatter3d line traces, in EDGES order.
    """
    drawable = [(a, b) for (a, b) in EDGES if a < n_joints and b < n_joints]
    return [
        go.Scatter3d(
            x=[xyz[a,0], xyz[b,0]],
            y=[xyz[a,1], xyz[b,1]],
            z=[xyz[a,2], xyz[b,2]],
            mode="lines",
            line=dict(width=5),
            showlegend=False
        )
        for (a, b) in drawable
    ]

# Fully transparent two-point trace at opposite corners of the padded bounds;
# it is included in every frame with the intent of keeping the plotted extent
# constant during the animation.
anchor = go.Scatter3d(
    x=[xmin, xmax], y=[ymin, ymax], z=[zmin, zmax],
    mode="markers", marker=dict(size=1, opacity=0), showlegend=False
)


def _traces_for_frame(t):
    """Return the anchor, joint markers and bone lines for frame index t."""
    pose = xyz_all[t]
    return [anchor, joint_scatter(pose, labels=SHOW_LABELS)] + bone_segments(pose)


# ---------- Build figure ----------
data0 = _traces_for_frame(0)

play_button = {
    "label": "Play", "method": "animate",
    "args": [None, {"frame": {"duration": PLAYBACK_MS, "redraw": True},
                    "fromcurrent": True}]
}
pause_button = {
    "label": "Pause", "method": "animate",
    "args": [[None], {"mode": "immediate",
                      "frame": {"duration": 0, "redraw": False}}]
}
scene_settings = dict(
    xaxis=dict(range=[xmin, xmax], title="X"),
    yaxis=dict(range=[ymin, ymax], title="Y"),
    zaxis=dict(range=[zmin, zmax], title="Z"),
    aspectmode="cube",
    camera=dict(up=dict(x=0, y=1, z=0), eye=dict(x=2.4, y=1.8, z=2.4))
)

fig = go.Figure(
    data=data0,
    layout=go.Layout(
        title=f"Kinect 3D Skeleton (animated) — {fname}",
        scene=scene_settings,
        updatemenus=[{
            "type": "buttons",
            "buttons": [play_button, pause_button]
        }]
    ),
    frames=[]
)

# Frames: one per row of the input, named by its index so the slider can address it
frames = [
    go.Frame(data=_traces_for_frame(t), name=str(t))
    for t in range(n_frames)
]
fig.frames = frames

# Slider: one step per frame, jumping to it immediately
slider_steps = [
    {"args": [[str(k)], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"}],
     "label": str(k), "method": "animate"}
    for k in range(n_frames)
]
sliders = [{
    "steps": slider_steps,
    "transition": {"duration": 0},
    "x": 0.05, "len": 0.9
}]
fig.update_layout(sliders=sliders)

fig.show()

print(f"Frames: {n_frames} | Joints per frame: {n_joints} | File: {fname} | USE_Z_AS_UP={USE_Z_AS_UP}")


Output hidden; open in https://colab.research.google.com to view.