# HKDataMiner Tutorial: Alanine Dipeptide

This notebook reproduces the standard tutorial workflow using the Python API.

In [None]:
import os
import shutil
import tarfile
from hkdataminer import workflows

## 1. Setup Data
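Unpack `Tutorial.tar.gz` from the parent directory (the project root) into a fresh `_notebook_run/` directory, then make the extracted `Tutorial/` folder the working directory so that the relative file names used in the following cells resolve.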

In [None]:
# Remember the directory the kernel started in. This cell ends with
# os.chdir(), so on a re-run ".." would otherwise be resolved relative to the
# extracted data directory instead of the notebook's own location.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = os.getcwd()
# Step back out of any previously extracted tree before it is deleted below
# (a no-op on the first run).
os.chdir(_notebook_start_dir)

project_root = os.path.abspath(os.path.join(_notebook_start_dir, ".."))
tutorial_tar = os.path.join(project_root, "Tutorial.tar.gz")
work_dir = os.path.join(project_root, "_notebook_run")

# Fail before touching the working directory, so a missing archive does not
# wipe the results of an earlier run.
if not os.path.isfile(tutorial_tar):
    raise FileNotFoundError(f"Tutorial archive not found: {tutorial_tar}")

# Always start from an empty working directory.
if os.path.exists(work_dir):
    shutil.rmtree(work_dir)
os.makedirs(work_dir)

print(f"Extracting data to {work_dir}...")
with tarfile.open(tutorial_tar, "r:gz") as tar:
    # Use the "data" extraction filter where the running Python supports it:
    # it rejects members that would land outside work_dir (absolute paths,
    # "..", escaping links). Older interpreters lack the keyword, so fall
    # back to the plain call there.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=work_dir, filter="data")
    else:
        tar.extractall(path=work_dir)

# Later cells refer to their input and output files by relative name, so run
# them from inside the extracted "Tutorial" directory.
data_dir = os.path.join(work_dir, "Tutorial")
os.chdir(data_dir)

## 2. Clustering
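Run K-Centers clustering with 100 clusters (`n_clusters=100`). The call returns the path of the assignments file, which the lumping step below takes as input.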

In [None]:
# Clustering settings, collected in one mapping so they can be reviewed and
# tweaked at a glance. Both homedir and output_dir point at the current
# working directory.
clustering_options = {
    "trajListFns": 'trajlist',
    "atomListFns": 'atom_indices',
    "topology": 'native.pdb',
    "homedir": '.',
    "iext": 'xtc',
    "n_clusters": 100,
    "output_dir": '.',
}

# The returned value is kept because the lumping step takes it as its
# assignments_file argument.
assign_file = workflows.run_clustering(**clustering_options)
print(f"Assignments saved to: {assign_file}")

## 3. Lumping
Lump the microstates into 4 macrostates (`n_macro_states=4`) using PCCA.

In [None]:
# Inputs for the lumping workflow: the assignments file produced by the
# clustering cell, a trajectory-length file, and the number of macrostates.
lumping_options = {
    "assignments_file": assign_file,
    "traj_len_file": 'traj_len.txt',
    "n_macro_states": 4,
    "homedir": '.',
}

# Kept as the final expression so any return value is still shown as the
# cell's output.
workflows.run_lumping(**lumping_options)

## 4. Visualize Results
Display the generated plots.

In [None]:
# Import display explicitly: it is only available as an implicit builtin
# inside IPython kernels, so relying on that breaks the cell when the notebook
# is exported and run as a plain Python script.
from IPython.display import Image, display

# Render the PNG from the current working directory.
# NOTE(review): presumably the figure written by the clustering step for
# n_clusters=100 — confirm against the workflow's output naming.
display(Image("kcenters_n_100.png"))

In [None]:
# Load the PNG from the current working directory, then render it inline.
# NOTE(review): presumably the figure written by the lumping step for
# 100 microstates / 4 macrostates — confirm against the workflow's output naming.
lumping_plot = Image("micro_100_PCCA_4.png")
display(lumping_plot)