## Calculate the Si diffusion coefficient for every condition folder (runs locally)

In [None]:
import numpy as np
from pathlib import Path

# ---------------------------------------------------------------------------
# User settings -- EDIT THIS
# ---------------------------------------------------------------------------
# Root folder that is scanned for condition sub-folders.
BIG_FOLDER = Path(r"/path/to/MSE485 Data")
# Name of the dump file expected inside each condition sub-folder.
DUMP_FILENAME = "state_dump"
# Passed to compute_D_Si_hopping as timestep_ps; multiplied by DUMP_STRIDE
# there to obtain the time between two consecutive dump frames.
TIMESTEP_PS = 0.001
# Passed as dump_stride (number of timesteps between consecutive frames).
DUMP_STRIDE = 10000
# A per-frame displacement must exceed this value to be counted as a hop.
HOP_THRESHOLD_A = 1.0

# Accepted tokens of a condition folder name "<T> K <N> <defect>".
VALID_T = {str(kelvin) for kelvin in range(700, 1201, 100)}
VALID_N = {str(count) for count in (1, 5, 10)}
VALID_DEFECT = {"Vacancy", "Interstitial"}

def _read_dump_frame(f, dump_path):
    """Parse one dump frame whose ``ITEM: TIMESTEP`` line was just consumed.

    Returns ``(box_lengths, ids, types, pos)`` with the per-atom arrays
    sorted by atom id, so that row ``i`` refers to the same atom in every
    frame regardless of the order in which the dump lists the atoms.

    Raises ``RuntimeError`` if the frame is cut short or cannot be parsed.
    """
    try:
        f.readline()  # timestep value (elapsed time is derived from the stride)
        f.readline()  # "ITEM: NUMBER OF ATOMS"
        n_atoms = int(f.readline().strip())

        f.readline()  # "ITEM: BOX BOUNDS ..."
        box_lengths = np.empty(3, dtype=float)
        for axis in range(3):
            parts = f.readline().split()
            lo, hi = float(parts[0]), float(parts[1])
            box_lengths[axis] = hi - lo

        # "ITEM: ATOMS id type x y z ..." -> column names start at token 2.
        cols = f.readline().strip().split()[2:]
        id_col = cols.index("id")
        type_col = cols.index("type")
        xyz_cols = (cols.index("x"), cols.index("y"), cols.index("z"))

        ids = np.empty(n_atoms, dtype=np.int64)
        types = np.empty(n_atoms, dtype=int)
        pos = np.empty((n_atoms, 3), dtype=float)
        for row in range(n_atoms):
            parts = f.readline().split()
            ids[row] = int(parts[id_col])
            types[row] = int(parts[type_col])
            for axis, col in enumerate(xyz_cols):
                pos[row, axis] = float(parts[col])
    except (IndexError, ValueError) as err:
        raise RuntimeError(
            f"Truncated or malformed frame in dump {dump_path} "
            "(each frame needs id, type, x, y, z columns)"
        ) from err

    order = np.argsort(ids, kind="stable")
    return box_lengths, ids[order], types[order], pos[order]


def compute_D_Si_hopping(dump_path, timestep_ps, dump_stride, hop_threshold):
    """Estimate a diffusion coefficient from hop events of type-1 atoms.

    The file is read as a text dump made of frames that start with
    ``ITEM: TIMESTEP`` and carry ``id``, ``type``, ``x``, ``y``, ``z`` atom
    columns. Between consecutive frames the minimum-image displacement of
    every atom is computed; displacements of type-1 atoms (treated as Si)
    longer than ``hop_threshold`` are counted as hops and their squared
    lengths are accumulated.

    Args:
        dump_path: Path of the dump file.
        timestep_ps: Length of one simulation timestep.
        dump_stride: Number of timesteps between consecutive frames.
        hop_threshold: Minimum displacement (same length unit as the dump
            coordinates) for a move to count as a hop.

    Returns:
        ``sum(hop_length**2) / N_type1 / (6 * t_total)`` where ``t_total`` is
        ``(frames - 1) * timestep_ps * dump_stride``; ``0.0`` if the last
        frame contains no type-1 atoms.

    Raises:
        RuntimeError: If the dump has fewer than two frames, a frame is
            truncated/malformed, or the set of atom ids changes between
            frames.
    """
    ref_ids = None
    types = None
    pos_prev = None
    sum_l2_Si = 0.0
    frame_count = 0

    with open(dump_path, "r") as f:
        for line in iter(f.readline, ""):
            if not line.startswith("ITEM: TIMESTEP"):
                continue

            box_lengths, ids, types, pos_curr = _read_dump_frame(f, dump_path)

            if pos_prev is None:
                ref_ids = ids
            else:
                # Displacements are only meaningful when each row is the same
                # atom in both frames.
                if not np.array_equal(ids, ref_ids):
                    raise RuntimeError(
                        f"Atom ids change between frames in dump {dump_path}"
                    )

                d = pos_curr - pos_prev
                # Minimum-image convention: undo jumps caused by wrapping
                # through the periodic box.
                d -= box_lengths * np.round(d / box_lengths)

                dr = np.linalg.norm(d, axis=1)
                mask_Si = (types == 1) & (dr > hop_threshold)
                if np.any(mask_Si):
                    sum_l2_Si += np.sum(dr[mask_Si] ** 2)

            pos_prev = pos_curr
            frame_count += 1

    if frame_count < 2:
        raise RuntimeError(f"Not enough frames in dump {dump_path}")

    dt_frame_ps = timestep_ps * dump_stride
    t_total_ps = (frame_count - 1) * dt_frame_ps

    # Atom types are taken from the last frame that was read.
    N_Si = float(np.sum(types == 1))
    if N_Si <= 0:
        return 0.0

    msd_Si = sum_l2_Si / N_Si
    return msd_Si / (6.0 * t_total_ps)


def select_condition_dirs(root):
    """Return the condition sub-folders of *root*, in sorted order.

    A sub-folder qualifies when its name splits on whitespace into exactly
    ``<T> K <N> <defect>`` with ``T`` in VALID_T, ``N`` in VALID_N and
    ``defect`` in VALID_DEFECT, and when it contains a file named
    DUMP_FILENAME. The result is ordered by temperature, then defect name,
    then defect count (numerically, not lexically).
    """
    candidates = []
    for entry in root.iterdir():
        if not entry.is_dir():
            continue

        tokens = entry.name.split()
        if len(tokens) != 4:
            continue

        temperature, unit, count, kind = tokens
        recognised = (
            unit == "K"
            and temperature in VALID_T
            and count in VALID_N
            and kind in VALID_DEFECT
        )
        if recognised and (entry / DUMP_FILENAME).exists():
            candidates.append((int(temperature), kind, int(count), entry))

    return [entry for *_, entry in sorted(candidates)]

def main():
    """Process every condition folder under BIG_FOLDER and write the results.

    For each folder returned by select_condition_dirs, the hopping-based
    coefficient is computed from its dump file and written to
    ``D_Si_hopping.txt`` inside that folder. A tab-separated summary of all
    folders is then written into BIG_FOLDER.
    """
    run_dirs = select_condition_dirs(BIG_FOLDER)

    rows = []
    rows.append("# D_Si from hopping only, units Å^2/ps")
    rows.append("# hop threshold = %.3f Å" % HOP_THRESHOLD_A)
    rows.append("condition\tD_Si_A2_per_ps")

    for run_dir in run_dirs:
        print("Processing", run_dir.name)
        D_Si = compute_D_Si_hopping(
            run_dir / DUMP_FILENAME, TIMESTEP_PS, DUMP_STRIDE, HOP_THRESHOLD_A
        )

        # Per-folder report.
        with open(run_dir / "D_Si_hopping.txt", "w") as g:
            g.write("condition %s\n" % run_dir.name)
            g.write("hop_threshold_A %.6f\n" % HOP_THRESHOLD_A)
            g.write("D_Si_A2_per_ps %.6e\n" % D_Si)

        rows.append(f"{run_dir.name}\t{D_Si:.6e}")

    # Combined summary across all folders.
    summary_path = BIG_FOLDER / "D_Si_hopping_700-1200K_1-5-10_vac_int.txt"
    with open(summary_path, "w") as g:
        g.write("\n".join(rows))

    print("Done. Summary written to", summary_path)

# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

    



