In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import sys

In [3]:
# --- Experiment configuration and data loading ---------------------------
# Structure identifiers that the analysis cells iterate over.
cif_names = ["7mhf", "7mhg", "7mhh", "7mhi", "7mhj", "7mhk"]

# Reference r_free per structure; the summary cell subtracts these from the
# measured r_free to obtain "exp_delta".
# NOTE(review): presumably the r_free values published with each PDB entry — confirm.
pdb_r_free_dict = {"7mhf": .224, "7mhg": .205, "7mhh": .253, "7mhi": .228, "7mhj": .240, "7mhk": .247}

# Fail early (instead of with a KeyError deep inside the summary loop) if a
# structure has no reference value.
assert set(cif_names) <= set(pdb_r_free_dict), "every cif name needs an entry in pdb_r_free_dict"

# Lookup table indexed by its first CSV column; the summary cell looks rows up
# by job_id and splits the first value on "," to get the cif files of a job.
mapper_df = pd.read_csv(Path(Path.home(), "Documents/xray/dev/35_cif_combos/data/7mhf.csv"), index_col=0)

# Results of the experiment under analysis.
exp_name = "179_exp"
analysis_dir = Path(Path.home(), "Documents/xray/sample_bench/data/7mhf", exp_name)
analysis_df = pd.read_csv(Path(analysis_dir, "ref_10000.csv"), index_col=0)

# Values of the "N" and "J" columns of analysis_df that are scanned.
Ns = [1, 2, 4, 8, 16]
Js = list(range(1, 7))

# Preview of the loaded results. This must be the last expression of the cell:
# a bare expression in the middle of a cell is evaluated and then discarded.
analysis_df.head()

In [6]:
def _best_r_free_row(subset, label):
    """Return the lowest-``r_free`` row of ``subset`` as a one-row DataFrame.

    Parameters
    ----------
    subset : pandas.DataFrame
        Rows to search; must contain an ``r_free`` column.
    label : str
        Description of the selection, used only in the error message.

    Returns
    -------
    pandas.DataFrame
        Exactly one row. When several rows tie for the minimum, the first one
        in ``subset`` order is kept so that every selection contributes one
        row and one entry to each of the per-row delta lists.

    Raises
    ------
    IndexError
        If ``subset`` has no rows (or no comparable ``r_free`` value).
    """
    if subset.empty:
        raise IndexError("no rows in analysis_df for {}".format(label))
    lowest = subset[subset["r_free"] == subset["r_free"].min()]
    # iloc[[0]] (list indexer) keeps the result a DataFrame, ready for pd.concat.
    return lowest.iloc[[0]]


# The reference for a structure is its best N=1, J=1 row. It depends only on
# cif_name, so it is looked up once per structure rather than once per (N, J).
ref_values = dict()
for cif_name in cif_names:
    N1_J1_cif_subset = analysis_df[(analysis_df["N"] == 1) & (analysis_df["J"] == 1) & (analysis_df["cif_name"] == cif_name)]
    best_N1_J1_row = _best_r_free_row(N1_J1_cif_subset, "N=1, J=1, cif_name={}".format(cif_name))
    ref_values[cif_name] = (best_N1_J1_row["r_free"].values[0], best_N1_J1_row["ff"].values[0])

best_rows = list()
exp_deltas, ref_deltas, ref_ff_deltas, job_cif_strs = list(), list(), list(), list()
for N in Ns:
    for cif_name in cif_names:
        ref_r_free, ref_ff = ref_values[cif_name]
        for J in Js:
            N_J_cif_subset = analysis_df[(analysis_df["N"] == N) & (analysis_df["J"] == J) & (analysis_df["cif_name"] == cif_name)]
            best_row = _best_r_free_row(N_J_cif_subset, "N={}, J={}, cif_name={}".format(N, J, cif_name))
            best_rows.append(best_row)

            r_free = best_row["r_free"].values[0]
            ff = best_row["ff"].values[0]
            # exp_delta: difference to the reference value in pdb_r_free_dict.
            exp_deltas.append(r_free - pdb_r_free_dict[cif_name])
            # ref_delta / ref_ff_delta: difference to the best N=1, J=1 row.
            ref_deltas.append(r_free - ref_r_free)
            ref_ff_deltas.append(ff - ref_ff)

            # Map the job to its input cif files: the first value of the
            # mapper row is a comma-separated list of file paths, reduced here
            # to comma-separated file stems.
            # NOTE(review): assumes job_id is unique in mapper_df's index — confirm.
            job_id = best_row["job_id"].values[0]
            job_cif_files = mapper_df.loc[job_id].values[0].split(",")
            job_cif_strs.append(",".join(Path(job_cif_file).stem for job_cif_file in job_cif_files))

# One row per (N, cif_name, J), in loop order, with the derived columns attached.
summary_df = pd.concat(best_rows)
summary_df["exp_delta"] = exp_deltas
summary_df["ref_delta"] = ref_deltas
summary_df["ref_ff_delta"] = ref_ff_deltas
summary_df["job_cif_str"] = job_cif_strs

# Written next to the analysis input; analysis_dir already ends in exp_name.
summary_df.to_csv(Path(analysis_dir, "summary.csv"))

In [14]:
# For every structure, print the summary row with the smallest exp_delta:
# cif name, N, J, exp_delta, ref_delta and the "pdb" column of that row.
report_cols = ["N", "J", "exp_delta", "ref_delta", "pdb"]
for cif_name in cif_names:
    cif_subset = summary_df.loc[summary_df["cif_name"] == cif_name]
    lowest_exp_delta = cif_subset["exp_delta"].min()
    # Several rows may share the minimum; the first one in frame order is reported.
    best_row = cif_subset.loc[cif_subset["exp_delta"] == lowest_exp_delta].iloc[0]
    print(cif_name, *(best_row[col] for col in report_cols))

7mhf 2 2 0.05541382497735217 -0.036100152313550626 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/9937.pdb
7mhg 2 3 0.0640811391894697 -0.025031366353565287 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/11905.pdb
7mhh 8 3 -0.044161278013805594 -0.09013741418554289 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/33020.pdb
7mhi 8 3 -0.0333480001096528 -0.09072028781394759 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/33084.pdb
7mhj 8 4 -0.0356133313894845 -0.09334940703829661 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/36547.pdb
7mhk 8 3 -0.00661556797448401 -0.0707227789409178 /wynton/group/sali/mhancock/xray/sample_bench/out/7mhf/179_exp_ref_10000/33274.pdb
