In [None]:
import os
from utils import calculate_rmsd, load_structure_data, copy_atom_group, select_nbr_mols, generate_grids
from utils.analyze_structure import select_ccl4_molecules, analyze_ccl4_structure, rotate_ccl4_molecules, select_nearest_ccl4_molecules
from ARPDF import compare_ARPDF
from search_boxes import save_ccl4_result
import json
from dataclasses import asdict
import numpy as np
import matplotlib.pyplot as plt
import MDAnalysis as mda
import pickle
from search_boxes import SearchResult
from typing import List, Dict

In [None]:
# Root directory of the experiment being post-processed. Later cells read
# results.pkl, ARPDF_ref.npy and metadata.json from it and create the
# post_analysis output folder underneath it.
exp_dir = "tmp/exp_opt_1D_avg_2/"


In [None]:
# Load the pickled list of search results for this experiment.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open results.pkl files that were produced by a trusted run.
with open(f"{exp_dir}/results.pkl", "rb") as f:
    results: List[SearchResult] = pickle.load(f)

In [None]:
# Pull the per-result similarity score and modified-atom group into two
# parallel lists, ordered like `results`.
similarity_values = [result.similarity for result in results]
modified_atoms = [result.modified_atoms for result in results]

In [None]:
# Directory holding the reference structure, and the folder that receives
# every artifact written by this post-analysis.
reference_dir = "data/CCl4"
output_dir = os.path.join(exp_dir, "post_analysis")

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and also creates missing parent directories.
os.makedirs(output_dir, exist_ok=True)

# Load the reference structure and the reference ARPDF saved with the experiment.
u1_ref, u2_ref, modified_atoms_ref, polar_axis_ref = load_structure_data(reference_dir)
ARPDF_ref = np.load(os.path.join(exp_dir, "ARPDF_ref.npy"))

# Rebuild the evaluation grids from the parameters recorded in the metadata.
with open(os.path.join(exp_dir, "metadata.json"), "r") as f:
    metadata = json.load(f)
search_params = metadata["search_info"]["parameters"]
xy_range = search_params["grids_range"]
N, M = search_params["grids_shape"]
X, Y = generate_grids(xy_range, N, M)

In [None]:
def get_ccl4_universe(universe, ccl4_mols):
    """
    Build a standalone universe from a subset of the atoms of `universe`.

    Parameters
    ----------
    universe
        Source universe; its `atoms` are indexed with `ccl4_mols` and its
        `dimensions` are copied onto the result.
    ccl4_mols
        Index passed to `universe.atoms[...]` to pick the atoms to keep
        (presumably the atom indices of the selected CCl4 molecules --
        confirm against the selection helpers that produce it).

    Returns
    -------
    The universe returned by `mda.Merge` for the picked atoms, with the
    same `dimensions` as `universe`.
    """
    subset_universe = mda.Merge(universe.atoms[ccl4_mols])
    # Merge output gets the source box assigned explicitly.
    subset_universe.dimensions = universe.dimensions
    return subset_universe

In [None]:
# Index handed to both the selection and the analysis helper for the reference
# structure (presumably the target atom/molecule of the reference -- confirm).
ref_target = 519

# Pick the CCl4 molecules of interest in the modified reference universe,
# extract them into their own universe, and analyze their geometry.
ccl4_mols_ref = select_ccl4_molecules(u2_ref, ref_target)
ccl4_universe_ref = get_ccl4_universe(u2_ref, ccl4_mols_ref)
ref_res = analyze_ccl4_structure(u2_ref, ref_target, selected_indices=ccl4_mols_ref)
print(ref_res)

# Rotate the selected molecules together with their neighbours (neighbour
# search uses nbr_distance=5.0 around the modified atoms) and save the result.
nbr_indices = select_nbr_mols(u2_ref, modified_atoms_ref, nbr_distance=5.0)
nbr_group = rotate_ccl4_molecules(u2_ref, ccl4_mols_ref, nbr_indices, polar_axis_ref)
ref_nbr_path = os.path.join(output_dir, 'ref_nbr.gro')
nbr_group.write(ref_nbr_path)

In [None]:
# For every search result: load its optimized structure, compute the RMSD to
# the reference for each candidate CCl4 selection (keeping the smallest),
# analyze the geometry of that best candidate, and read the final loss from
# the optimization log.
rmsd_values: List[float] = []
similarity_values_final: List[float] = []
C_Cl: List[float] = []
Cl_Cl: List[float] = []
umbrella_angle: List[float] = []

# NOTE(review): results are loaded from exp_dir, but the optimized structures
# are read from this hard-coded directory, which has a different name than
# exp_dir ("tmp/exp_opt_1D_avg_2/"). Confirm this is the intended run.
optimize_root = "tmp/exp_opt_1D_avg/optimize/"

# Indices (within each extracted sub-universe) used for the RMSD: the C and Cl
# of the target molecule and of its neighbour. Loop-invariant, so built once.
selection = [0, 1, 5, 6]

for i, result in enumerate(results):
    # Structure produced by the search, and the files written by its optimization.
    u2_search = result.modified_universe
    optimize_box = optimize_root + f"structure_{i}/" + "CCl4_optimized.gro"
    optimize_log = optimize_root + f"structure_{i}/" + "log.txt"

    # Candidate CCl4 selections around the target molecule of this result.
    ccl4_mols_search_list = select_nearest_ccl4_molecules(u2_search, result.molecule, n_neighbors=3)

    # Evaluate every candidate on the optimized structure and remember which
    # one gives the smallest RMSD, so the geometry analysis below describes the
    # same candidate as the reported RMSD.
    u1_ref_new = mda.Universe(optimize_box)
    rmsd_candidates = []
    min_rmsd = None
    best_ccl4_mols = None
    for ccl4_mols_search in ccl4_mols_search_list:
        ccl4_universe_search = get_ccl4_universe(u1_ref_new, ccl4_mols_search)
        rmsd = calculate_rmsd(ccl4_universe_search, ccl4_universe_ref, selection=selection)
        rmsd_candidates.append(rmsd)
        if min_rmsd is None or rmsd < min_rmsd:
            min_rmsd = rmsd
            best_ccl4_mols = ccl4_mols_search

    if best_ccl4_mols is None:
        raise ValueError(f"No candidate CCl4 molecules found for result {i}")
    rmsd_values.append(min_rmsd)

    # Previously this used the loop variable left over from the candidate loop
    # (i.e. the last candidate) instead of the minimum-RMSD candidate.
    analysis = analyze_ccl4_structure(
        u1_ref_new,
        result.modified_atoms[0],
        selected_indices=best_ccl4_mols
    )

    # Cl-Cl is stored as the absolute deviation from the reference value;
    # the C-Cl distance and umbrella angle are stored as-is.
    Cl_Cl.append(abs(analysis.dist_CL_A_CL_B - ref_res.dist_CL_A_CL_B))
    C_Cl.append(analysis.dist_C_A_CL_A)
    umbrella_angle.append(analysis.umbrella_angle)

    # The last non-empty line of the comma-separated log holds the final loss
    # in its third column; blank trailing lines are skipped.
    with open(optimize_log, 'r') as file:
        log_lines = [line.strip() for line in file if line.strip()]
    if not log_lines:
        raise ValueError(f"Optimization log is empty: {optimize_log}")
    last_line = log_lines[-1].split(',')
    loss_value = float(last_line[2])

    # The similarity is the negated loss.
    similarity = -loss_value
    similarity_values_final.append(similarity)

    # Report the values that were actually stored (minimum RMSD, negated loss).
    print(f"Result {i+1} RMSD: {min_rmsd:.3f} Å, similarity: {similarity:.3f}")

In [None]:
# Scatter the per-structure RMSD against the final similarity.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(similarity_values_final, rmsd_values, alpha=0.6)
ax.set_xlabel('Similarity')
ax.set_ylabel('RMSD (Å)')
ax.set_title('Relationship between Similarity and RMSD')

# Overlay a first-degree least-squares fit as a dashed red trend line.
z = np.polyfit(similarity_values_final, rmsd_values, 1)
p = np.poly1d(z)
ax.plot(similarity_values_final, p(similarity_values_final), "r--", alpha=0.8)

# Layout is tightened before the x-range is restricted to [0.7, 1].
fig.tight_layout()
ax.set_xlim(0.7, 1)
fig.savefig(os.path.join(output_dir, 'similarity_vs_rmsd.png'))

plt.show()

In [None]:
# Scatter the per-structure Cl-Cl values against the final similarity.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(similarity_values_final, Cl_Cl, alpha=0.6)
ax.set_xlabel('Similarity')
ax.set_ylabel('Cl-Cl (Å)')
ax.set_title('Relationship between Similarity and Cl-Cl')

# Overlay a first-degree least-squares fit as a dashed red trend line.
z = np.polyfit(similarity_values_final, Cl_Cl, 1)
p = np.poly1d(z)
ax.plot(similarity_values_final, p(similarity_values_final), "r--", alpha=0.8)

# Layout is tightened before the x-range is restricted to [0.7, 1].
fig.tight_layout()
ax.set_xlim(0.7, 1)
fig.savefig(os.path.join(output_dir, 'similarity_vs_ClCl.png'))

plt.show()

In [None]:
# Bundle each structure's final similarity, modified-atom group and RMSD
# into one (similarity, atoms, rmsd) tuple.
pairs = list(
    zip(similarity_values_final, modified_atoms, rmsd_values)
)

# Rank by similarity, highest first, and keep the ten best entries.
ranked_pairs = sorted(pairs, key=lambda entry: entry[0], reverse=True)
top_pairs = ranked_pairs[:10]

In [None]:
# Unpack the ranked (similarity, atoms, rmsd) tuples into three parallel lists.
top_similarities = [similarity for similarity, _, _ in top_pairs]
top_atoms = [atoms for _, atoms, _ in top_pairs]
top_rmsd = [rmsd_value for _, _, rmsd_value in top_pairs]

In [None]:
# One tick label per atom group: its members joined with dashes.
labels = ['-'.join(str(atom) for atom in group) for group in top_atoms]

# Bar chart of the top similarities, one bar per atom group.
fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(range(len(top_similarities)), top_similarities, tick_label=labels)
ax.set_xlabel("Atom Groups")
ax.set_ylabel("Similarity")
ax.set_title("Similarity of Atom Groups")
# Rotate the x tick labels so neighbouring group names do not overlap.
ax.tick_params(axis="x", labelrotation=45)

# Annotate every bar with the RMSD of the same entry (two decimals),
# placed slightly above the top of the bar.
for bar, rmsd_value in zip(bars, top_rmsd):
    x_center = bar.get_x() + bar.get_width() / 2
    y_text = bar.get_height() + 0.01
    ax.text(x_center, y_text, f"{rmsd_value:.2f}",
            ha='center', va='bottom', fontsize=8)

fig.tight_layout()
plt.show()

In [None]:
# For every search result whose target molecule is 313, write the selected
# CCl4 atoms of the original reference universe (u1_ref) to a .gro file.
for i, result in enumerate(results):
    if result.molecule != 313:
        continue

    u2_search = result.modified_universe
    optimize_box = "tmp/exp_opt_1D_avg/optimize/"+f"structure_{i}/"+f"CCl4_optimized.gro"
    ccl4_mols_search = select_ccl4_molecules(u2_search, result.molecule)

    # NOTE(review): the optimized structure is loaded here but not used below;
    # the atoms that get written come from u1_ref. Confirm this is intended.
    u1_ref_new = mda.Universe(optimize_box)

    ccl4_universe_search = get_ccl4_universe(u1_ref, ccl4_mols_search)
    # NOTE(review): hard-coded output path; it is not derived from output_dir
    # and names a different experiment directory than exp_dir. The target
    # folder must already exist.
    ccl4_universe_search.atoms.write('tmp/exp_opt_1D_avg/post_analysis/313_origin.gro')