In [1]:
# 导入必要的库
import sys
import os
import pandas as pd
import numpy as np
from rdkit import Chem
from pathlib import Path
import time

# Add the project root (the parent of the current working directory) to the
# module search path so that project-local packages can be imported.
project_root = Path().absolute().parent
# Guard the append: re-running this cell must not keep stacking duplicate
# entries onto sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# 导入项目相关模块
from core.molecule_processor import MoleculeProcessor
from core.gaussian_calculator import GaussianCalculator
from visualization.plot_manager import PlotManager
from utils.cluster_monitor import ClusterMonitor

# Configuration for the calculations for Wang's paper.
# `name_list` holds the short label of each molecule; entries pair positionally
# with the SMILES strings in `base_list` (the two lists must stay the same length).
name_list = ['S','A','EHOPA','AEPYridine','AEP','APN','DBE','DIPEDA','OA','mXD']

# SMILES strings, one per label in `name_list`, in the same order.
base_list = [
'FC1(C(F)(C(F)(F)F)F)C(OC(F)(F)C(F)1F)(C(F)(C(F)(F)F)F)F',
'FC1(C(F)(C(F)(F)F)F)C(OC(F)(F)C(F)1F)(C(F)(C(O)=O)F)F',
'CCCCC(CC)COCCCNC(C(F)(F)C1(F)OC(F)(F)C(F)(F)C1(C(F)(C(O)=O)F)F)=O',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCCC2=CN=CC=C2)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCCN2CCCCC2)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NC2=CC(C#N)=CC(C#N)=C2)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCCN(CCCC)CCCC)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCCN(C(C)C)C(C)C)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCCCCCCCC)=O)F)O1',
'FC1(F)C(F)(F)C(F)(C(F)(C(O)=O)F)C(F)(C(F)(C(NCC2=CC(CN)=CC=C2)=O)F)O1',
]

# Name of this molecule set; passed as `parent_dir` to the project helpers.
set_name = 'cytop'
# Number of initial conformers generated and submitted per molecule.
iteration = 3
# Keyword arguments forwarded to GaussianCalculator.
# NOTE(review): several of the original inline comments contradicted the values
# they annotated; the notes below describe the values as written — confirm intent.
calculator_params = {
    'parent_dir': set_name,
    'method': 'CAM-B3LYP',  # DFT functional
    'basis': '6-31G(d,p)',  # basis set
    'opt': False,  # geometry-optimization switch; off here (old comment said "perform optimization" — confirm)
    'dispersion': False,  # dispersion-correction switch; off here (old comment said "include dispersion" — confirm)
    'polar': False,  # do not compute polarizability
    'volume': False,  # do not compute volume
    'pcm': True,  # PCM solvation switch; on here (old comment said "do not use PCM" — confirm)
    'eps': 2.05,  # dielectric constant for the PCM model
    'wfn': True,  # write a wavefunction file
    'debug': True  # debug switch; on here (old comment said "non-debug mode" — confirm)
}



In [2]:
# Draw the molecular structures with PlotManager.
PlotManager.plot_molecules(
    smiles_list=base_list,
    name_list=name_list,
    parent_dir=set_name,
    save_path='molecular_structures.pdf',
    cols=5,  # presumably molecules per row; the old comment said 4, contradicting the value — confirm
    with_timestamp=False
)


In [3]:
# Write `iteration` initial xyz files for every molecule. Each try uses a
# different random seed (42 + attempt number).
for mol_smiles, mol_name in zip(base_list, name_list):
    max_attempts = 20  # upper bound on tries per molecule, so the loop always ends
    success_count = 0

    for attempt in range(max_attempts):
        if success_count >= iteration:
            break

        # Files are numbered by successes rather than attempts, so a failed try
        # reuses the same index on the next seed.
        xyz_filename = f"{mol_name}{success_count}_init.xyz"
        try:
            MoleculeProcessor.smile2xyz(
                xyz_name=xyz_filename,
                smile=mol_smiles,
                randomSeed=42 + attempt,
                parent_dir=set_name,
            )
        except Exception as e:
            print(f"尝试生成 {mol_name} 第 {success_count + 1} 个构象时失败: {e}")
        else:
            success_count += 1

    # Report molecules that ran out of attempts before reaching the target count.
    if success_count < iteration:
        print(f"警告: {mol_name} 只成功生成了 {success_count} 个构象")

In [4]:
# Prepare and submit one neutral-state Gaussian job for every
# (conformer, molecule) pair, reading the xyz files named "<name><conf_id>_init.xyz".
monitor = ClusterMonitor()

for conf_id in range(iteration):
    for name in name_list:
        # Errors are handled per molecule: a failure for one molecule is reported
        # and the loop moves on, instead of abandoning every remaining molecule
        # of this conformer.
        try:
            # Throttle submissions: poll once a minute while the monitor reports
            # the queue as full (4 is presumably the job limit — confirm against
            # ClusterMonitor.is_queue_full).
            while monitor.is_queue_full(4):
                time.sleep(60)

            xyz_file = f"{name}{conf_id}_init.xyz"

            # Neutral-state calculation: build inputs, write the job script, submit.
            neutral_calc = GaussianCalculator(**calculator_params, charge='neu')
            neutral_calc._prepare_calculation(xyz_file)
            neutral_calc._generate_input_file()
            neutral_calc._generate_shell_script()
            neutral_calc._submit_job()

            # Cation-state calculation (for IP), currently disabled:
            # cation_calc = GaussianCalculator(**calculator_params, charge='pos')
            # cation_calc._prepare_calculation(xyz_file)
            # cation_calc._generate_input_file()
            # cation_calc._generate_shell_script()
            # cation_calc._submit_job()

        except Exception as e:
            print(f"处理{name}时出错: {str(e)}")

Submitted batch job 2328128
Submitted batch job 2328129
Submitted batch job 2328130
Submitted batch job 2328131
Submitted batch job 2328132
Submitted batch job 2328133
Submitted batch job 2328134
Submitted batch job 2328135
Submitted batch job 2328136
Submitted batch job 2328137
Submitted batch job 2328139
Submitted batch job 2328140
Submitted batch job 2328141
Submitted batch job 2328142
Submitted batch job 2328143
Submitted batch job 2328144
Submitted batch job 2328145
Submitted batch job 2328146
Submitted batch job 2328148
Submitted batch job 2328150
Submitted batch job 2328152
Submitted batch job 2328153
Submitted batch job 2328154
Submitted batch job 2328156
Submitted batch job 2328157
Submitted batch job 2328158
Submitted batch job 2328160
Submitted batch job 2328161
Submitted batch job 2328162
Submitted batch job 2328163


In [7]:
# NOTE(review): these imports sit mid-notebook rather than in the first cell, and
# `tqdm` is not referenced by the live code in this cell — consider moving/removing.
from data.data_processor import DataProcessor
from tqdm import tqdm
# The molecules analysed are the ones in `name_list` from the configuration cell.

# Number of conformers per molecule to read back.
# NOTE(review): duplicates `iteration` from the configuration cell; keep the two in sync.
iterations = 3
# Build the per-molecule property dataframe with DataProcessor.
# NOTE(review): the variable is called `ip_df`, but the property requested here is
# 'LUMO+1', not IP — confirm which is intended.
ip_df = DataProcessor.create_property_dataframe(name_list, iterations, 'LUMO+1',parent_dir=set_name)
ip_df

# Disabled post-processing, kept for reference:
# # Remove outliers from the data
# clean_ip_df = DataProcessor.clean_data(ip_df)

# # Convert to a numpy array and print the results
# ip_array = clean_ip_df.to_numpy()
# print("\nIP results (unit: eV):")
# print("Molecule order:", molecule_names)
# print("IP values:\n", ip_array)

# # Per-molecule statistics and visualisation
# # NOTE(review): `molecule_names` is not defined in any visible cell.
# for i, molecule in enumerate(molecule_names):
#     values = ip_array[:, i]
#     median, mean = DataProcessor.analyze_property(values.tolist(), molecule, 'IP')

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:02<00:00,  4.13it/s]
100%|██████████| 10/10 [00:00<00:00, 39.37it/s]
100%|██████████| 10/10 [00:00<00:00, 21.96it/s]


Unnamed: 0,S,A,EHOPA,AEPYridine,AEP,APN,DBE,DIPEDA,OA,mXD
0,1.554859,0.712394,0.720014,0.502867,0.654706,-0.832125,0.498785,0.519194,0.798382,0.413069
1,1.409823,1.112674,0.634298,0.492254,0.721102,-0.812805,0.608447,0.644638,0.604637,0.485996
2,1.554587,1.093082,0.71049,0.473206,0.610352,-0.837839,0.680557,0.685999,0.631577,0.57797


In [3]:
# NOTE(review): the output saved with this cell lists columns FFKM/TAG/TAIC/LDAI1C/D2D,
# which do not match `name_list`, and its execution count is out of order — the
# output looks stale; re-run after a kernel restart or remove the cell.
ip_df

Unnamed: 0,FFKM,TAG,TAIC,LDAI1C,D2D
0,-12.101726,-7.930218,-9.275006,-9.288611,-9.280176
1,-12.114515,-7.984641,-9.299224,-9.267114,-9.255958
2,-11.80104,-8.035526,-9.345211,-9.29106,-9.253237
