<a href="https://colab.research.google.com/github/sushirito/Molecular-Dynamics/blob/OpenMM/OpenMM_Scripting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##**NOTE**: Code must be run twice to work

In [2]:
!pip install -q condacolab
import condacolab
condacolab.install()  # Will restart the runtime; re-run after restart

!mamba install -c conda-forge ambertools -y

✨🍰✨ Everything looks OK!

Looking for: ['ambertools']

[?25l[2K[0G[+] 0.0s
[2K[1A[2K[0Gconda-forge/linux-64                                          No change
[+] 0.1s
conda-forge/noarch  ⣾  [2K[1A[2K[0G[+] 0.2s
conda-forge/noarch  23%[2K[1A[2K[0G[+] 0.3s
conda-forge/noarch  48%[2K[1A[2K[0G[+] 0.4s
conda-forge/noarch  71%[2K[1A[2K[0G[+] 0.5s
conda-forge/noarch  94%[2K[1A[2K[0G[+] 0.6s
conda-forge/noarch  94%[2K[1A[2K[0Gconda-forge/noarch                                
[?25h
Pinned packages:
  - python 3.11.*
  - python 3.11.*
  - python_abi 3.11.* *cp311*
  - cuda-version 12.*


Transaction

  Prefix: /usr/local

  All requested packages already installed

[?25l[2K[0G[?25h

In [3]:
!git clone https://github.com/Iourarum/GOPY.git
%cd GOPY
!python GOPY.py generate_PG 10 10 graphene.pdb
!python GOPY.py generate_GO graphene.pdb 2 4 8 functionalized.pdb

fatal: destination path 'GOPY' already exists and is not an empty directory.
/content/GOPY
done.
GOPY.py
generate_GO
graphene.pdb
2
4
8
functionalized.pdb
Left to add:  cooh:  2 epoxy:  4 hydroxyl:  8
Placed:
carboxyl:  0.0
epoxy:  0.0
hydroxyl:  0.0
graphene atoms (CX - GGG) left:  100


In [4]:
!wget https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/GGG.lib
!wget https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/C1A.lib
!wget https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/E1A.lib
!wget https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/H1A.lib
!mkdir -p FRCMOD
!wget -P FRCMOD https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/FRCMOD/GO.frcmod

--2025-01-27 07:57:45--  https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/GGG.lib
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1063 (1.0K) [text/plain]
Saving to: ‘GGG.lib.1’


2025-01-27 07:57:46 (41.9 MB/s) - ‘GGG.lib.1’ saved [1063/1063]

--2025-01-27 07:57:46--  https://raw.githubusercontent.com/Iourarum/GOPY/master/GO_tutorial/C1A.lib
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1627 (1.6K) [text/plain]
Saving to: ‘C1A.lib.1’


2025-01-27 07:57:46 (13.0 MB/s) - ‘C1A.lib.1’ saved [1627/1627]

--2025-01-

In [5]:
!pip install openmm
!pip install MDAnalysis



In [24]:
# Atomic masses in atomic mass units (u)
atomic_masses = {
    'H': 1.0079,  # Hydrogen
    'C': 12.011,  # Carbon
    'N': 14.007,  # Nitrogen
    'O': 15.999,  # Oxygen
    'P': 30.974,  # Phosphorus
    'S': 32.06,   # Sulfur
}

# Conversion factor from atomic mass units to grams
amu_to_grams = 1.66054e-24

def calculate_total_mass(file_path):
    total_mass = 0.0

    # Read the .pdb file
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('ATOM'):  # Focus on ATOM entries
                atom_type = line[12:14].strip()  # Extract the atom type
                element = atom_type[0]  # Get the first character of the atom type
                if element in atomic_masses:
                    total_mass += atomic_masses[element]  # Add the atomic mass

    # Convert the total mass to grams
    total_mass_grams = total_mass * amu_to_grams
    return total_mass_grams

# Path to the .pdb file
file_path = "/content/GOPY/functionalized.pdb"

# Calculate and print the total mass
carbon_mass = calculate_total_mass(file_path)
print(f"Total mass of the system: {carbon_mass:.6e} grams")

Total mass of the system: 1.478935e-21 grams


In [8]:
from openmm.app import AmberPrmtopFile, AmberInpcrdFile, Simulation, PDBReporter, PME, HBonds
from openmm import LangevinMiddleIntegrator, Platform
from openmm.unit import kelvin, picosecond, nanometer, picoseconds
import os, numpy as np, MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
from scipy.signal import find_peaks
from scipy.spatial import cKDTree
from scipy.constants import Avogadro

def run_simulation_and_analyze(Na_count, box_dims, mass_adsorbent=1.0, initial_counts=None):
    # Create leap input for a given Na_count
    leap_content = f"""source leaprc.protein.ff14SB
source leaprc.water.tip3p
loadoff GGG.lib
loadoff C1A.lib
loadoff E1A.lib
loadoff H1A.lib
loadamberparams FRCMOD/GO.frcmod
mol = loadpdb functionalized.pdb
bondbydistance mol
solvateBox mol TIP3PBOX 5.0
addIons2 mol Na+ {Na_count}
addIons2 mol Cl- {Na_count}
saveamberparm mol mol_solv.prmtop mol_solv.inpcrd
quit
"""
    with open('leap.in', 'w') as f:
        f.write(leap_content)
    os.system('tleap -f leap.in')

    # Run simulation
    prmtop = AmberPrmtopFile('mol_solv.prmtop')
    inpcrd = AmberInpcrdFile('mol_solv.inpcrd')
    topology = prmtop.topology
    positions = inpcrd.positions

    system = prmtop.createSystem(nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
    integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.004*picoseconds)
    platform = Platform.getPlatformByName('CPU')
    simulation = Simulation(topology, system, integrator, platform)
    simulation.context.setPositions(positions)
    simulation.minimizeEnergy()
    simulation.reporters.append(PDBReporter('trajectory.pdb', 100))
    simulation.step(250000)

    # Analyze trajectory for Ce, qe, and RDF
    u = mda.Universe('mol_solv.prmtop', 'trajectory.pdb')
    for ts in u.trajectory:
        ts.dimensions = [box_dims[0], box_dims[1], box_dims[2], 90.0, 90.0, 90.0]
    u.trajectory.rewind()

    protein = u.select_atoms('resname GGG E1A H1A C1A')
    Na = u.select_atoms('name Na+')

    # RDF computation
    r_min, r_max = 0.0, 40.0
    rdf_calc = InterRDF(protein, Na, range=(r_min, r_max), nbins=120)
    rdf_calc.run()

    peaks, _ = find_peaks(rdf_calc.rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf_calc.rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        r_cut = rdf_calc.bins[cands[0]] if len(cands) > 0 else r_max
    else:
        r_cut = r_max

    adsorbed = 0
    total_frames = 0
    ref_group = protein
    target_group = Na
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        tr_tg = cKDTree(p_tg)
        sdm = tr_ref.sparse_distance_matrix(tr_tg, max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0

    MW_Na = 22.99
    if initial_counts is None:
        initial_counts = {'Na': Na_count}
    N_initial = initial_counts['Na']

    volume_A3 = box_dims[0] * box_dims[1] * box_dims[2]
    volume_L = volume_A3 * 1e-27

    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Na / Avogadro) / volume_L * 1e3  # mg/L
    q_e = (avg_adsorbed * MW_Na) / mass_adsorbent * 1e3  # mg/g

    return C_e, q_e, rdf_calc.bins, rdf_calc.rdf

# Loop over different Na⁺ counts
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import csv

Na_counts = [10, 20, 30, 40, 50, 60, 70, 80]
box_dims = (100, 100, 100)
Ce_values = []
qe_values = []
rdf_data = []

for count in Na_counts:
    Ce, qe, r, rdf = run_simulation_and_analyze(count, box_dims, mass_adsorbent=50, initial_counts={'Na': count})
    Ce_values.append(Ce)
    qe_values.append(qe)
    rdf_data.append((count, r, rdf))
    print(f"Na_count: {count}, Ce: {Ce:.2f} mg/L, qe: {qe:.2f} mg/g")

# Save simulation data to CSV
with open('sorption_data.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Na_count', 'Ce (mg/L)', 'q_e (mg/g)'])
    for count, Ce, qe in zip(Na_counts, Ce_values, qe_values):
        writer.writerow([count, Ce, qe])

# Langmuir isotherm fitting and plotting
def langmuir(C, q_max, K_L):
    return (q_max * K_L * C) / (1 + K_L * C)

if len(Ce_values) > 2:
    params, _ = curve_fit(langmuir, Ce_values, qe_values, p0=[max(qe_values), 0.1], maxfev = 10000)
    q_max, K_L = params

    Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
    qe_fit = langmuir(Ce_fit, *params)

    plt.figure(figsize=(8, 6))
    plt.scatter(Ce_values, qe_values, c='b', label='Simulation Data')
    plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir Fit')
    plt.xlabel('Cₑ (mg/L)')
    plt.ylabel('qₑ (mg/g)')
    plt.title('Na⁺ Sorption Isotherm on Activated Carbon')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    print(f"Langmuir fit parameters:\nq_max = {q_max:.2f} mg/g\nK_L = {K_L:.4f} L/mg")

# RDF plotting for each trial
plt.figure(figsize=(10, 6))
for count, r, rdf in rdf_data:
    plt.plot(r, rdf, label=f'Na_count={count}')
plt.xlabel('Distance r (Å)')
plt.ylabel('g(r)')
plt.title('Radial Distribution Function (RDF) for Each Na⁺ Count')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

KeyError: 'POINTERS'

In [15]:
from openmm.app import AmberPrmtopFile, AmberInpcrdFile, Simulation, PDBReporter, PME, HBonds
from openmm import LangevinMiddleIntegrator, Platform
from openmm.unit import kelvin, picosecond, nanometer, picosecond
import os, numpy as np, MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
from scipy.signal import find_peaks
from scipy.spatial import cKDTree
from scipy.constants import Avogadro
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import csv

def calculate_mass_adsorbent(pdb_file, element_masses):
    u = mda.Universe(pdb_file)
    adsorbent = u.select_atoms('resname GGG E1A H1A C1A')
    total_mass = 0.0
    for atom in adsorbent:
        total_mass += element_masses.get(atom.element, 0.0)  # Default to 0 if element not found
    return total_mass  # in atomic mass units (amu)

def run_simulation_and_analyze(Na_count, box_dims, mass_adsorbent, initial_counts=None):
    # Create leap input for a given Na_count
    leap_content = f"""source leaprc.protein.ff14SB
source leaprc.water.tip3p
loadoff GGG.lib
loadoff C1A.lib
loadoff E1A.lib
loadoff H1A.lib
loadamberparams FRCMOD/GO.frcmod
mol = loadpdb functionalized.pdb
bondbydistance mol
solvateBox mol TIP3PBOX 5.0
addIons2 mol Na+ {Na_count}
addIons2 mol Cl- {Na_count}
saveamberparm mol mol_solv.prmtop mol_solv.inpcrd
quit
"""
    with open('leap.in', 'w') as f:
        f.write(leap_content)
    os.system('tleap -f leap.in')

    # Run simulation
    prmtop = AmberPrmtopFile('mol_solv.prmtop')
    inpcrd = AmberInpcrdFile('mol_solv.inpcrd')
    topology = prmtop.topology
    positions = inpcrd.positions

    system = prmtop.createSystem(nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
    integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)  # Reduced timestep for stability
    platform = Platform.getPlatformByName('CPU')
    simulation = Simulation(topology, system, integrator, platform)
    simulation.context.setPositions(positions)
    simulation.minimizeEnergy()
    simulation.reporters.append(PDBReporter('trajectory.pdb', 1000))  # Save every 4 ps (1000 steps)
    simulation.step(250000)  # 250,000 steps * 0.002 ps = 500 ps (adjust as needed)

    # Analyze trajectory for Ce, qe, and RDF
    u = mda.Universe('mol_solv.prmtop', 'trajectory.pdb')
    for ts in u.trajectory:
        ts.dimensions = [box_dims[0], box_dims[1], box_dims[2], 90.0, 90.0, 90.0]
    u.trajectory.rewind()

    protein = u.select_atoms('resname GGG E1A H1A C1A')
    Na = u.select_atoms('name Na+')

    # RDF computation
    r_min, r_max = 0.0, 10.0  # in Å
    rdf_calc = InterRDF(protein, Na, range=(r_min, r_max), nbins=120)
    rdf_calc.run()

    peaks, _ = find_peaks(rdf_calc.rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf_calc.rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        r_cut = rdf_calc.bins[cands[0]] if len(cands) > 0 else r_max
    else:
        r_cut = r_max

    adsorbed = 0
    total_frames = 0
    ref_group = protein
    target_group = Na
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        tr_tg = cKDTree(p_tg)
        sdm = tr_ref.sparse_distance_matrix(tr_tg, max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0

    MW_Na = 22.99  # g/mol
    if initial_counts is None:
        initial_counts = {'Na': Na_count}
    N_initial = initial_counts['Na']

    volume_A3 = box_dims[0] * box_dims[1] * box_dims[2]  # Å³
    volume_L = volume_A3 * 1e-27  # Correct conversion to L
    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Na / Avogadro) / volume_L * 1e3  # mg/L
    q_e = (avg_adsorbed * MW_Na) / mass_adsorbent * 1e3  # mg/g

    return C_e, q_e, rdf_calc.bins, rdf_calc.rdf

# Define atomic masses (g/mol)
element_masses = {
    'H': 1.008,
    'C': 12.011,
    'N': 14.007,
    'O': 15.999,
    # Add other elements as needed
}

# Calculate mass_adsorbent in grams
mass_adsorbent_amu = calculate_mass_adsorbent('functionalized.pdb', element_masses)
mass_adsorbent = mass_adsorbent_amu / Avogadro  # Convert amu to grams
print(f"Mass of adsorbent: {mass_adsorbent} grams")

# Loop over different Na⁺ counts
Na_counts = [20, 40, 60, 80]
box_dims = (150.0, 150.0, 100.0)  # Assuming in Å
Ce_values = []
qe_values = []
rdf_data = []

for count in Na_counts:
    Ce, qe, r, rdf = run_simulation_and_analyze(count, box_dims, mass_adsorbent, initial_counts={'Na': count})
    Ce_values.append(Ce)
    qe_values.append(qe)
    rdf_data.append((count, r, rdf))
    print(f"Na_count: {count}, Ce: {Ce:.2f} mg/L, q_e: {qe:.2f} mg/g")

# Save simulation data to CSV
with open('sorption_data.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Na_count', 'Ce (mg/L)', 'q_e (mg/g)'])
    for count, Ce, qe in zip(Na_counts, Ce_values, qe_values):
        writer.writerow([count, Ce, qe])

# Langmuir isotherm fitting and plotting
def langmuir(C, q_max, K_L):
    return (q_max * K_L * C) / (1 + K_L * C)

if len(Ce_values) > 2:
    params, _ = curve_fit(langmuir, Ce_values, qe_values, p0=[max(qe_values), 0.1], maxfev=10000)
    q_max, K_L = params

    Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
    qe_fit = langmuir(Ce_fit, *params)

    plt.figure(figsize=(8, 6))
    plt.scatter(Ce_values, qe_values, c='b', label='Simulation Data')
    plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir Fit')
    plt.xlabel('Cₑ (mg/L)')
    plt.ylabel('qₑ (mg/g)')
    plt.title('Na⁺ Sorption Isotherm on Activated Carbon')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    print(f"Langmuir fit parameters:\nq_max = {q_max:.2f} mg/g\nK_L = {K_L:.4f} L/mg")

# RDF plotting for each trial
plt.figure(figsize=(10, 6))
for count, r, rdf in rdf_data:
    plt.plot(r, rdf, label=f'Na_count={count}')
plt.xlabel('Distance r (Å)')
plt.ylabel('g(r)')
plt.title('Radial Distribution Function (RDF) for Each Na⁺ Count')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()



NoDataError: This Universe does not contain element information

In [None]:
from openmm.app import AmberPrmtopFile, AmberInpcrdFile, Simulation, PDBReporter, PME, HBonds
from openmm import LangevinMiddleIntegrator, Platform
from openmm.unit import kelvin, picosecond, nanometer
import os
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
from scipy.signal import find_peaks
from scipy.spatial import cKDTree
from scipy.constants import Avogadro
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import csv

def assign_elements(u):
    """
    Assigns element symbols to atoms in the MDAnalysis Universe based on their atom names.

    Parameters:
        u (MDAnalysis.Universe): The MDAnalysis Universe object.
    """
    element_map = {
        'CX': 'C',
        'CY': 'C',
        'CZ': 'C',
        'C4': 'C',
        'OJ': 'O',
        'OK': 'O',
        'OE': 'O',
        'OL': 'O',
        'HK': 'H',
        'C1A': 'C',
    }

    # Initialize a list to hold element symbols
    elements = []

    for atom in u.atoms:
        atom_name = atom.name.strip()
        # Assign element based on the mapping
        element = element_map.get(atom_name, 'X')  # 'X' as default for unknown elements
        elements.append(element)

    # Assign the elements to the Universe
    # MDAnalysis expects elements to be a list of strings
    u.atoms.elements = elements

def calculate_mass_adsorbent(u, element_masses):
    """
    Calculates the mass of the adsorbent based on the provided Universe and element masses.

    Parameters:
        u (MDAnalysis.Universe): The MDAnalysis Universe object with assigned elements.
        element_masses (dict): Dictionary mapping element symbols to their atomic masses (g/mol).

    Returns:
        float: Mass of the adsorbent in grams.
    """
    adsorbent = u.select_atoms('resname GGG E1A H1A C1A')
    total_mass = 0.0
    for atom in adsorbent:
        element = atom.element.strip() if atom.element else 'X'
        mass = element_masses.get(element, 0.0)  # Default to 0 if element not found
        total_mass += mass
    return total_mass / Avogadro  # Convert from g/mol to grams

def run_simulation_and_analyze(Na_count, box_dims, mass_adsorbent, initial_counts=None, pdb_file='functionalized.pdb'):
    """
    Runs the molecular dynamics simulation and analyzes the sorption isotherm.

    Parameters:
        Na_count (int): Number of Na+ ions to add to the system.
        box_dims (tuple): Dimensions of the simulation box in Ångströms (x, y, z).
        mass_adsorbent (float): Mass of the adsorbent in grams.
        initial_counts (dict, optional): Initial ion counts. Defaults to {'Na': Na_count}.
        pdb_file (str, optional): Path to the PDB file. Defaults to 'functionalized.pdb'.

    Returns:
        tuple: (C_e, q_e, rdf_bins, rdf_values)
    """
    # Create leap input for a given Na_count
    leap_content = f"""source leaprc.protein.ff14SB
source leaprc.water.tip3p
loadoff GGG.lib
loadoff C1A.lib
loadoff E1A.lib
loadoff H1A.lib
loadamberparams FRCMOD/GO.frcmod
mol = loadpdb {pdb_file}
bondbydistance mol
solvateBox mol TIP3PBOX 5.0
addIons2 mol Na+ {Na_count}
addIons2 mol Cl- {Na_count}
saveamberparm mol mol_solv.prmtop mol_solv.inpcrd
quit
"""
    with open('leap.in', 'w') as f:
        f.write(leap_content)
    os.system('tleap -f leap.in')

    # Run simulation
    prmtop = AmberPrmtopFile('mol_solv.prmtop')
    inpcrd = AmberInpcrdFile('mol_solv.inpcrd')
    topology = prmtop.topology
    positions = inpcrd.positions

    system = prmtop.createSystem(nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
    integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.002*picosecond)  # Reduced timestep for stability
    platform = Platform.getPlatformByName('CPU')
    simulation = Simulation(topology, system, integrator, platform)
    simulation.context.setPositions(positions)
    simulation.minimizeEnergy()
    simulation.reporters.append(PDBReporter('trajectory.pdb', 1000))  # Save every 4 ps (1000 steps)
    simulation.step(5000)  # 250,000 steps * 0.002 ps = 500 ps (adjust as needed)

    # Analyze trajectory for Ce, q_e, and RDF
    u = mda.Universe('mol_solv.prmtop', 'trajectory.pdb')

    # Assign elements based on atom names
    assign_elements(u)

    # Ensure box dimensions are set correctly (convert Å to nm for OpenMM compatibility)
    # OpenMM outputs the trajectory in nm, but your box_dims are in Å
    box_dims_nm = tuple(d * 0.1 for d in box_dims)  # Convert Å to nm
    u.dimensions = [box_dims_nm[0], box_dims_nm[1], box_dims_nm[2], 90.0, 90.0, 90.0]
    u.trajectory.rewind()

    # Select relevant atom groups
    protein = u.select_atoms('resname GGG E1A H1A C1A')
    Na = u.select_atoms('name Na+')

    # RDF computation
    r_min, r_max = 0.0, 10.0  # in Å
    rdf_calc = InterRDF(protein, Na, range=(r_min, r_max), nbins=120)
    rdf_calc.run()

    peaks, _ = find_peaks(rdf_calc.rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf_calc.rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        r_cut = rdf_calc.bins[cands[0]] if len(cands) > 0 else r_max
    else:
        r_cut = r_max

    adsorbed = 0
    total_frames = 0
    ref_group = protein
    target_group = Na
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        tr_tg = cKDTree(p_tg)
        sdm = tr_ref.sparse_distance_matrix(tr_tg, max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0

    MW_Na = 22.99  # g/mol
    if initial_counts is None:
        initial_counts = {'Na': Na_count}
    N_initial = initial_counts['Na']

    volume_A3 = box_dims[0] * box_dims[1] * box_dims[2]  # Å³
    volume_L = volume_A3 * 1e-27  # Correct conversion to L
    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Na / Avogadro) / volume_L * 1e3  # mg/L
    q_e = (avg_adsorbed * MW_Na) / mass_adsorbent * 1e3  # mg/g

    return C_e, q_e, rdf_calc.bins, rdf_calc.rdf

# Define atomic masses (g/mol)
element_masses = {
    'H': 1.008,
    'C': 12.011,
    'N': 14.007,
    'O': 15.999,
    'X': 0.0,  # For unknown elements
    # Add other elements as needed
}

# Step 1: Loop over different Na⁺ counts
Na_counts = [20, 40, 60, 80]
box_dims = (150.0, 150.0, 100.0)  # in Å
Ce_values = []
qe_values = []
rdf_data = []

for count in Na_counts:
    # Run simulation and analysis
    Ce, qe, r, rdf = run_simulation_and_analyze(
        count,
        box_dims,
        mass_adsorbent=1.0,  # Temporary value; will be recalculated
        initial_counts={'Na': count},
        pdb_file='functionalized.pdb'  # Your original PDB file
    )

    # To accurately calculate mass_adsorbent, we need to assign elements
    # Reload the Universe with the trajectory to assign elements
    u = mda.Universe('mol_solv.prmtop', 'trajectory.pdb')
    assign_elements(u)
    mass_adsorbent = calculate_mass_adsorbent(u, element_masses)

    # Re-run the simulation and analysis with the correct mass_adsorbent
    Ce, qe, r, rdf = run_simulation_and_analyze(
        count,
        box_dims,
        mass_adsorbent,
        initial_counts={'Na': count},
        pdb_file='functionalized.pdb'
    )

    Ce_values.append(Ce)
    qe_values.append(qe)
    rdf_data.append((count, r, rdf))
    print(f"Na_count: {count}, Ce: {Ce:.2f} mg/L, q_e: {qe:.2f} mg/g")

# Step 2: Save simulation data to CSV
with open('sorption_data.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Na_count', 'Ce (mg/L)', 'q_e (mg/g)'])
    for count, Ce, qe in zip(Na_counts, Ce_values, qe_values):
        writer.writerow([count, Ce, qe])

print("Simulation data has been saved to 'sorption_data.csv'.")

# Step 3: Langmuir isotherm fitting and plotting
def langmuir(C, q_max, K_L):
    return (q_max * K_L * C) / (1 + K_L * C)

if len(Ce_values) > 2:
    params, _ = curve_fit(langmuir, Ce_values, qe_values, p0=[max(qe_values), 0.1], maxfev=10000)
    q_max, K_L = params

    Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
    qe_fit = langmuir(Ce_fit, *params)

    plt.figure(figsize=(8, 6))
    plt.scatter(Ce_values, qe_values, c='b', label='Simulation Data')
    plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir Fit')
    plt.xlabel('Cₑ (mg/L)')
    plt.ylabel('qₑ (mg/g)')
    plt.title('Na⁺ Sorption Isotherm on Activated Carbon')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    print(f"Langmuir fit parameters:\nq_max = {q_max:.2f} mg/g\nK_L = {K_L:.4f} L/mg")

# Step 4: RDF plotting for each trial
plt.figure(figsize=(10, 6))
for count, r, rdf in rdf_data:
    plt.plot(r, rdf, label=f'Na_count={count}')
plt.xlabel('Distance r (Å)')
plt.ylabel('g(r)')
plt.title('Radial Distribution Function (RDF) for Each Na⁺ Count')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [16]:
from openmm.app import AmberPrmtopFile, AmberInpcrdFile, Simulation, PDBReporter, PME, HBonds
from openmm import LangevinMiddleIntegrator, Platform
from openmm.unit import kelvin, picosecond, nanometer
import os
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
from scipy.signal import find_peaks
from scipy.spatial import cKDTree
from scipy.constants import Avogadro
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import csv

def assign_elements(u):
    """
    Assigns element symbols to atoms in the MDAnalysis Universe based on their atom names.

    Parameters:
        u (MDAnalysis.Universe): The MDAnalysis Universe object.
    """
    element_map = {
        'CX': 'C',
        'CY': 'C',
        'CZ': 'C',
        'C4': 'C',
        'OJ': 'O',
        'OK': 'O',
        'OE': 'O',
        'OL': 'O',
        'HK': 'H',
        'C1A': 'C',
    }

    # Vectorized assignment using list comprehension for efficiency
    elements = [element_map.get(atom.name.strip(), 'X') for atom in u.atoms]
    u.atoms.elements = elements

def calculate_mass_adsorbent(u, element_masses):
    """
    Calculates the mass of the adsorbent based on the provided Universe and element masses.

    Parameters:
        u (MDAnalysis.Universe): The MDAnalysis Universe object with assigned elements.
        element_masses (dict): Dictionary mapping element symbols to their atomic masses (g/mol).

    Returns:
        float: Mass of the adsorbent in grams.
    """
    adsorbent = u.select_atoms('resname GGG E1A H1A C1A')
    # Vectorized mass calculation using NumPy for efficiency
    masses = np.array([element_masses.get(atom.element.strip(), 0.0) for atom in adsorbent])
    total_mass = masses.sum()
    return total_mass / Avogadro  # Convert from g/mol to grams

def generate_leap_input(Na_count, pdb_file='functionalized.pdb'):
    """
    Generates the leap input file for a given Na_count.

    Parameters:
        Na_count (int): Number of Na+ ions to add.
        pdb_file (str): Path to the PDB file.
    """
    leap_content = f"""source leaprc.protein.ff14SB
source leaprc.water.tip3p
loadoff GGG.lib
loadoff C1A.lib
loadoff E1A.lib
loadoff H1A.lib
loadamberparams FRCMOD/GO.frcmod
mol = loadpdb {pdb_file}
bondbydistance mol
solvateBox mol TIP3PBOX 5.0
addIons2 mol Na+ {Na_count}
addIons2 mol Cl- {Na_count}
saveamberparm mol mol_solv.prmtop mol_solv.inpcrd
quit
"""
    with open('leap.in', 'w') as f:
        f.write(leap_content)

def run_simulation_and_analyze(Na_count, box_dims, mass_adsorbent, pdb_file='functionalized.pdb'):
    """
    Runs the molecular dynamics simulation and analyzes the sorption isotherm.

    Parameters:
        Na_count (int): Number of Na+ ions to add to the system.
        box_dims (tuple): Dimensions of the simulation box in Ångströms (x, y, z).
        mass_adsorbent (float): Mass of the adsorbent in grams.
        pdb_file (str): Path to the PDB file.

    Returns:
        tuple: (C_e, q_e, rdf_bins, rdf_values)
    """
    # Generate and execute leap input
    generate_leap_input(Na_count, pdb_file)
    os.system('tleap -f leap.in')

    # Load Amber files
    prmtop = AmberPrmtopFile('mol_solv.prmtop')
    inpcrd = AmberInpcrdFile('mol_solv.inpcrd')
    topology = prmtop.topology
    positions = inpcrd.positions

    # Create OpenMM system
    system = prmtop.createSystem(nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
    integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.002*picosecond)  # Reduced timestep for stability
    platform = Platform.getPlatformByName('CPU')
    simulation = Simulation(topology, system, integrator, platform)
    simulation.context.setPositions(positions)

    # Energy minimization and setup reporters
    simulation.minimizeEnergy()
    simulation.reporters.append(PDBReporter('trajectory.pdb', 1000))  # Save every 2 ps (1000 steps * 0.002 ps)
    simulation.step(5000)  # 10,000 steps * 0.002 ps = 20 ps (adjust as needed)

    # Analyze trajectory for C_e, q_e, and RDF
    u = mda.Universe('mol_solv.prmtop', 'trajectory.pdb')

    # Assign elements based on atom names
    assign_elements(u)

    # Set box dimensions (convert Å to nm)
    box_dims_nm = tuple(d * 0.1 for d in box_dims)  # Convert Å to nm
    u.dimensions = [box_dims_nm[0], box_dims_nm[1], box_dims_nm[2], 90.0, 90.0, 90.0]
    u.trajectory.rewind()

    # Select relevant atom groups
    protein = u.select_atoms('resname GGG E1A H1A C1A')
    Na = u.select_atoms('name Na+')

    # RDF computation
    rdf_calc = InterRDF(protein, Na, range=(0.0, 10.0), nbins=120)
    rdf_calc.run()

    # Determine cutoff distance based on RDF minima after the first peak
    peaks, _ = find_peaks(rdf_calc.rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf_calc.rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        r_cut = rdf_calc.bins[cands[0]] if len(cands) > 0 else 10.0
    else:
        r_cut = 10.0

    # Calculate adsorbed Na+ ions
    adsorbed = 0
    total_frames = 0
    ref_group = protein
    target_group = Na
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        tr_tg = cKDTree(p_tg)
        sdm = tr_ref.sparse_distance_matrix(tr_tg, max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0

    # Calculate concentrations
    MW_Na = 22.99  # g/mol
    N_initial = Na_count
    volume_A3 = box_dims[0] * box_dims[1] * box_dims[2]  # Å³
    volume_L = volume_A3 * 1e-27  # Convert Å³ to L
    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Na / Avogadro) / volume_L * 1e3  # mg/L
    q_e = (avg_adsorbed * MW_Na) / mass_adsorbent * 1e3  # mg/g

    return C_e, q_e, rdf_calc.bins, rdf_calc.rdf

def langmuir_isotherm(C, q_max, K_L):
    """
    Langmuir isotherm model.

    Parameters:
        C (float or array): Equilibrium concentration.
        q_max (float): Maximum adsorption capacity.
        K_L (float): Langmuir constant.

    Returns:
        float or array: Predicted adsorption.
    """
    return (q_max * K_L * C) / (1 + K_L * C)

def main():
    # Define atomic masses (g/mol)
    element_masses = {
        'H': 1.008,
        'C': 12.011,
        'N': 14.007,
        'O': 15.999,
        'X': 0.0,  # For unknown elements
        # Add other elements as needed
    }

    # Calculate mass_adsorbent once before the loop
    u_initial = mda.Universe('functionalized.pdb')
    assign_elements(u_initial)
    mass_adsorbent = calculate_mass_adsorbent(u_initial, element_masses)
    print(f"Calculated mass_adsorbent: {mass_adsorbent:.4f} grams")

    # Define simulation parameters
    Na_counts = [20, 40, 60, 80]
    box_dims = (150.0, 150.0, 100.0)  # in Å
    Ce_values = []
    qe_values = []
    rdf_data = []

    for count in Na_counts:
        print(f"\nRunning simulation for Na_count: {count}")
        # Run simulation and analysis
        Ce, qe, r, rdf = run_simulation_and_analyze(
            Na_count=count,
            box_dims=box_dims,
            mass_adsorbent=mass_adsorbent,
            pdb_file='functionalized.pdb'
        )

        Ce_values.append(Ce)
        qe_values.append(qe)
        rdf_data.append((count, r, rdf))
        print(f"Na_count: {count}, Ce: {Ce:.2f} mg/L, q_e: {qe:.2f} mg/g")

    # Save simulation data to CSV
    with open('sorption_data.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Na_count', 'Ce (mg/L)', 'q_e (mg/g)'])
        for count, Ce, qe in zip(Na_counts, Ce_values, qe_values):
            writer.writerow([count, Ce, qe])

    print("\nSimulation data has been saved to 'sorption_data.csv'.")

    # Langmuir isotherm fitting and plotting
    if len(Ce_values) > 2:
        params, _ = curve_fit(langmuir_isotherm, Ce_values, qe_values, p0=[max(qe_values), 0.1], maxfev=10000)
        q_max, K_L = params

        Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
        qe_fit = langmuir_isotherm(Ce_fit, *params)

        plt.figure(figsize=(8, 6))
        plt.scatter(Ce_values, qe_values, c='b', label='Simulation Data')
        plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir Fit')
        plt.xlabel('Cₑ (mg/L)')
        plt.ylabel('qₑ (mg/g)')
        plt.title('Na⁺ Sorption Isotherm on Activated Carbon')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

        print(f"\nLangmuir fit parameters:\nq_max = {q_max:.2f} mg/g\nK_L = {K_L:.4f} L/mg")

    # RDF plotting for each trial
    plt.figure(figsize=(10, 6))
    for count, r, rdf in rdf_data:
        plt.plot(r, rdf, label=f'Na_count={count}')
    plt.xlabel('Distance r (Å)')
    plt.ylabel('g(r)')
    plt.title('Radial Distribution Function (RDF) for Each Na⁺ Count')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    main()




AttributeError: Cannot set arbitrary attributes to a Group