<a href="https://colab.research.google.com/github/sushirito/Molecular-Dynamics/blob/23_AC_Replication/Sargassum_Replication.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Replicating carbon structure of this paper: https://www.mdpi.com/1420-3049/27/18/6040

In [None]:
%%capture
!apt-get update
!apt-get install -y build-essential cmake libfftw3-dev libjpeg-dev libpng-dev \
                    libopenmpi-dev openmpi-bin python3-dev python3-numpy git

# Clone the LAMMPS repository
%cd /content
!git clone -b stable https://github.com/lammps/lammps.git
%cd lammps

# Create a build directory and compile LAMMPS with required packages
!mkdir build
%cd build
!cmake ../cmake -DBUILD_SHARED_LIBS=yes \
                -DLAMMPS_EXCEPTIONS=yes \
                -DPKG_MOLECULE=yes \
                -DPKG_KSPACE=yes \
                -DPKG_RIGID=yes \
                -DPKG_MANYBODY=yes \
                -DPKG_USER-MISC=yes \
                -DPKG_PYTHON=yes \
                -DPYTHON_EXECUTABLE=`which python3`
!make -j4
!make install-python

# Return to the working directory
%cd /content/


In [None]:
# Install required dependencies for GOPY
!apt-get update
!apt-get install -y libgl1-mesa-glx libxi6 libxrender1
!pip install numpy scipy

# Clone the GOPY repository and navigate into it
!git clone https://github.com/Iourarum/GOPY.git
%cd GOPY

# Generate a pristine graphene sheet
!python GOPY.py generate_PG 10 10 graphene.pdb

# Functionalize the graphene:
# Arguments: path_to_file, number_of_COOH, number_of_epoxy, number_of_OH, output_filename
!python GOPY.py generate_GO graphene.pdb 2 4 8 functionalized.pdb

# Return to the parent directory
%cd ..


Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 261 kB in 6s (44.3 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading p

In [None]:
import os
os.environ["OMP_NUM_THREADS"] = "4"  # limit OpenMP threads if you want

import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial import cKDTree
from scipy.signal import find_peaks
from scipy.optimize import curve_fit
from scipy.constants import Avogadro

!pip install MDAnalysis
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF

# We can also create a symlink so we can call "lmp" directly:
!ln -s /content/lammps/build/lmp /usr/local/bin/lmp
!which lmp


Collecting MDAnalysis
  Downloading MDAnalysis-2.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (108 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/108.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.5/108.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting GridDataFormats>=0.4.0 (from MDAnalysis)
  Downloading GridDataFormats-1.0.2-py3-none-any.whl.metadata (4.9 kB)
Collecting mmtf-python>=1.0.0 (from MDAnalysis)
  Downloading mmtf_python-1.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting fasteners (from MDAnalysis)
  Downloading fasteners-0.19-py3-none-any.whl.metadata (4.9 kB)
Collecting mda-xdrlib (from MDAnalysis)
  Downloading mda_xdrlib-0.2.0-py3-none-any.whl.metadata (19 kB)
Collecting waterdynamics (from MDAnalysis)
  Downloading waterdynamics-1.2.0-py3-none-any.whl.metadata (37 kB)
Collecting pathsimanalysis (from MDAnalysis)
  Downloading pathsimanalys



/usr/local/bin/lmp


In [31]:
import numpy as np

def generate_random_position_box(box_bounds, existing_positions, min_distance=2.5, tolerance=0.1):
    max_attempts = 1000
    xlo, xhi, ylo, yhi, zlo, zhi = box_bounds
    for _ in range(max_attempts):
        x = np.random.uniform(xlo, xhi)
        y = np.random.uniform(ylo, yhi)
        z = np.random.uniform(zlo, zhi)
        pos = np.array([x, y, z])

        if existing_positions.size == 0:
            return pos
        distances = np.linalg.norm(existing_positions - pos, axis=1)
        if np.all(distances >= (min_distance - tolerance)):
            return pos
    return None

def place_sorbate(pos, orientation, bond_length=0.9572, bond_angle=104.52):
    theta, phi = orientation
    angle_rad = np.deg2rad(bond_angle / 2)
    x_offset = bond_length * np.sin(angle_rad) * np.cos(phi)
    y_offset = bond_length * np.sin(angle_rad) * np.sin(phi)
    z_offset = bond_length * np.cos(angle_rad)
    H1 = pos + np.array([x_offset, y_offset, z_offset])
    H2 = pos + np.array([-x_offset, -y_offset, z_offset])
    return H1, H2

def create_water_molecules(num_water, box_bounds, existing_positions, min_distance_O=2.5, min_distance_H=1.5):
    water_positions = []
    bond_length = 0.9572
    bond_angle = 104.52

    for _ in range(num_water):
        theta = np.random.uniform(0, np.pi)
        phi = np.random.uniform(0, 2 * np.pi)
        orientation = (theta, phi)

        O = generate_random_position_box(box_bounds, existing_positions, min_distance=min_distance_O)
        if O is None:
            continue
        H1, H2 = place_sorbate(O, orientation, bond_length, bond_angle)
        xlo, xhi, ylo, yhi, zlo, zhi = box_bounds

        if not (xlo <= H1[0] <= xhi and ylo <= H1[1] <= yhi and zlo <= H1[2] <= zhi and
                xlo <= H2[0] <= xhi and ylo <= H2[1] <= yhi and zlo <= H2[2] <= zhi):
            continue

        if existing_positions.size > 0:
            distances_O = np.linalg.norm(existing_positions - O, axis=1)
            distances_H1 = np.linalg.norm(existing_positions - H1, axis=1)
            distances_H2 = np.linalg.norm(existing_positions - H2, axis=1)
        else:
            distances_O = distances_H1 = distances_H2 = np.array([])

        if ((existing_positions.size == 0) or
            (np.all(distances_O >= min_distance_O) and
             np.all(distances_H1 >= min_distance_H) and
             np.all(distances_H2 >= min_distance_H))):
            water_positions.append((O, H1, H2))
            existing_positions = np.vstack([existing_positions, O, H1, H2])

    return water_positions, existing_positions

def add_cations(num_Mg, num_Zn, num_Ca, box_bounds, existing_positions):
    cations = {'Mg': (6, 2.0, num_Mg),
               'Zn': (7, 2.0, num_Zn),
               'Ca': (8, 2.0, num_Ca)}
    added_cations = []

    for ion, (type_id, charge, ion_count) in cations.items():
        for _ in range(ion_count):
            pos = generate_random_position_box(box_bounds, existing_positions, min_distance=2.5)
            if pos is not None:
                added_cations.append((ion, type_id, charge, pos))
                existing_positions = np.vstack([existing_positions, pos])
    return added_cations, existing_positions

def append_ions_to_pdb(pdb_in, pdb_out, num_Hg = 10, num_Mg=10, num_Zn=10, num_Ca=10, num_waters=50, box_bounds=(0, 50, 0, 50, 0, 50)):
    # Read initial PDB and extract existing atom positions
    with open(pdb_in, 'r') as f:
        lines = f.readlines()
    content_lines = [line for line in lines if line.startswith("ATOM")]
    existing_positions = []
    for line in content_lines:
        try:
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
        except ValueError:
            continue
        existing_positions.append([x, y, z])
    existing_positions = np.array(existing_positions) if existing_positions else np.empty((0, 3))

    # Add cations
    added_cations, existing_positions = add_cations(num_Mg, num_Zn, num_Ca, box_bounds, existing_positions)

    # Add water molecules
    water_positions, existing_positions = create_water_molecules(num_waters, box_bounds, existing_positions)

    # Prepare new PDB lines
    new_lines = [line for line in lines if not line.startswith("END")]
    last_serial = max([int(line[6:11].strip()) for line in new_lines if line.startswith("ATOM")], default=0) + 1
    resSeq = 1

    # Format atom line
    def format_atom_line(serial, atom_name, resName, chainID, resSeq, x, y, z, occupancy=1.00, temp=0.00):
        return (f"ATOM  {serial:5d} {atom_name:^4}{resName:>3} {chainID}{resSeq:4d}    "
                f"{x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{temp:6.2f}          {atom_name:>2}\n")

    # Append new cation atoms
    for ion, type_id, charge, pos in added_cations:
        x, y, z = pos
        line = format_atom_line(last_serial, ion, "ION", "A", resSeq, x, y, z)
        new_lines.append(line)
        last_serial += 1
        resSeq += 1

    # Append water atoms (O, H, H) for each water molecule
    for (O, H1, H2) in water_positions:
        for atom_label, pos in zip(["O", "H", "H"], [O, H1, H2]):
            x, y, z = pos
            line = format_atom_line(last_serial, atom_label, "HOH", "A", resSeq, x, y, z)
            new_lines.append(line)
            last_serial += 1
        resSeq += 1

    new_lines.append("END\n")
    with open(pdb_out, 'w') as f:
        f.writelines(new_lines)

# Example usage:
box_bounds = (0.0, 50.0, 0.0, 50.0, 0.0, 50.0)
append_ions_to_pdb("GOPY/functionalized.pdb", "updated_structure.pdb",
                                        num_Hg=10, num_Mg=10, num_Zn=10, num_Ca=10,
                                        num_waters=50, box_bounds=box_bounds)

In [28]:
import os

def setup_and_run_sim(num_Hg=5, box_size=(50.0, 50.0, 50.0)):
    # Append ions to the original PDB to create an updated structure
    append_ions_to_pdb("GOPY/functionalized.pdb", "updated_structure.pdb", num_Hg, np.array(box_size))

    # Define the LAMMPS input script as a multiline string
    input_script = f"""
units           real
atom_style      full
boundary        p p p

read_data       updated_structure.pdb pdb

pair_style      lj/cut/coul/long 12.0 12.0
kspace_style    pppm 1.0e-5

pair_coeff 1 1 0.07 3.40  # C
pair_coeff 2 2 0.16 3.05  # O
pair_coeff 3 3 0.02 0.00  # H
pair_coeff 4 4 0.20 3.70  # Hg2+
pair_coeff 5 5 0.10 3.40  # Cl-
# Add additional pair_coeff definitions for other atom types if needed

bond_style      harmonic
bond_coeff      1 450.0 1.42

angle_style     harmonic
angle_coeff     1 55.0 120.0

group           carbon type 1
fix             fix_carbon carbon setforce 0.0 0.0 0.0

neighbor        2.0 bin
neigh_modify    delay 0 every 1 check yes

min_style       cg
minimize        1e-4 1e-6 100 1000

velocity        all create 300.0 12345 mom yes rot yes dist gaussian
fix             nvt_control all nvt temp 300.0 300.0 100.0

thermo          1000
thermo_style    custom step temp etotal press

dump            1 all atom 100 dump.lammpstrj

timestep        1.0
run             2000
"""
    # Write the input script to file
    with open("in_run.lmp", "w") as f:
        f.write(input_script)

    # Execute the LAMMPS simulation
    os.system("lmp -in in_run.lmp")


In [29]:
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF
from scipy.spatial import cKDTree
from scipy.signal import find_peaks

def analyze_simulation(num_Hg, dump_file, box_dims=(50,50,50), mass_adsorbent=1.0):
    # Load Universe using updated_structure.pdb for topology and dump file for trajectory
    u = mda.Universe("updated_structure.pdb", dump_file,
                 topology_format='PDB', format='LAMMPSDUMP')


    ref_group = u.select_atoms('element C')    # carbon atoms
    target_group = u.select_atoms('element Hg')  # mercury atoms

    r_min, r_max = 0.0, 12.0
    rdf_calc = InterRDF(ref_group, target_group, range=(r_min, r_max), nbins=120)
    rdf_calc.run()

    r = rdf_calc.bins
    rdf = rdf_calc.rdf

    peaks, _ = find_peaks(rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        r_cut = r[cands[0]] if len(cands) > 0 else r_max
    else:
        r_cut = r_max

    adsorbed = 0
    total_frames = 0
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        sdm = tr_ref.sparse_distance_matrix(cKDTree(p_tg), max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0
    MW_Hg = 200.59
    q_e = (avg_adsorbed * MW_Hg) / mass_adsorbent * 1e3

    volume_A3 = box_dims[0]*box_dims[1]*box_dims[2]
    volume_L = volume_A3 * 1e-24
    N_initial = num_Hg
    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Hg / Avogadro) / volume_L * 1e3

    return C_e, q_e, r, rdf, r_cut


In [32]:
import csv
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

Hg_counts = [5, 10, 15, 20]
Ce_values = []
qe_values = []
rdf_data = []

for Hg_num in Hg_counts:
    # Run simulation for given number of Hg atoms
    setup_and_run_sim(num_Hg=Hg_num, box_size=(50,50,50))

    dump_file = "dump.lammpstrj"
    Ce, q_e, r, rdf, r_cut = analyze_simulation(num_Hg=Hg_num, dump_file=dump_file, box_dims=(50,50,50))

    Ce_values.append(Ce)
    qe_values.append(q_e)
    rdf_data.append((Hg_num, r, rdf))

with open('sorption_data.csv','w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Hg_num","Ce(mg/L)","q_e(mg/g)"])
    for num, Ce, q_e in zip(Hg_counts, Ce_values, qe_values):
        writer.writerow([num, Ce, q_e])

if len(Ce_values) > 2:
    def langmuir(C, q_max, K_L):
        return (q_max*K_L*C)/(1.0 + K_L*C)

    try:
        params, _ = curve_fit(langmuir, Ce_values, qe_values, p0=[max(qe_values),0.1])
        Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
        qe_fit = langmuir(Ce_fit,*params)

        plt.figure(figsize=(7,5))
        plt.scatter(Ce_values, qe_values, c='b', label='Data')
        plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir fit')
        plt.xlabel("Ce (mg/L)")
        plt.ylabel("qe (mg/g)")
        plt.title("Hg Sorption Isotherm")
        plt.grid(True)
        plt.legend()
        plt.show()

        print("Langmuir fit parameters:")
        print(f"q_max= {params[0]:.2f}, K_L= {params[1]:.4f}")
    except Exception as e:
        print("curve_fit failed:", e)

plt.figure(figsize=(8,6))
for (Hg_num, r, rdf) in rdf_data:
    plt.plot(r, rdf, label=f"Hg={Hg_num}")
plt.xlabel("Distance (Å)")
plt.ylabel("g(r)")
plt.title("Carbon-Hg RDF")
plt.legend()
plt.grid(True)
plt.show()

TypeError: only integer scalar arrays can be converted to a scalar index