<a href="https://colab.research.google.com/github/sushirito/Molecular-Dynamics/blob/23_AC_Replication/Sargassum_Replication.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Replicating carbon structure of this paper: https://www.mdpi.com/1420-3049/27/18/6040

In [1]:
%%capture
!apt-get update
!apt-get install -y build-essential cmake libfftw3-dev libjpeg-dev libpng-dev \
                    libopenmpi-dev openmpi-bin python3-dev python3-numpy git

# Clone the LAMMPS repository
%cd /content
!git clone -b stable https://github.com/lammps/lammps.git
%cd lammps

# Create a build directory and compile LAMMPS with required packages
!mkdir build
%cd build
!cmake ../cmake -DBUILD_SHARED_LIBS=yes \
                -DLAMMPS_EXCEPTIONS=yes \
                -DPKG_MOLECULE=yes \
                -DPKG_KSPACE=yes \
                -DPKG_RIGID=yes \
                -DPKG_MANYBODY=yes \
                -DPKG_USER-MISC=yes \
                -DPKG_PYTHON=yes \
                -DPYTHON_EXECUTABLE=`which python3`
!make -j4
!make install-python

# Return to the working directory
%cd /content/


In [2]:
import os
os.environ["OMP_NUM_THREADS"] = "4"  # limit OpenMP threads if you want

import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial import cKDTree
from scipy.signal import find_peaks
from scipy.optimize import curve_fit
from scipy.constants import Avogadro

!pip install MDAnalysis
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF

# We can also create a symlink so we can call "lmp" directly:
!ln -s /content/lammps/build/lmp /usr/local/bin/lmp
!which lmp


Collecting MDAnalysis
  Downloading MDAnalysis-2.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (108 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/108.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.5/108.5 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting GridDataFormats>=0.4.0 (from MDAnalysis)
  Downloading GridDataFormats-1.0.2-py3-none-any.whl.metadata (4.9 kB)
Collecting mmtf-python>=1.0.0 (from MDAnalysis)
  Downloading mmtf_python-1.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting fasteners (from MDAnalysis)
  Downloading fasteners-0.19-py3-none-any.whl.metadata (4.9 kB)
Collecting mda-xdrlib (from MDAnalysis)
  Downloading mda_xdrlib-0.2.0-py3-none-any.whl.metadata (19 kB)
Collecting waterdynamics (from MDAnalysis)
  Downloading waterdynamics-1.2.0-py3-none-any.whl.metadata (37 kB)
Collecting pathsimanalysis (from MDAnalysis)
  Downloading pathsimanalys



/usr/local/bin/lmp


In [3]:
# Install required dependencies for GOPY
!apt-get update
!apt-get install -y libgl1-mesa-glx libxi6 libxrender1
!pip install numpy scipy

# Clone the GOPY repository and navigate into it
!git clone https://github.com/Iourarum/GOPY.git
%cd GOPY

# Generate a pristine graphene sheet
!python GOPY.py generate_PG 10 10 graphene.pdb

# Functionalize the graphene:
# Arguments: path_to_file, number_of_COOH, number_of_epoxy, number_of_OH, output_filename
!python GOPY.py generate_GO graphene.pdb 2 4 8 functionalized.pdb

# Return to the parent directory
%cd ..


Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 261 kB in 6s (44.3 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading p

In [7]:
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.constants import Avogadro

import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF

# Function to read the functionalized PDB and extract structure information
def get_carbon_structure_from_pdb(pdb_path):
    """
    Reads a PDB file and extracts atom positions, bond connectivity, angles,
    and atom information to be used for building a LAMMPS data file.
    """
    u = mda.Universe(pdb_path)
    # Guess missing element information
    u.guess_TopologyAttrs(context='default', to_guess=['elements'])

    # Extract positions
    positions = u.atoms.positions.tolist()

    # Initialize atom information list
    atom_info = []
    for atom in u.atoms:
        # Use guessed element information
        if atom.element == "C":
            atom_info.append({'elem': 'C', 'type': 1, 'charge': 0.0})
        elif atom.element == "O":
            atom_info.append({'elem': 'O', 'type': 2, 'charge': -0.5})
        elif atom.element == "H":
            atom_info.append({'elem': 'H', 'type': 3, 'charge': 0.0})
        else:
            # Default type for any unexpected element
            atom_info.append({'elem': atom.element, 'type': 99, 'charge': 0.0})

    # Extract bonds from the PDB
    bonds = []
    bond_id = 1
    for bond in u.bonds:
        atom1 = bond.atoms[0].index + 1
        atom2 = bond.atoms[1].index + 1
        bonds.append((bond_id, 1, atom1, atom2))
        bond_id += 1

    # Angles extraction (left empty here; compute if needed)
    angles = []

    return positions, bonds, angles, atom_info


# Function to create a LAMMPS data file using the carbon structure from PDB and placing Hg²⁺ and Cl⁻
def create_data_file(num_Hg, filename, box_size, num_Mg, num_Zn, num_Ca, num_water):
    # 1. Get the carbon structure from the PDB file
    pdb_path = "GOPY/functionalized.pdb"
    positions, bonds, angles, atom_info = get_carbon_structure_from_pdb(pdb_path)

    # 2. Initialize lists for all atoms, bonds, angles, etc.
    all_positions = list(positions)
    all_bonds = list(bonds)
    all_angles = list(angles)
    all_atom_info = list(atom_info)

    # 3. Add Hg²⁺ and Cl⁻ placement
    for i in range(num_Hg):
        # Place Hg randomly within the box
        pos_Hg = np.random.rand(3) * np.array(box_size)
        all_positions.append(pos_Hg.tolist())
        all_atom_info.append({'elem': 'Hg', 'type': 4, 'charge': 2.0})

        # Place Cl randomly within the box
        pos_Cl = np.random.rand(3) * np.array(box_size)
        all_positions.append(pos_Cl.tolist())
        all_atom_info.append({'elem': 'Cl', 'type': 5, 'charge': -1.0})

    # 4. Write the LAMMPS data file
    with open(filename, 'w') as f:
        f.write(f"LAMMPS data file via Python\n\n")
        f.write(f"{len(all_atom_info)} atoms\n")
        f.write(f"{len(all_bonds)} bonds\n")
        f.write(f"{len(all_angles)} angles\n\n")

        f.write(f"0.0 {box_size[0]} xlo xhi\n")
        f.write(f"0.0 {box_size[1]} ylo yhi\n")
        f.write(f"0.0 {box_size[2]} zlo zhi\n\n")

        f.write("Masses\n\n")
        f.write("1 12.01  # C\n")
        f.write("2 16.00  # O\n")
        f.write("3 1.008  # H\n")
        f.write("4 200.59 # Hg\n")
        f.write("5 35.45  # Cl\n")

        f.write("\nAtoms\n\n")
        for idx, atom in enumerate(all_atom_info, start=1):
            pos = all_positions[idx-1]
            f.write(f"{idx} 1 {atom['type']} {atom['charge']} {pos[0]} {pos[1]} {pos[2]}\n")

        if all_bonds:
            f.write("\nBonds\n\n")
            for bond in all_bonds:
                bond_id, bond_type, a1, a2 = bond
                f.write(f"{bond_id} {bond_type} {a1} {a2}\n")

        if all_angles:
            f.write("\nAngles\n\n")
            for angle in all_angles:
                angle_id, angle_type, a1, a2, a3 = angle
                f.write(f"{angle_id} {angle_type} {a1} {a2} {a3}\n")
# Simulation setup and run function
def setup_and_run_sim(num_Hg=5, box_size=(50.0, 50.0, 50.0),
                      num_Mg=2, num_Zn=2, num_Ca=2, num_water=500):
    data_file = f"data_{num_Hg}.lmp"
    dump_file = f"dump_{num_Hg}.lammpstrj"

    create_data_file(num_Hg=num_Hg, filename=data_file, box_size=box_size,
                     num_Mg=num_Mg, num_Zn=num_Zn, num_Ca=num_Ca, num_water=num_water)

    input_script = f"""
units           real
atom_style      full
boundary        p p p
read_data       {data_file}

pair_style      lj/cut/coul/long 12.0 12.0
kspace_style    pppm 1.0e-5

pair_coeff 1 1 0.07 3.40  # C
pair_coeff 2 2 0.16 3.05  # O
pair_coeff 3 3 0.02 0.00  # H
pair_coeff 4 4 0.20 3.70  # Hg2+
pair_coeff 5 5 0.10 3.40  # Cl-
# Add additional pair_coeff definitions for other atom types if needed

bond_style      harmonic
bond_coeff      1 450.0 1.42

angle_style     harmonic
angle_coeff     1 55.0 120.0

group           carbon type 1
fix             fix_carbon carbon setforce 0.0 0.0 0.0

neighbor        2.0 bin
neigh_modify    delay 0 every 1 check yes

min_style       cg
minimize        1e-4 1e-6 100 1000

velocity        all create 300.0 12345 mom yes rot yes dist gaussian
fix             nvt_control all nvt temp 300.0 300.0 100.0

thermo          1000
thermo_style    custom step temp etotal press

dump            1 all atom 100 {dump_file}

timestep        1.0
run             2000
"""
    with open("in_run.lmp", "w") as f:
        f.write(input_script)

    os.system(f"lmp -in in_run.lmp")

# Analysis function
def analyze_simulation(num_Hg=5, box_dims=(50,50,50), mass_adsorbent=1.0,
                       initial_counts=None):
    if initial_counts is None:
        initial_counts = {'Hg': num_Hg, 'Mg': 2, 'Zn': 2, 'Ca': 2}

    data_file = f"data_{num_Hg}.lmp"
    dump_file = f"dump_{num_Hg}.lammpstrj"

    if not os.path.exists(dump_file):
        print(f"Dump file {dump_file} not found. Skipping analysis.")
        return None, None, None, None, None

    u = mda.Universe(
        topology=data_file,
        trajectory=dump_file,
        topology_format='DATA',
        trajectory_format='LAMMPSDUMP'
    )

    ref_group = u.select_atoms('type 1')  # carbon
    target_group = u.select_atoms('type 4')  # mercury

    r_min, r_max = 0.0, 12.0
    rdf_calc = InterRDF(ref_group, target_group, range=(r_min, r_max), nbins=120)
    rdf_calc.run()

    r = rdf_calc.bins
    rdf = rdf_calc.rdf

    from scipy.signal import find_peaks
    peaks, _ = find_peaks(rdf)
    if len(peaks) > 0:
        first_peak = peaks[0]
        inverted = -rdf
        minima, _ = find_peaks(inverted, distance=10)
        cands = minima[minima > first_peak]
        if len(cands) > 0:
            r_cut = r[cands[0]]
        else:
            r_cut = r_max
    else:
        r_cut = r_max

    from scipy.spatial import cKDTree
    adsorbed = 0
    total_frames = 0
    for ts in u.trajectory:
        p_ref = ref_group.positions
        p_tg = target_group.positions
        tr_ref = cKDTree(p_ref)
        sdm = tr_ref.sparse_distance_matrix(cKDTree(p_tg), max_distance=r_cut, output_type='coo_matrix')
        col_ids = np.unique(sdm.col)
        adsorbed += len(col_ids)
        total_frames += 1

    avg_adsorbed = adsorbed / total_frames if total_frames > 0 else 0.0

    MW_Hg = 200.59
    q_e = (avg_adsorbed * MW_Hg) / mass_adsorbent * 1e3

    volume_A3 = box_dims[0]*box_dims[1]*box_dims[2]
    volume_L = volume_A3 * 1e-24
    N_initial = initial_counts['Hg']
    N_unads = N_initial - avg_adsorbed
    C_e = (N_unads * MW_Hg / Avogadro) / volume_L * 1e3

    return C_e, q_e, r, rdf, r_cut

In [8]:
import csv

Hg_counts = [5, 10, 15, 20]

Ce_values = []
qe_values = []
rdf_data = []

for Hg_num in Hg_counts:
    setup_and_run_sim(
        num_Hg=Hg_num,
        box_size=(50,50,50),
        num_Mg=2,
        num_Zn=2,
        num_Ca=2,
        num_water=500
    )

    Ce, q_e, r, rdf, r_cut = analyze_simulation(
        Hg_num, box_dims=(50,50,50),
        initial_counts={'Hg': Hg_num, 'Mg':2, 'Zn':2, 'Ca':2}
    )
    if Ce is not None:
        Ce_values.append(Ce)
        qe_values.append(q_e)
        rdf_data.append((Hg_num, r, rdf))

with open('sorption_data.csv','w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Hg_num","Ce(mg/L)","q_e(mg/g)"])
    for num, Ce, q_e in zip(Hg_counts, Ce_values, qe_values):
        writer.writerow([num, Ce, q_e])

if len(Ce_values) > 2:
    def langmuir(C, q_max, K_L):
        return (q_max*K_L*C)/(1.0 + K_L*C)

    try:
        params, _ = curve_fit(langmuir, Ce_values, qe_values, p0=[max(qe_values),0.1])
        Ce_fit = np.linspace(min(Ce_values), max(Ce_values), 100)
        qe_fit = langmuir(Ce_fit,*params)

        plt.figure(figsize=(7,5))
        plt.scatter(Ce_values, qe_values, c='b', label='Data')
        plt.plot(Ce_fit, qe_fit, 'r--', label='Langmuir fit')
        plt.xlabel("Ce (mg/L)")
        plt.ylabel("qe (mg/g)")
        plt.title("Hg Sorption Isotherm")
        plt.grid(True)
        plt.legend()
        plt.show()

        print("Langmuir fit parameters:")
        print(f"q_max= {params[0]:.2f}, K_L= {params[1]:.4f}")
    except Exception as e:
        print("curve_fit failed:", e)

plt.figure(figsize=(8,6))
for (Hg_num, r, rdf) in rdf_data:
    plt.plot(r, rdf, label=f"Hg={Hg_num}")
plt.xlabel("Distance (Å)")
plt.ylabel("g(r)")
plt.title("Carbon-Hg RDF")
plt.legend()
plt.grid(True)
plt.show()


NoDataError: This Universe does not contain bonds information