# All calculations were performed within a Docker containerized environment, ensuring computational consistency and reproducibility across all experiments.

### Dockerfile:

```
# Use the miniconda3 base image
FROM continuumio/miniconda3

# Set environment variables for the Gaussian directory
ENV GAUSS_EXEDIR=/opt/gaussian/g16
ENV GAUSS_SCRDIR=/opt/gaussian/scr

# Copy the Gaussian installation files into the container
# (Assuming you have a file like G16-A03-SSE42.tbz in your build context)
COPY G16-A03-SSE42.tbz /tmp/G16-A03-SSE42.tbz

# Install Gaussian by extracting it to /opt/gaussian
RUN mkdir -p /opt/gaussian && \
    tar xvjf /tmp/G16-A03-SSE42.tbz -C /opt/gaussian && \
    rm /tmp/G16-A03-SSE42.tbz
RUN mkdir -p /opt/gaussian/scr && chmod -R 777 /opt/gaussian/scr

# Install Jupyter, Open Babel, and clean up
RUN /opt/conda/bin/conda install jupyter -y --quiet && \
    apt-get update && apt-get install -y \
    openbabel \
    graphviz \
    xdg-utils && \
    /opt/conda/bin/conda clean -a -y

# Install psi4 from conda-forge channel
RUN /opt/conda/bin/conda install -c conda-forge psi4 -y && \
    /opt/conda/bin/conda clean -a -y
    
# Copy the requirements file into the container
COPY requirements.txt /tmp/requirements.txt

# Install Python dependencies from requirements.txt
RUN /opt/conda/bin/pip install -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt
    
# Set the working directory for notebooks
WORKDIR /opt/notebooks

# Command to run Jupyter Notebook
CMD /opt/conda/bin/jupyter notebook --notebook-dir=/opt/notebooks --ip='*' --port=8888 --no-browser --allow-root
```


### Place the Gaussian .tbz file (e.g., G16-A03-SSE42.tbz) in the same directory as this Dockerfile.

### Build the Docker image using the following command:

bash
```
docker build -t gaussian-jupyter .
```

### Run the Docker container with mounted volume for Jupyter notebooks and port mapping as per your original command:

bash
```
sudo docker run -i -t -p 8888:8888 -v /path/mount/dir:/opt/notebooks gaussian-jupyter
```

This Dockerfile ensures that Gaussian and Jupyter are both installed in the container. Gaussian is installed in /opt/gaussian, and you can run Jupyter notebooks from the /opt/notebooks directory, which is mounted from your host system.


In [1]:
import pandas as pd

In [2]:
antidiabetic_df = pd.read_csv('../data/antidiabetic/antidiabetic_molecules_smiles.csv')

In [3]:
import os

# Set Gaussian environment variables in the notebook
os.environ['GAUSS_EXEDIR'] = '/opt/gaussian/g16'
os.environ['GAUSS_SCRDIR'] = '/opt/gaussian/scr'

In [6]:
import os
import subprocess
import pandas as pd
import logging
from colorama import Fore, Style

# Налаштування логування
logging.basicConfig(level=logging.INFO, format="%(message)s")

def log_success(message):
    logging.info(Fore.GREEN + message + Style.RESET_ALL)

def log_error(message):
    logging.error(Fore.RED + message + Style.RESET_ALL)

def generate_gjf_file(smiles: str, file_path: str):
    command = f'obabel -:"{smiles}" -O {file_path} --gen3d'
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        log_error(f"Помилка при створенні .gjf ({file_path}): {process.stderr.decode()}")
    else:
        log_success(f"Файл .gjf створено успішно: {file_path}")

def run_gaussian(file_path: str, gauss_exe_dir: str):
    command = f"{gauss_exe_dir}/g16 {file_path}"
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        log_error(f"Помилка при запуску Gaussian ({file_path}): {process.stderr.decode()}")
    else:
        log_success(f"Gaussian виконано успішно: {file_path}")

def convert_to_mol(log_file: str, mol_file: str):
    command = f"obabel {log_file} -O {mol_file}"
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        log_error(f"Помилка при конвертації у .mol ({log_file}): {process.stderr.decode()}")
    else:
        log_success(f"Конвертація у .mol виконана успішно: {mol_file}")

def process_molecules(df: pd.DataFrame, save_path: str):
    os.makedirs(save_path, exist_ok=True)  # Створення папки, якщо не існує
    
    gauss_exe_dir = os.environ.get("GAUSS_EXEDIR")
    if not gauss_exe_dir:
        log_error("Переменная окружения GAUSS_EXEDIR не установлена.")
        return
    
    for index, row in df.iterrows():
        name_file = os.path.join(save_path, f"{row['id']}.gjf")
        log_file = os.path.join(save_path, f"{row['id']}.log")
        mol_file = os.path.join(save_path, f"{row['id']}.mol")
        chk_file = os.path.join(save_path, f"{row['id']}.chk")
        
        generate_gjf_file(row["SMILES"], name_file)
        
        if not os.path.exists(name_file):
            log_error(f"Файл {name_file} не створений, пропуск обробки.")
            continue
        
        new_lines = [
            "%NProcShared=6\n",
            "%mem=5GB\n",
            f"%chk={chk_file}\n",
            "# PM6 Opt\n",
            "\n",  # Порожній рядок
            "Title Card\n",
            "\n"
        ]
        
        with open(name_file, "r") as file:
            lines = file.readlines()
        
        if len(lines) < 5:
            log_error(f"Помилка: файл {name_file} має недостатньо рядків.")
            continue
        
        lines = new_lines + lines[5:]
        
        with open(name_file, "w") as file:
            file.writelines(lines)
        
        run_gaussian(name_file, gauss_exe_dir)
        convert_to_mol(log_file, mol_file)
    
    log_success("Обробка завершена!")


In [5]:
df = antidiabetic_df[:3].copy()
process_molecules(df, "../gauss_files/antidiabetic/")

[32mФайл .gjf створено успішно: Gauss/antidiabetic/DB00197.gjf[0m
[32mGaussian виконано успішно: Gauss/antidiabetic/DB00197.gjf[0m
[32mКонвертація у .mol виконана успішно: Gauss/antidiabetic/DB00197.mol[0m
[32mФайл .gjf створено успішно: Gauss/antidiabetic/DB00222.gjf[0m
[32mGaussian виконано успішно: Gauss/antidiabetic/DB00222.gjf[0m
[32mКонвертація у .mol виконана успішно: Gauss/antidiabetic/DB00222.mol[0m
[32mФайл .gjf створено успішно: Gauss/antidiabetic/DB00284.gjf[0m
[31mПомилка при запуску Gaussian (Gauss/antidiabetic/DB00284.gjf): Segmentation fault (core dumped)
[0m
[32mКонвертація у .mol виконана успішно: Gauss/antidiabetic/DB00284.mol[0m
[32mОбробка завершена![0m
