# LAMMPS Simulation Workflow

This notebook provides a structured workflow for running LAMMPS simulations, archiving results, and uploading to S3.  


In [1]:
# --- Configuration Variables ---

import time
import subprocess
import os
import tarfile
import boto3

# Simulation input/output files, passed to the LAMMPS command line below
# as the -in, -log and -screen arguments respectively
INPUT_FILE = "MD_test.in"
LOG_FILE = "MD_test.log"
SCREEN_FILE = "MD_test.err"

# Number of MPI processes (passed to `mpirun -np`)
NUM_OF_CORES = 8

# S3 upload parameters: destination bucket and the key prefix the archive is stored under
BUCKET_NAME = "labdev-sagemaker-studio"
S3_PREFIX = "lammps_simulation/your_name/"  # <-- Edit as needed

# Name for this simulation run (used for tar file naming)
RUN_NAME = "my_run"  # <-- Set this to your desired run name

# Date stamp (YYYY-MM-DD) appended to the archive name. It is evaluated once, when
# this cell runs, and has day granularity: archives built on the same day with the
# same RUN_NAME share a file name.
TIMESTAMP = time.strftime("%Y-%m-%d")

## (Optional) Change Working Directory

If your simulation files are in a different directory, use the cell below to change directories.

In [2]:
# Example: Uncomment and set your path if needed
# %cd /path/to/your/lammps/project

In [10]:

# Build your LAMMPS or other shell command using the configuration variables above.
# Example:

cmd = f"mpirun --oversubscribe -np {NUM_OF_CORES} lmp_gpu -in {INPUT_FILE} -log {LOG_FILE} -screen {SCREEN_FILE}"  # <-- Edit as needed

if cmd:
    print("Running command:\n", cmd)
    # The command goes through the shell (shell=True) so it can be edited freely as
    # a single string. stdout/stderr are captured as text instead of being streamed.
    # check=False: a non-zero exit status is reported below rather than raised, so
    # `result` is always defined once this cell has run.
    result = subprocess.run(cmd, shell=True, check=False, capture_output=True, text=True)
    if result.returncode != 0:
        print("Command failed with return code:", result.returncode)
        # The streams were captured, so they must be printed explicitly; otherwise
        # the reason for the failure (e.g. "mpirun: not found") is lost.
        print("STDOUT:\n", result.stdout)
        print("STDERR:\n", result.stderr)
else:
    print("No command specified. Please set the 'cmd' variable.")

Running command:
 mpirun --oversubscribe -np 8 lmp_gpu -in MD_test.in -log MD_test.log -screen MD_test.err
Command failed with return code: 127
STDOUT:
 
STDERR:
 /bin/sh: 1: mpirun: not found



## Create `.tar.gz` Archive of Current Directory

This will create a `.tar.gz` archive of the **current directory** (including all files and subfolders).

**Tips:**
- Ensure the notebook's working directory is the directory you want to archive, or change directory first.
- To check your current directory, run the next cell.

In [4]:
# Show where the kernel is currently running; this is the directory that gets archived.
print(f"Current working directory: {os.getcwd()}")

Current working directory: /mnt/custom-file-systems/efs/fs-061ba43065b82da0d_fsap-0b84c9aed0bbe96d9/MD_test_CPU


In [5]:
def create_archive(run_name=None, timestamp=None):
    """
    Archive the current directory into a tar.gz file.

    The archive is written into the current working directory and contains the
    whole directory tree under a single top-level folder named after that
    directory. Subdirectories are stored as entries of their own, so empty
    ones are preserved. Any archives left in the directory by earlier runs are
    included like any other file.

    Args:
        run_name: Prefix of the archive file name. Defaults to the global
            RUN_NAME when omitted.
        timestamp: Suffix of the archive file name. Defaults to the global
            TIMESTAMP when omitted.

    Returns:
        The archive file name (relative to the current working directory).
    """
    if run_name is None:
        run_name = RUN_NAME
    if timestamp is None:
        timestamp = TIMESTAMP

    current_dir = os.getcwd()
    tar_filename = f"{run_name}_{timestamp}.tar.gz"
    # Top-level folder inside the archive; fall back to "." if the working
    # directory has no base name (i.e. it is the filesystem root).
    top_level = os.path.basename(current_dir) or "."
    with tarfile.open(tar_filename, "w:gz") as tar:
        # A recursive add stores directories as well as files. TarFile.add
        # skips the archive file that is currently being written.
        tar.add(current_dir, arcname=top_level)
    print(f"Created archive: {tar_filename}")
    return tar_filename

# Create the archive and keep the returned file name for the upload step
tar_filename = create_archive()

Created archive: my_run_2025-06-11.tar.gz


## Upload `.tar.gz` to S3 Bucket

Uploads the archive to your S3 bucket.  
**Edit the `S3_PREFIX` and `BUCKET_NAME` variables in the configuration cell at the top as needed.**

In [8]:
def upload_to_s3(tar_filename, bucket_name, s3_prefix):
    """
    Upload the tar.gz file to the specified S3 bucket and prefix.

    Args:
        tar_filename: Local path of the archive to upload. Only its base name
            is used in the object key, so local directories never end up in
            the key.
        bucket_name: Name of the destination S3 bucket.
        s3_prefix: Key prefix to store the archive under. A trailing "/" is
            added when missing. An empty prefix stores the archive at the
            bucket root.

    Returns:
        None. The destination is reported on standard output.
    """
    # Only a non-empty prefix gets a separator: appending "/" to "" would
    # produce a key that starts with a slash.
    if s3_prefix and not s3_prefix.endswith("/"):
        s3_prefix += "/"
    s3_key = f"{s3_prefix}{os.path.basename(tar_filename)}"
    s3 = boto3.client('s3')
    s3.upload_file(tar_filename, bucket_name, s3_key)
    print(f"Uploaded {tar_filename} to s3://{bucket_name}/{s3_key}")

# Upload the archive created above to the bucket and prefix from the configuration cell
upload_to_s3(tar_filename, BUCKET_NAME, S3_PREFIX)

Uploaded my_run_2025-06-11.tar.gz to s3://labdev-sagemaker-studio/lammps_simulation/your_name/my_run_2025-06-11.tar.gz
