In [4]:
import getpass
from pathlib import Path
from typing import Optional

import boto3
import numpy as np
import pandas as pd
from preconfig import Preconfig


# 1. Upload config files to S3

In [5]:
# Preconfig parses the template file and generates a list of config files.
# Every generated config is then uploaded to S3 once per repeat, under the key
# <job_name>/config/<job_name>_<repeat>.cym.
preconfig = Preconfig()
path_to_template = '../templates/vary_compress_rate.cym.tpl'
configs = preconfig.parse(path_to_template, {})
s3_client = boto3.client("s3")
bucket = 'cytosim-working-bucket'
num_repeats = 5
job_names = []
# NOTE(review): `buffered` is not read by any cell visible here; kept in case a
# cell outside this view relies on it.
buffered = np.empty((len(configs)), dtype=object)
for config in configs:
    # Drop the trailing 4 characters (the ".cym" extension) to get the job name.
    job_name = config[:-4]
    job_names.append(job_name)
    # Read the file once and close it immediately instead of opening (and
    # never closing) a new handle for every repeat.
    with open(config, "rb") as config_file:
        config_bytes = config_file.read()
    for repeat in range(num_repeats):
        config_name = f'{job_name}/config/{job_name}_{repeat}.cym'
        s3_client.put_object(Bucket=bucket, Key=config_name, Body=config_bytes)
# Display the collected job names as the cell output.
job_names

['vary_compress_rate0000',
 'vary_compress_rate0001',
 'vary_compress_rate0002',
 'vary_compress_rate0003',
 'vary_compress_rate0004',
 'vary_compress_rate0005',
 'vary_compress_rate0006']

# 2a. Specify job definition

In [11]:
# Placeholder string, presumably to be replaced by the ARN of an existing job
# definition when section 2b is skipped — TODO confirm. The submit loop in
# section 3 reads job_definitions[index] rather than this variable.
job_definition_arn = "job_definition_arn"

# 2b. Create and register job definition

In [2]:
# Parameters for a job definition
# NOTE(review): the registration loop below passes equivalent literal values to
# make_batch_job instead of reading image / vcpus / memory from this cell.
from container_collection.batch.register_batch_job import register_batch_job
job_definition_name = "karthikv_cytosim_varycompressrate"
# Container image used for the job definition.
image = "simularium/cytosim:latest"
# vCPU count and memory value for the container.
vcpus = 1
memory = 7000
# S3 URL of the working bucket (note the s3:// scheme and trailing slash).
bucket_name = "s3://cytosim-working-bucket/"
# Starts empty; the registration loop assigns one job name per config.
simulation_name = ''

In [15]:
# Prompt for the AWS account id without echoing it; the value is passed to
# make_batch_job, which embeds it in the job role ARN.
account = getpass.getpass()

In [16]:
# make_batch_job takes the parameters below and returns the dictionary that is used to register a batch job definition.
def make_batch_job(
    name: str,
    image: str,
    account: str,
    region: str,
    user: str,
    vcpus: int,
    memory: int,
    prefix: str,
    simulation: Optional[str] = None,
) -> dict:
    """Build the job definition dictionary for a containerized batch job.

    Parameters
    ----------
    name
        Job definition name; combined with ``user`` as ``"{user}-{name}"``.
    image
        Container image placed in ``containerProperties``.
    account
        Account id embedded in the ``BatchJobRole`` role ARN.
    region
        Accepted for interface compatibility; not used in the returned dict.
    user
        User name prefixed to the job definition name.
    vcpus
        vCPU count placed in ``containerProperties``.
    memory
        Memory value placed in ``containerProperties``.
    prefix
        Value of the ``S3_INPUT_URL`` environment variable. Previously this
        argument was ignored and the module-level ``bucket_name`` was read
        instead, which silently broke once ``bucket_name`` was rebound.
    simulation
        Value of the ``SIMULATION_NAME`` environment variable. When omitted,
        the module-level ``simulation_name`` is used, as before.

    Returns
    -------
    dict
        The job definition dictionary.
    """
    # Only touch the module-level variable when no explicit name is given, so
    # existing callers that set `simulation_name` globally keep working.
    sim_name = simulation_name if simulation is None else simulation
    return {
        "jobDefinitionName": f"{user}-{name}",
        "type": "container",
        "containerProperties": {
            "image": image,
            "vcpus": vcpus,
            "memory": memory,
            "environment": [
                {"name": "SIMULATION_TYPE", "value": "AWS"},
                {"name": "S3_INPUT_URL", "value": prefix},
                {"name": "SIMULATION_NAME", "value": sim_name},
            ],
            "jobRoleArn": f"arn:aws:iam::{account}:role/BatchJobRole",
        },
    }

In [17]:
# Build one job definition per config with make_batch_job, register it with
# register_batch_job, and keep the registration result at the config's index.
num_definitions = len(configs)
jobs = np.empty(num_definitions)
job_definitions = np.empty(num_definitions, dtype=object)
for index in range(num_definitions):
    print(index)
    # make_batch_job reads the simulation name from this module-level variable.
    simulation_name = job_names[index]
    print(simulation_name)
    job_definition = make_batch_job(
        name=f"cytosim-varycompressrate-dryrun-{index}",
        image='simularium/cytosim:latest',
        account=account,
        region='us-west-2',
        user='karthikv',
        vcpus=1,
        memory=7000,
        prefix='s3://cytosim-working-bucket/',
    )
    registered_jd = register_batch_job(job_definition)
    job_definitions[index] = registered_jd
    

        


0
vary_compress_rate0000
1
vary_compress_rate0001
2
vary_compress_rate0002
3
vary_compress_rate0003
4
vary_compress_rate0004
5
vary_compress_rate0005
6
vary_compress_rate0006


# 3. Submit job

In [8]:
# Submit batch job allows us to submit a batch job with a given job definition and job name.
from container_collection.batch.submit_batch_job import submit_batch_job

In [9]:
# Restrict the submission to the last three generated configs.
new_configs = configs[-3:]
# Display the selection as the cell output.
new_configs

['vary_compress_rate0004.cym',
 'vary_compress_rate0005.cym',
 'vary_compress_rate0006.cym']

In [10]:
# Parameters for our batch job [size indicates our desired number of repeats]
# Prefix of every submitted job's name. Note: this rebinds the `job_name`
# variable that the upload loop in section 1 also used.
job_name = "cytosim-varycompressrate"
user = "karthikv"
# Name of the queue passed to submit_batch_job.
queue = "general"
size = 5

In [18]:
# Submit one batch job per config in new_configs; `size` is passed through as
# the desired number of repeats for each submission.
# new_configs holds the trailing entries of the config list, so the matching
# job definitions are the trailing entries of job_definitions. Deriving the
# starting index avoids a hard-coded offset that breaks when the number of
# configs or the size of the selection changes.
first_index = len(job_definitions) - len(new_configs)
for index in range(first_index, len(job_definitions)):
    submission_name = f'{job_name}-completerun-config{index}'
    print(index)
    print(submission_name)
    submit_batch_job(
        name=submission_name,
        job_definition_arn=job_definitions[index],
        user=user,
        queue=queue,
        size=size,
    )
    

4
cytosim-varycompressrate-completerun-config4
5
cytosim-varycompressrate-completerun-config5
6
cytosim-varycompressrate-completerun-config6


# 4. Monitor job status

In [None]:
# TODO: check job status, print progress bar
from container_collection.batch.check_batch_job import check_batch_job


# 5. Load results

In [5]:
# get results from s3
#simulation_name = 's3://cytosim-working-bucket/vary_compress_rate0000/outputs'
import boto3
# job_arn = 'arn:aws:batch:us-west-2:108503617402:job/3591d595-f11a-40a9-b340-6fd8288aba4f:0'
# client = boto3.client("batch")
# response = client.describe_jobs(jobs=[job_arn])["jobs"]

def read_cytosim_s3_file(bucket_name, file_name):
    """Download an object from S3 and return its text content as lines.

    Parameters
    ----------
    bucket_name
        Name of the S3 bucket to read from.
    file_name
        Key of the object inside the bucket.

    Returns
    -------
    list of str or None
        The UTF-8 decoded content split into lines, or ``None`` when fetching
        or decoding raised an exception (the error message is printed).
    """
    client = boto3.client('s3')
    try:
        body = client.get_object(Bucket=bucket_name, Key=file_name)['Body']
        text = body.read().decode('utf-8')
    except Exception as e:
        # Best-effort read: report the problem and signal failure with None.
        print(f"An error occurred while reading the file: {e}")
        return None
    return text.splitlines()
#read_file = read_cytosim_s3_file('cytosim-working-bucket', 'vary_compress_rate0001/outputs/0/objects.cmo')
#print(read_file)

In [11]:
def convert_to_dataframe(fiber_energy_all, fiber_forces_all, suffix = None, rigidity = 0.041):
    """Parse per-point fiber force report lines into a DataFrame and save it as CSV.

    The report is read as a sequence of frames. A line starting with ``% time``
    opens a frame (its last token is the time), a line starting with ``% end``
    closes it, other ``%`` lines and blank lines are skipped, and every
    remaining line must hold exactly eight whitespace-separated fields:
    fiber id, x/y/z position, x/y/z force and segment curvature.

    Points are numbered per fiber: consecutive rows with the same fiber id get
    0, 1, 2, ...; numbering restarts whenever the fiber id changes or a new
    frame begins. Fiber ids may start at any value and need not be consecutive.

    Parameters
    ----------
    fiber_energy_all : iterable of str
        Accepted for interface compatibility; its contents do not affect the
        result.
    fiber_forces_all : iterable of str
        Lines of the fiber force report.
    suffix : str, optional
        Appended to the output file name ``actin-forces``.
    rigidity : float
        Factor used for ``segment_energy = segment_curvature * rigidity * 1000``.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by (``time``, ``id``), where ``id`` is
        ``"<fiber id>_<point number>"``, holding the parsed columns plus
        ``force_magnitude`` and ``segment_energy``.

    Raises
    ------
    ValueError
        If no complete frame is found, or a data row does not consist of eight
        parseable fields.

    Side effects
    ------------
    Writes ``dataframes/actin-forces{suffix}.csv`` relative to the current
    working directory and prints a completion message.
    """
    frame_times = []
    frames = []
    frame_rows = {}
    previous_fiber = None  # fiber id of the preceding data row in this frame
    point_index = 0

    for raw_line in fiber_forces_all:
        line = raw_line.strip()
        if line.startswith('%'):
            if line.startswith('% time'):
                frame_times.append(float(line.split()[-1]))
                frame_rows = {}
                previous_fiber = None
            elif line.startswith('% end'):
                frames.append(pd.DataFrame.from_dict(frame_rows, orient='index'))
                previous_fiber = None
            continue

        fields = line.split()
        if not fields:
            continue
        (fiber_id, xpos, ypos, zpos,
         xforce, yforce, zforce, segment_curvature) = fields

        # Track the actual previous fiber id rather than a running counter, so
        # the first row of a fiber always gets point 0 regardless of which ids
        # the report uses.
        fiber = int(fiber_id)
        if fiber == previous_fiber:
            point_index += 1
        else:
            point_index = 0
            previous_fiber = fiber

        frame_rows[f'{fiber_id}_{point_index}'] = {
            'fiber_id': fiber,
            'xpos': float(xpos), 'ypos': float(ypos), 'zpos': float(zpos),
            'xforce': float(xforce), 'yforce': float(yforce),
            'zforce': float(zforce),
            'segment_curvature': float(segment_curvature),
        }

    if not frames:
        raise ValueError(
            'No objects to concatenate: fiber_forces_all contains no complete '
            '"% time" ... "% end" frame'
        )

    all_outputs = pd.concat(frames, keys=frame_times, names=['time', 'id'])
    all_outputs['force_magnitude'] = np.sqrt(np.square(all_outputs['xforce']) +
                                             np.square(all_outputs['yforce']) +
                                             np.square(all_outputs['zforce']))

    #  Segment bending energy, in pN nm
    all_outputs['segment_energy'] = all_outputs['segment_curvature'] * rigidity * 1000

    save_folder = Path('dataframes')
    save_folder.mkdir(exist_ok=True, parents=True)

    file_name = 'actin-forces'
    if suffix is not None:
        file_name += suffix

    all_outputs.to_csv(save_folder / f'{file_name}.csv')

    print( 'finished parsing ')
    return all_outputs




In [1]:
from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats


In [2]:
# Inputs for the post-processing call below.
# NOTE(review): these rebind names used earlier in the notebook — `bucket_name`
# was previously an s3:// URL and `configs` was the list of generated .cym file
# names — so earlier sections must not be re-run after this cell without
# re-running their setup cells first.
bucket_name = 'cytosim-working-bucket'
num_repeats = 5
configs = ['vary_compress_rate0006']

In [10]:
%load_ext autoreload
%autoreload 2

In [3]:
# Post-process the outputs for every listed config and repeat with the imported
# helper; judging by the cell output it saves one CSV per (config index, repeat)
# under dataframes/.
create_dataframes_for_repeats(bucket_name, num_repeats, configs)

Processing index 0 and repeat 0
Saved Output to dataframes/actin-forces0_0.csv
Processing index 0 and repeat 1
Saved Output to dataframes/actin-forces0_1.csv
Processing index 0 and repeat 2
Saved Output to dataframes/actin-forces0_2.csv
Processing index 0 and repeat 3
Saved Output to dataframes/actin-forces0_3.csv
Processing index 0 and repeat 4
Saved Output to dataframes/actin-forces0_4.csv


In [None]:
from subcell_analysis.compression_workflow_runner import run_workflow
from subcell_analysis.compression_workflow_runner import run_workflow, run_metric_calculation
from subcell_analysis.compression_analysis import (
    COMPRESSIONMETRIC,
)

segene