In [None]:
import boto3
import getpass
import numpy as np
from preconfig import Preconfig
import pandas as pd
from pathlib import Path
from IPython.display import Image
from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo
from simulariumio import MetaData, DisplayData, DISPLAY_TYPE, ModelMetaData, InputFileData, ScatterPlotData


# 1. Upload config files to S3

In [None]:
# Preconfig parses a template file and generates a list of config files.
# Each generated config is uploaded to S3 once per repeat, under a key prefixed by its job name.
preconfig = Preconfig()
path_to_template = '../templates/vary_compress_rate.cym.tpl'
configs = preconfig.parse(path_to_template,{})
s3_client = boto3.client("s3")
bucket = 'cytosim-working-bucket'
num_repeats = 5
job_names = []
for config in configs:
    # Drop the last four characters of the config name (the uploads below use a ".cym" suffix).
    job_name = config[:-4]
    job_names.append(job_name)
    # Read the file once, inside a context manager, so the handle is always closed
    # (previously a new handle was opened for every repeat and never closed).
    with open(config, "rb") as config_file:
        config_bytes = config_file.read()
    for repeat in range(num_repeats):
        config_name = f'{job_name}/config/{job_name}_{repeat}.cym'
        s3_client.put_object(Bucket=bucket, Key=config_name, Body=config_bytes)
job_names

# 2a. Specify job definition

In [None]:
# Placeholder string for the ARN of an already-registered job definition.
# NOTE(review): the submission loop below passes entries of `job_definitions` instead — confirm whether this value is still needed.
job_definition_arn = "job_definition_arn"

# 2b. Create and register job definition

In [None]:
# Parameters for a job definition
from container_collection.batch.register_batch_job import register_batch_job
job_definition_name = "karthikv_cytosim_varycompressrate"
# Container image the batch job runs.
image = "simularium/cytosim:latest"
# Resources requested per container (memory is presumably in MiB, per AWS Batch convention — confirm).
vcpus = 1
memory = 7000
# S3 URL of the working bucket (note the s3:// scheme and trailing slash).
bucket_name = "s3://cytosim-working-bucket/"
# Default simulation name; the registration loop below overwrites it once per config.
simulation_name = ''

In [None]:
# Prompt for the AWS account ID without echoing it to the notebook output;
# the value is passed to make_batch_job, which interpolates it into the job role ARN.
account = getpass.getpass()

In [None]:
# make_batch_job takes the parameters below and returns a dictionary describing a batch job definition.
def make_batch_job(
    name: str,
    image: str,
    account: str,
    region: str,
    user: str,
    vcpus: int,
    memory: int,
    prefix: str,
    simulation_name: "str | None" = None,
) -> dict:
    """Build the job definition dictionary for one simulation.

    Args:
        name: Job definition name; combined with ``user`` as ``"{user}-{name}"``.
        image: Container image to run.
        account: AWS account ID, interpolated into the job role ARN.
        region: Not used in the returned dictionary; kept so existing calls keep working.
        user: User name prefixed to the job definition name.
        vcpus: Number of vCPUs requested for the container.
        memory: Memory requested for the container.
        prefix: S3 URL exposed to the container as ``S3_INPUT_URL``.
        simulation_name: Value exposed to the container as ``SIMULATION_NAME``.
            When omitted, the notebook-level ``simulation_name`` variable is
            used (empty string if it is not defined), so callers that set the
            global before calling keep working.

    Returns:
        Dictionary with ``jobDefinitionName``, ``type`` and ``containerProperties``.
    """
    if simulation_name is None:
        # Backward-compatible fallback for callers that set the global instead of passing it.
        simulation_name = globals().get("simulation_name", "")

    return {
        "jobDefinitionName": f"{user}-{name}",
        "type": "container",
        "containerProperties": {
            "image": image,
            "vcpus": vcpus,
            "memory": memory,
            "environment": [
                {"name": "SIMULATION_TYPE", "value": "AWS"},
                # Use the explicit argument rather than the global `bucket_name`,
                # which a later cell rebinds to a value without the s3:// scheme.
                {"name": "S3_INPUT_URL", "value": prefix},
                {"name": "SIMULATION_NAME", "value": simulation_name},
            ],
            "jobRoleArn": f"arn:aws:iam::{account}:role/BatchJobRole",
        },
    }

In [None]:
# Create one job definition per generated config with make_batch_job and
# register it with register_batch_job, keeping the returned value per config.
job_definitions = np.empty((len(configs)), dtype=object)
for index in range(len(configs)):
    print(index)
    # make_batch_job reads `simulation_name` from the notebook namespace, so it must be set before the call.
    simulation_name = job_names[index]
    print(simulation_name)
    job_definition = make_batch_job(
        f"cytosim-varycompressrate-{index}",
        image,
        account,
        'us-west-2',
        'karthikv',
        vcpus,
        memory,
        's3://cytosim-working-bucket/',
    )
    registered_jd = register_batch_job(job_definition)
    job_definitions[index] = registered_jd

# 3. Submit job

In [None]:
# Submit batch job allows us to submit a batch job with a given job definition and job name.
from container_collection.batch.submit_batch_job import submit_batch_job

In [None]:
# Restrict the submission below to the first four generated configs, then display them.
new_configs = configs[:4]
new_configs

In [None]:
# Parameters for our batch job [size indicates our desired number of repeats]
# Note: `job_name` was previously the per-config loop variable in the upload cell; here it becomes the common name prefix.
job_name = "cytosim-varycompressrate"
user = "karthikv"
queue = "general_on_demand"
size = 5

In [None]:
# Only the last expression of a cell is displayed, so show both values as a single tuple.
new_configs, job_name

In [None]:
# Submit one batch job per selected config; `size` is the number of repeats,
# so index * size simulations are requested in total.
for index in range(len(new_configs)):
    submission_name = f'{job_name}-completerun-config{index}'
    print(index)
    print(submission_name)
    submit_batch_job(
        name=submission_name,
        job_definition_arn=job_definitions[index],
        user=user,
        queue=queue,
        size=size,
    )
    

# 4. Monitor job status

In [None]:
# TODO: check job status, print progress bar
from container_collection.batch.check_batch_job import check_batch_job


# 5. Load results

In [None]:
from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats
import pandas as pd

In [None]:
# Bare bucket name (no s3:// scheme) for loading results; this rebinds the `bucket_name` set earlier.
bucket_name = 'cytosim-working-bucket'
num_repeats = 5
num_velocities = 7
# Config names for indices 2 .. num_velocities - 1. The index is zero-padded to four
# digits so names stay well-formed once the index reaches two digits.
configs = [f"vary_compress_rate{num:04d}" for num in range(2, num_velocities)]

In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Build dataframes for every repeat of every config listed above, reading from the given bucket.
# NOTE(review): later cells read `dataframes/actin_forces{config}_{repeat}.csv`; presumably this call writes those files — confirm.
create_dataframes_for_repeats(bucket_name, num_repeats, configs)

In [None]:
from subcell_analysis.compression_workflow_runner import run_workflow,  plot_metric, plot_metric_list
from subcell_analysis.compression_analysis import (
    COMPRESSIONMETRIC,
)

In [None]:
# Config indices to analyze; they appear in the `actin_forces{config_ind}_{repeat}.csv` file names.
config_inds = [3, 4]
# One independent row of per-repeat results for each config.
# `[[None] * n] * m` would repeat the SAME inner list m times, so writing results for one
# config would silently overwrite every other config's row; build a fresh list per row instead.
outputs = [[None] * num_repeats for _ in config_inds]

In [None]:
# Compression metrics passed to run_workflow for every repeat.
calc_metrics = [
    COMPRESSIONMETRIC.PEAK_ASYMMETRY,
    COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE,
    COMPRESSIONMETRIC.NON_COPLANARITY,
    COMPRESSIONMETRIC.TOTAL_FIBER_TWIST,
    COMPRESSIONMETRIC.SUM_BENDING_ENERGY,
]

In [None]:
# For each (config, repeat): load the fiber dataframe, compute the metrics,
# keep the result in `outputs`, and write it next to the input CSV.
for ct, config_ind in enumerate(config_inds):
    for repeat in range(num_repeats):
        print(f'config_ind: {config_ind}, repeat: {repeat}')
        all_output = pd.read_csv(f'dataframes/actin_forces{config_ind}_{repeat}.csv')
        repeat_metrics = run_workflow(all_output, calc_metrics)
        outputs[ct][repeat] = repeat_metrics
        repeat_metrics.to_csv(f'dataframes/actin_forces{config_ind}_{repeat}_compression_metrics.csv')

In [None]:
import matplotlib.pyplot as plt
# Here `config_ind` is a row position in `outputs` (i.e. a position in `config_inds`),
# not the config number used in the CSV file names.
config_ind = 0
metrics = [
    COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE,
    COMPRESSIONMETRIC.TOTAL_FIBER_TWIST,
    COMPRESSIONMETRIC.SUM_BENDING_ENERGY,
    COMPRESSIONMETRIC.PEAK_ASYMMETRY,
    COMPRESSIONMETRIC.NON_COPLANARITY,
]
# One figure per metric, one line per repeat (metric averaged within each timepoint).
for metric in metrics:
    fig, ax = plt.subplots()
    for repeat in range(num_repeats):
        repeat_metrics = outputs[config_ind][repeat]
        metric_by_time = repeat_metrics.groupby(["time"])[metric.value].mean()
        ax.plot(metric_by_time, label=f"config ind {config_ind} repeat {repeat}")
    ax.legend()
    ax.set(xlabel="time", ylabel=metric.value, title=f"{metric.value} by time")

### Plot pacmap embedding

In [None]:
import numpy as np
import pandas as pd
from subcell_analysis.compression_analysis import get_pacmap_embedding
from pacmap import PaCMAP
from scipy import interpolate as spinterp

#### create k x t x n x 3 numpy array of fiber points

In [None]:
# Load the fiber dataframe of every (config, repeat) pair, tag each with its
# repeat and config, and stack them into a single table.
num_repeats = 5
df_list = []
configs = [3, 4]
for config in configs:
    for repeat in range(num_repeats):
        csv_path = f'dataframes/actin_forces{config}_{repeat}.csv'
        df = pd.read_csv(csv_path).assign(repeat=repeat, config=config)
        df_list.append(df)
df_all = pd.concat(df_list)

In [None]:
# Save the combined fiber table for configs 3 and 4.
# The dataframe index is written too (pandas default), and it repeats across the concatenated frames.
df_all.to_csv("dataframes/all_fibers_configs_3_4.csv")

In [None]:
# Resample every fiber to a fixed number of points and every trajectory to a fixed
# number of timepoints, so all (config, repeat) trajectories share one array shape.
num_monomers = 100
num_timepoints = 101
all_config_repeats = []
cols_to_interp = ["xpos", "ypos", "zpos"]
# Normalized positions at which each fiber is resampled; identical for every timepoint.
u = np.linspace(0, 1, num_monomers)
for config, df_config in df_all.groupby("config"):
    for repeat, df_repeat in df_config.groupby("repeat"):
        fibers_by_time = []
        for time, df_time in df_repeat.groupby("time"):
            # interpolate xpos, ypos, zpos to num_monomers
            # assumes the rows within a timepoint are ordered along the fiber — TODO confirm
            X = df_time[cols_to_interp].values
            t = np.linspace(0, 1, X.shape[0])
            F = spinterp.interp1d(t, X.T, bounds_error=False, fill_value="extrapolate")
            fibers_by_time.append(F(u).T)
        all_times = np.array(fibers_by_time)
        interp_timepoints = np.around(len(all_times) / num_timepoints * np.arange(num_timepoints)).astype(int)
        # Rounding can produce an index equal to len(all_times) when a trajectory has
        # few timepoints (50 or fewer for 101 samples); clamp to the last valid frame.
        interp_timepoints = np.minimum(interp_timepoints, len(all_times) - 1)
        all_config_repeats.append(all_times[interp_timepoints, :, :])
# Stacked result: one entry per (config, repeat), each num_timepoints x num_monomers x 3.
all_config_repeats = np.array(all_config_repeats)

In [None]:
# Configure a 2-component PaCMAP embedding.
# NOTE(review): n_neighbors=None presumably lets PaCMAP choose the neighbor count — confirm.
# NOTE(review): no random_state is passed, so the embedding may differ between runs — confirm.
embedding = PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)

In [None]:
# Flatten everything after the first axis so each (config, repeat) trajectory becomes one row.
num_trajectories = all_config_repeats.shape[0]
reshaped_metrics = all_config_repeats.reshape(num_trajectories, -1)

In [None]:
# Fit the embedding on the flattened trajectories; the plot below reads columns 0 and 1 of the result.
embed_pos = embedding.fit_transform(reshaped_metrics)

Plot embeddings

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scatter the embedded trajectories, one color per config. Rows of `embed_pos`
# are laid out config by config with `num_repeats` consecutive rows each.
fig, ax = plt.subplots()
configs = [3, 4]
for ct, config in enumerate(configs):
    inds = ct * num_repeats + np.arange(num_repeats)
    config_points = embed_pos[inds]
    ax.scatter(config_points[:, 0], config_points[:, 1], label=f"config {config}")
ax.set(
    xlabel="embedding 1",
    ylabel="embedding 2",
    title="PaCMAP embedding of all repeats",
)
ax.legend()
plt.show()

# 6. Generate Simularium outputs

In [None]:
from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium
from subcell_analysis.compression_analysis import COMPRESSIONMETRIC
import boto3

In [None]:
# Number of repeats to convert, and the config index used in the metric CSV file names below.
# NOTE(review): the download cell hard-codes "vary_compress_rate0006"; presumably that is the config with index 4 — confirm.
num_repeats = 5
config_id = 4

Download files (only needs to be done once)

In [None]:
from pathlib import Path

# The local target folder must exist before download_file writes into it,
# so create it (a no-op when it is already there).
Path("data").mkdir(parents=True, exist_ok=True)

# Fetch the fiber segment curvature output of every repeat of config 0006.
s3_client = boto3.client("s3")
for repeat in range(num_repeats):
    s3_client.download_file(
        "cytosim-working-bucket",
        f"vary_compress_rate0006/outputs/{repeat}/fiber_segment_curvature.txt",
        f"data/fiber_segment_curvature_{repeat}.txt",
    )

### Process single repeat

In [None]:
# Conversion settings for the single-repeat example.
box_size = 3.0
scale_factor = 100

# Convert the downloaded curvature file of one repeat.
repeat = 0
input_file_path = f"data/fiber_segment_curvature_{repeat}.txt"
fiber_data = cytosim_to_simularium(
    input_file_path,
    box_size=box_size,
    scale_factor=scale_factor,
)

Create cytosim converter object

In [None]:
# Wrap the converted fiber data in a converter; plots are added to it and it is saved below.
cytosim_converter = CytosimConverter(fiber_data)

Read metric data

In [None]:
# Load the compression metrics written by the analysis section for this config and repeat.
df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv"
df = pd.read_csv(df_path)

Add metric plots

In [None]:
# Metrics to attach as plots to the single-repeat output.
plot_metrics = [
    COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE,
    COMPRESSIONMETRIC.TOTAL_FIBER_TWIST,
    COMPRESSIONMETRIC.SUM_BENDING_ENERGY,
    COMPRESSIONMETRIC.PEAK_ASYMMETRY,
    COMPRESSIONMETRIC.NON_COPLANARITY,
]

In [None]:
# Attach one scatter plot per metric to the single-repeat converter.
for metric in plot_metrics:
    # Mean of the metric within each timepoint for this repeat.
    metric_by_time = df.groupby(["time"])[metric.value].mean()
    cytosim_converter.add_plot(
        ScatterPlotData(
            # Use the enum's value, as the y-axis title does, instead of formatting the enum member itself.
            title=f"{metric.value} over time",
            xaxis_title="Time",
            yaxis_title=metric.value,
            # NOTE(review): 1E-5 is presumably the time between saved frames — confirm value and units.
            xtrace=np.arange(len(metric_by_time))*1E-5,
            ytraces={
                f"repeat {repeat}": metric_by_time,
            },
        )
    )

Save converted data

In [None]:
# Save the single-repeat trajectory (with its plots) under outputs/, named after config 0006 and the repeat number.
cytosim_converter.save(f"outputs/vary_compress_rate_0006_repeat_{repeat}")

### Process multiple repeats

In [None]:
# Conversion settings shared by all repeats.
box_size = 3.0
scale_factor = 100
# One display color per repeat; indexed by repeat number below, so it needs at least num_repeats entries.
colors = ["#F0F0F0", "#0000FF", "#FF0000", "#00FF00", "#FF00FF"]

Create initial trajectory data object

In [None]:
# Convert repeat 0 first; its trajectory data is the object the other repeats are appended to.
input_file_path = "data/fiber_segment_curvature_0.txt"
fiber_data = cytosim_to_simularium(
    input_file_path,
    box_size=box_size,
    scale_factor=scale_factor,
    color=colors[0],
    actin_number=0,
)
cytosim_converter = CytosimConverter(fiber_data)

# `_data` is a private attribute of the converter, read here to get at its trajectory data.
trajectory_data = cytosim_converter._data

Append additional repeats to trajectory data object

In [None]:
# Convert each remaining repeat and append its agents to the trajectory data built from repeat 0.
for repeat in range(1, num_repeats):
    input_file_path = f"data/fiber_segment_curvature_{repeat}.txt"
    # Each repeat gets its own color and actin number so the fibers can be told apart.
    fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor, color=colors[repeat], actin_number=repeat)
    # Note: this rebinds `cytosim_converter` on every iteration.
    cytosim_converter = CytosimConverter(fiber_data)
    # `_data` is a private attribute of the converter.
    new_agent_data = cytosim_converter._data.agent_data

    # The return value is discarded; presumably append_agents updates `trajectory_data` in place — confirm.
    trajectory_data.append_agents(new_agent_data)

In [None]:
# Imported here so this cell does not raise NameError: TrajectoryConverter is not
# among the simulariumio names imported at the top of the notebook.
from simulariumio import TrajectoryConverter

# Wrap the combined trajectory data (all repeats) in a generic converter for plotting and saving.
all_repeats_converter = TrajectoryConverter(trajectory_data)

Add plots for all repeats

In [None]:
# Metrics to attach as plots to the all-repeats output.
plot_metrics = [
    COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE,
    COMPRESSIONMETRIC.TOTAL_FIBER_TWIST,
    COMPRESSIONMETRIC.SUM_BENDING_ENERGY,
    COMPRESSIONMETRIC.PEAK_ASYMMETRY,
    COMPRESSIONMETRIC.NON_COPLANARITY,
]

Get metrics for all repeats

In [None]:
# Read the compression metrics of every repeat, tag each table with its repeat
# number, and stack them into one dataframe.
df_list = []
for repeat in range(num_repeats):
    df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv"
    df = pd.read_csv(df_path).assign(repeat=repeat)
    df_list.append(df)
df_all = pd.concat(df_list)

Add plots for converter object

In [None]:
# Attach one plot per metric to the all-repeats converter, with one trace per repeat.
for metric in plot_metrics:
    ytraces = {}
    for repeat, df_repeat in df_all.groupby("repeat"):
        # Mean of the metric within each timepoint for this repeat.
        ytraces[f"repeat {repeat}"] = df_repeat.groupby(["time"])[metric.value].mean()

    # Size the x axis from the traces built in this cell, rather than from a
    # `metric_by_time` variable left behind by the single-repeat section.
    num_points = max((len(trace) for trace in ytraces.values()), default=0)

    all_repeats_converter.add_plot(
        ScatterPlotData(
            title=f"{metric.value} over time",
            xaxis_title="Time",
            yaxis_title=metric.value,
            # NOTE(review): 1E-5 is presumably the time between saved frames — confirm value and units.
            xtrace=np.arange(num_points)*1E-5,
            ytraces=ytraces,
            render_mode="lines",
        )
    )

Save converted data

In [None]:
# Write the combined all-repeats trajectory, including its plots, under outputs/.
all_repeats_output_path = "outputs/vary_compress_rate_0006_all_repeats"
all_repeats_converter.save(all_repeats_output_path)