### concatenate_simulations

__Author:__ Emma Crenshaw \
__Date:__   7/18/2023 

__Purpose:__ Takes the output from batch array jobs on the cluster (embarrassingly parallel) that include a RANGE of values, concatenates them into a single CSV and saves the raw files in a compressed ZIP folder. The raw files are then deleted from the main simulations output folder.

__Inputs__:
* specify the simulation inputs
* specify the date
* specify whether the simulation represents baseline or not (slightly different naming convention)

In [1]:
import pandas as pd
import numpy as np
import os
import zipfile as zf

In [48]:
# ---------------------------------------------------------------------------
# Run configuration: edit the values in this cell to pick which batch of
# simulation outputs gets concatenated. Together they determine `sim_string`,
# which is used both as the output sub-folder name and inside each file name.
# ---------------------------------------------------------------------------

# File-name prefix of the simulation family; also selects `num_steps` below.
sim_name = 'mpox_'
# Date stamp that appears in the raw file names.
date = '2024-10-10'

# Number of raw files to read (files are numbered 1..num_sims).
num_sims = 100


# Values swept for the intervention start, and the label used for that sweep
# in the file names.
intervention_start = list(range(30,120,10))
intervention_start_name = '30to110'

#intervention_start = [70]
#intervention_start_name = '70'

behavior_change = 2
#isolation = [0,1,2]
#isolation_name = '012'
isolation = [2]
isolation_name = '2'


behavior_change_perc = 0.25
vax_scenario = 2
#vax_delay = list(range(0, -35, -5)) + [-365, -730]
#vax_delay_name = '0to-30'
vax_delay = list(range(5, 35, 5))
vax_delay_name = '5to30'
#vax_delay = [30]
#vax_delay_name = '30'



# 0 -> intervention scenario (file names built from the parameters above);
# any other value -> baseline run (file names use the literal 'baseline').
baseline = 0

# Number of time-step columns in each raw file for the chosen simulation family.
if sim_name == "mpox_":
    num_steps = 250
elif sim_name == "rstar_":
    num_steps = 16
else:
    # Fail here rather than with a NameError on `num_steps` in a later cell.
    raise ValueError(
        "Unknown sim_name " + repr(sim_name) + "; expected 'mpox_' or 'rstar_'"
    )

if baseline == 0:
    # Scenario identifier: the individual settings joined by '-'.
    sim_string = '-'.join(
        str(part)
        for part in (
            intervention_start_name,
            behavior_change,
            behavior_change_perc,
            isolation_name,
            vax_delay_name,
            vax_scenario,
        )
    )
else:
    sim_string = 'baseline'

# Folder that holds the raw per-job CSVs and receives the concatenated CSV.
location = 'output/' + sim_string + '/'

In [49]:
# Number of rows expected in every raw file: one per combination of the swept
# parameter values.
totnum = len(vax_delay)*len(intervention_start)*len(isolation)

# Note, the number of columns to add to num_steps depends on the data; the input from simulations with a range of
#       intervention start times needs +3, those with a single intervention start time only needs +2
# In the simulations with a single start time, the first column indicates the simulation number and the second
#       indicates the isolation scenario
# In the simulations with a range of start times, the first column is the simulation number, the second is the start
#       time for the behavioral intervention, and the third is the start time for the vaccinations
n_cols = num_steps + 3

# NOTE(review): the combined matrix starts with `totnum` all-zero placeholder
# rows, which end up at the top of the written CSV. They are kept so the output
# file layout does not change for existing downstream readers -- confirm whether
# they are actually wanted, and drop this first element if not.
blocks = [np.zeros((totnum, n_cols))]

for i in range(1, num_sims+1):
    raw_path = location + str(sim_name) + sim_string + '_' + str(i) + '_' + date + '.csv'
    # read_csv uses its default header handling, so the first line of each raw
    # file is consumed as column names and is not part of `raw`.
    raw = np.array(pd.read_csv(raw_path))

    # Fail with a message that names the offending file instead of an opaque
    # numpy concatenation error.
    if raw.shape != (totnum, n_cols - 1):
        raise ValueError(
            raw_path + ' has shape ' + str(raw.shape) + ', expected '
            + str((totnum, n_cols - 1))
            + ' (check the swept parameter lists and the column offset added to num_steps)'
        )

    # Prepend the simulation number as the first column of every row.
    sim = np.full((totnum, 1), i)
    raw2 = np.hstack((sim, raw))
    blocks.append(raw2)

# Stack once at the end; stacking inside the loop re-copies everything
# accumulated so far on every iteration.
df = np.vstack(blocks)



In [50]:
# Write the combined matrix to a single CSV next to the raw files, without a
# header row or an index column.
combined_csv_path = f"{location}{sim_name}{sim_string}_{date}.csv"

df_final = pd.DataFrame(df)
df_final.to_csv(combined_csv_path, header=False, index=False)

In [51]:
# Archive the raw per-job CSVs into one ZIP under 'output/' and then delete
# them from the scenario folder.
files = os.listdir(location)

combined_csv_name = sim_name + sim_string + '_' + date + '.csv'
archive_name = sim_name + sim_string + '_' + date + '.zip'
archive_path = 'output/' + archive_name

# Raw files = everything for this scenario/date except the concatenated CSV
# (and a same-named ZIP, should one sit in the scenario folder).
relevant = [x for x in files if (date in x) and ((sim_name + sim_string) in x) and
            (x != combined_csv_name) and (x != archive_name)]

if not relevant:
    # Opening the archive in 'w' mode with nothing to add would overwrite an
    # existing archive with an empty one (e.g. when this cell is re-run after
    # the raw files were already removed), so leave everything untouched.
    print('No raw files found in ' + location + '; leaving ' + archive_path + ' untouched.')
else:
    with zf.ZipFile(archive_path, 'w') as zipMe:
        for file in relevant:
            zipMe.write(filename = location+file, arcname = file, compress_type=zf.ZIP_DEFLATED)

    # Re-open the finished archive and confirm every raw file is present and
    # passes its CRC check before anything is deleted.
    with zf.ZipFile(archive_path) as zipCheck:
        corrupt_member = zipCheck.testzip()
        missing = set(relevant) - set(zipCheck.namelist())
    if corrupt_member is not None or missing:
        raise RuntimeError(
            'Archive ' + archive_path + ' failed verification (corrupt member: '
            + str(corrupt_member) + ', missing: ' + str(sorted(missing))
            + '); raw files were NOT deleted.'
        )

    # now delete the files
    for file in relevant:
        os.remove(location+file)