In [1]:
import sys
from io import StringIO
from IPython import get_ipython
import datetime
from shutil import copyfile



class IpyExit(SystemExit):
    """Exit Exception for IPython.

    Exception temporarily redirects stderr to buffer.
    """
    def __init__(self):
        print("exiting")
        sys.stderr = StringIO()

    def __del__(self):
        sys.stderr.close()
        sys.stderr = sys.__stderr__  # restore from backup

def ipy_exit():
    raise IpyExit

-------------------------------------
## Prepare Source, Data and Directories
- set input/output directories and user settings
- download source from github
- replace data directory with precomputed one

### **REQUIREMENTS:**
   - you have joined the project `covid19dynstat`
   - you have created the directory `$PROJECT_covid19dynstat/$USER`
   - you have copied/linked a `data` directory to `$PROJECT_covid19dynstat/$USER/data`
       - will replace `data` from git repository
       - MUST include `counties/counties.pkl` (explicitly for JURECA, not from git repo!)

In [3]:
import datetime
# fixed parameters
user_email = "leffenberger@uos.de"
cmpprj_name = "covid19dynstat" # do not change (used in slurm-file)

# run:
run_name = "BSTIM-Covid19_Window_Final"

# source: clone only first time with 'run_name'
git_repo = "https://github.com/neuroinfo-os/BSTIM-Covid19.git"
git_branch = "3WeekPred"

# csv: update if missing or requested
csv_update = True

# run slurm dependency chain
slurm_chain = False

today_date = datetime.date(2020,7,6) # year, month, day
sample_date = today_date - datetime.timedelta(days=26)
print(sample_date)
start_date = datetime.date(2020,1,28)
sample_date_id = (sample_date - start_date).days

# simulation: sample_ia
submit_job_ia   = True
job_ia_nodes        = 25 # 25
job_ia_taskspernode = 4 # 4
job_ia_runtime      = '24:00:00' # '24:00:00'

# simulation: sample_posterior
submit_job_post   = True
job_post_nodes        = 42 # 42
job_post_taskspernode = 6 # 6
job_post_runtime      = '8:00:00' # '24:00:00'

2020-06-10


In [4]:
print(sample_date_id)

134


### Set input/output directories and user settings

In [5]:
import os

# check users project directory
# (do NOT change as it is used in SLURM-jobscript, too)
user_proj_dir = os.path.join(os.getenv('PROJECT_'+cmpprj_name), os.getenv('USER'))
if not os.path.isdir(user_proj_dir):
    print("ERROR: project directory has no directory of the current user.")
    print("       please create ", user_proj_dir)
    raise IpyExit
else:
    print("user_proj_dir = ", user_proj_dir)
    
# check for SLURM output directory
# (do not change as it is set in SLURM-jobscript, too)
slurm_out_dir = os.path.join(user_proj_dir, 'run')
if not os.path.isdir(user_proj_dir):
    print("ERROR: SLURM output directory does not exist.")
    print("       please create ", slurm_out_dir)
    raise IpyExit
else:
    print("slurm_out_dir = ", slurm_out_dir)

user_proj_dir =  /p/project/covid19dynstat/effenberger2
slurm_out_dir =  /p/project/covid19dynstat/effenberger2/run


### Download source 

In [6]:
import os
from datetime import datetime
from git import Repo

# auto-generate run_name if empty
if not run_name:
    run_name = datetime.now().strftime("BSTIM-Covid19_%Y-%m-%d_%H-%M-%S_" + git_branch)

# set source_dir
source_dir = os.path.join(user_proj_dir, run_name)
print("source_dir = ", source_dir)

# set source_data_dir
source_data_dir = os.path.join(source_dir, 'data')
print("source_data_dir = ", source_data_dir)

# clone source if required
if not os.path.exists(source_dir):
    print("cloning git repository")

    if not git_branch:
        git_branch = 'master'
    print("git_branch = ", git_branch)

    if not git_repo:
        git_repo = 'https://github.com/neuroinfo-os/BSTIM-Covid19.git'
    print("git_repo = ", git_repo)

    repo = Repo.clone_from(git_repo, source_dir, branch=git_branch)
    #repo.heads['tag-name'].checkout() # checkout a certain tag

    # rename data dir if from clean checkout
    if os.path.exists(source_data_dir):
        os.rename(source_data_dir,  os.path.join(source_dir,'data_git'))
else:
    print("NO cloning of git repository")

source_dir =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final
source_data_dir =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data
NO cloning of git repository


### Replace data directory with precomputed one

In [7]:
import os
import shutil

# copy new data dir with precomputed pickel file
# if data directory does not exist
if not os.path.exists(source_data_dir):
    print("replacing data directory")
    user_data_dir = os.path.join(user_proj_dir, 'data')
    if os.path.exists(user_data_dir) and os.path.isdir(user_data_dir):
        shutil.copytree(user_data_dir, source_data_dir)
    else:
        print("ERROR: user data directory does not exists or is no directory.")
        raise IpyExit
else:
    print("NO replacing of data directory")

NO replacing of data directory


-------------------------------------------------------
## Get COVID-19 data from RKI
Covid-19 data is provided by the [Robert Koch Institute][4] via the publically accessiable [this](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0/data?orderBy=Meldedatum) link.  
We download the CSV table and store it in `./data/raw/`

### Download COVID-19 CSV

In [8]:
import os
import requests
from datetime import datetime

raw_csv_fpath = os.path.join(source_data_dir, 'raw', "covid19.csv")
print("raw_csv_fpath = ", raw_csv_fpath)

# download csv data if asked for
if not os.path.exists(raw_csv_fpath) or csv_update:
    print('download COVID-19 data')

    raw_csv_url = 'https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv'
    raw_csv_red = requests.get(raw_csv_url, allow_redirects=True)
    print("raw_csv_url = ", raw_csv_url)

    open(raw_csv_fpath, 'wb').write(raw_csv_red.content)
else:
    print("NO download of COVID-19 data")

raw_csv_fpath =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/raw/covid19.csv
download COVID-19 data
raw_csv_url =  https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv


### Preprocess COVID-19 CSV
The downloadable CSV table can be found in `./data/raw/` and is preprocessed to fit the BSTI Model implementation.

In [9]:
import os
import pandas as pd
import numpy as np
import json
import csv
import argparse
import re

from collections import OrderedDict

def preprocess_covid19_table(input_csv, output_csv):
    """Converts Covid-19 Data.Tabels provided by the RKI to a simpler format to fit the model"""

    counties = OrderedDict()
    with open("/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/raw/germany_county_shapes.json", "r") as data_file:
        shape_data = json.load(data_file)

    for idx, val in enumerate(shape_data["features"]):
        id_current = val["properties"]["RKI_ID"]
        name_current = val["properties"]["RKI_NameDE"]

        counties[name_current] = id_current

    covid19_data = pd.read_csv(input_csv, sep=',')
    #print(covid19_data)
    # this complicated procedure removes timezone information.
    regex = re.compile(r"([0-9]+)/([0-9]+)/([0-9]+).*")
    start_year, start_month, start_day = regex.search(
        covid19_data['Meldedatum'].min()).groups()
    end_year, end_month, end_day = regex.search(
        covid19_data['Meldedatum'].max()).groups()
    start_date = pd.Timestamp(
        int(start_year), int(start_month), int(start_day))
    end_date = pd.Timestamp(int(end_year), int(end_month), int(end_day))

    dates = [day for day in pd.date_range(start_date, end_date)]
    print(start_date)
    df = pd.DataFrame(index=dates)
    for county_name in counties:
        print('.',end='')
        series = np.zeros(len(df), dtype=np.int32)
        lk_data = covid19_data[covid19_data['Landkreis'] == county_name]
        for (d_id, day) in enumerate(dates):
            day_string = "{:04d}/{:02d}/{:02d} 00:00:00".format(day.year, day.month, day.day)
            cases = np.sum(lk_data[lk_data['Meldedatum']
                                   == day_string]['AnzahlFall'])
            if cases > 0:
                series[d_id] = cases
        df.insert(len(df.columns), counties[county_name], series)
    df_total = df.sum(axis=1)
    df_total.to_csv("/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/raw/../diseases/covid19_total.csv")
    df.to_csv(output_csv, sep=",")

data_csv_fpath = os.path.join(os.path.dirname(raw_csv_fpath), "..", "diseases", os.path.basename(raw_csv_fpath))
print("data_csv_fpath = ", data_csv_fpath)

if not os.path.exists(data_csv_fpath) or csv_update:
    print("preprocessing csv ", end='')
    preprocess_covid19_table(raw_csv_fpath, data_csv_fpath)
    print(" finished")
else:
    print("NO preprocessing csv")

data_csv_fpath =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/raw/../diseases/covid19.csv
preprocessing csv 2020-01-28 00:00:00
............................................................................................................................................................................................................................................................................................................................................................................................................................



 finished


----------------------------------------
## Simulation 1: Sample Interaction Effects (100x)

SAMPLE NEW IA EFFECTS FOR WINDOW (CURRENTLY 3 WEEKS)

`sample_ia_effects_window.py` reads  
- ../data/counties/counties.pkl
- ../data/diseases/{covid19}.csv

and outputs:
- ../data/ia_effect_samples/${sample_date}/{}_{}.pkl

sample_ia_effects.py is called 100x by SLURM (=farming) and calculates the same thing with different random numbers:  
- gridjob_sample_ia.slurm 
  - `#SBATCH --array=1-100:4`
- gridjob_sample_ia.slurm.sh
  - `THEANO_FLAGS="base_compiledir=${TASK_DIR}/,floatX=float32,device=cpu,openmp=True,mode=FAST_RUN,warn_float64=warn" OMP_NUM_THREADS=8  python3 sample_ia_effects_window.py > ${TASK_DIR}/log.txt`
- sample_ia_effects_window.py
  - ```nums_sample = range(100)
GID = int(os.environ["SGE_TASK_ID"])
num_sample = nums_sample[GID - 1]
filename = "../data/ia_effect_samples/${sample_date}/{}_{}.pkl".format(disease, num_sample)```

In [10]:
import os

# slurm jobfile
slurm_jobfile_ia = os.path.join(source_dir, 'src', "gridjob_sample_ia_window.slurm")
if not os.path.exists(slurm_jobfile_ia):
    print("ERROR: original SLURM job file does not exist")
    print("       please check for ", slurm_jobfile_ia)
    raise IpyExit
print("slurm_jobfile_ia = ", slurm_jobfile_ia)
    
slurm_shfile_ia_original = os.path.join(source_dir, 'src', "gridjob_sample_ia_window.slurm.sh")
if not os.path.exists(slurm_shfile_ia_original):
    print("ERROR: original SLURM sh file does not exist")
    print("       please check for ", slurm_shfile_ia_original)
    raise IpyExit
print("slurm_shfile_ia = ", slurm_shfile_ia_original)


slurm_shfile_ia = os.path.join(source_dir, 'src', "gridjob_sample_ia_window_{}.slurm.sh".format(sample_date_id))
copyfile(slurm_shfile_ia_original, slurm_shfile_ia)
    
# check is shell-script has executable bit
if not os.access(slurm_shfile_ia, os.X_OK):
    with open(slurm_shfile_ia, "r") as fd:
        os.chmod(fd.fileno(), 0o755)

if submit_job_ia:
    
    # check required input
    file_counties = os.path.join(source_data_dir, 'counties', 'counties.pkl')
    if not os.path.isfile(file_counties):
        print("ERROR: file data/counties.pkl is missing")
        raise IpyExit
    else:
        print("file_counties = ", file_counties)
        
    file_covid19 = os.path.join(source_data_dir, 'diseases', 'covid19.csv')
    if not os.path.isfile(file_covid19):
        print("ERROR: file data/covid19.csv is missing")
        raise IpyExit
    print("file_covid19 = ", file_covid19)
    
    # check output directory
    dir_iaeffect = os.path.join(source_data_dir, 'ia_effect_samples')
    if not os.path.exists(dir_iaeffect):
        os.mkdir(dir_iaeffect)    
    if not os.path.isdir(dir_iaeffect):
        print("ERROR: " + dir_iaeffect + " not a directroy")
        raise IpyExit
    #if os.listdir(dir_iaeffect):
    #    print("ERROR: directory " + dir_iaeffect + " not empty")
    #    raise IpyExit
    print("dir_iaeffect = ", dir_iaeffect)

else:
    print("NO checking")

slurm_jobfile_ia =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_ia_window.slurm
slurm_shfile_ia =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_ia_window.slurm.sh
file_counties =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/counties/counties.pkl
file_covid19 =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/diseases/covid19.csv
dir_iaeffect =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/ia_effect_samples


In [11]:
os.chdir(os.path.join(source_dir,'src'))
print(os.getcwd())

/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src


In [12]:
import fileinput
fileinput.close()

def prepare_jobfile_ia(slurm_jobfile_ia):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_jobfile_ia, inplace=1)
    try:
        for line in finput:
            line = "#SBATCH --job-name={}_IA_ROUT\n".format(sample_date_id) if "#SBATCH --job-name=" in line else line
            line = "#SBATCH --array=1-{}:{}\n".format(job_ia_nodes * job_ia_taskspernode, job_ia_taskspernode) if "#SBATCH --array=" in line else line
            line = "#SBATCH --ntasks-per-node={}\n".format(job_ia_taskspernode) if "#SBATCH --ntasks-per-node=" in line else line
            line = "#SBATCH --nodes=1\n"                          if "#SBATCH --nodes="       in line else line
            line = "#SBATCH --time={}\n".format(job_ia_runtime) if "#SBATCH --time="        in line else line
            line = "#SBATCH --export=DATE_ID={}\n".format(sample_date_id) if "#SBATCH --export=DATE_ID" in line else line
            line = "#SBATCH --mail-type=ALL\n"                  if "# #SBATCH --mail-type=" in line else line
            line = "#SBATCH --mail-user=" + user_email + '\n'   if "# #SBATCH --mail-user=" in line else line
            line = "# mkdir -p ${PROJECT}/${USER}/runs/\n" if "mkdir -p ${PROJECT}/${USER}/runs/" in line else line
            line = "srun --exclusive -n ${{SLURM_NTASKS}} gridjob_sample_ia_window_{}.slurm.sh".format(sample_date_id) if "srun" in line else line
            print(line, end='')
        finput.close()
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    #finput.close()

if submit_job_ia:
    prepare_jobfile_ia(slurm_jobfile_ia)
else:
    print("NO submit of slurm ia job")

In [13]:
import fileinput

def prepare_shfile_ia(slurm_shfile_ia):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_shfile_ia, inplace=1)
    try:
        for line in finput:
            line = "DAT_ID={}\n".format(sample_date_id) if "DAT_ID=" in line else line
            print(line, end='')
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    finput.close()

if submit_job_ia:
    prepare_shfile_ia(slurm_shfile_ia)
else:
    print("NO submit of slurm ia job")

In [14]:
import os
import time
from subprocess import (Popen, PIPE)

def submit_job(slurm_jobfile, submit_dir, sbatch_addargs=''):
    print("submitting slurm job")
    
    # build sbatch command
    sbatch_args = sbatch_addargs + " " + slurm_jobfile
    sbatch_cmd = ['sbatch'] + sbatch_args.split()
    print("sbatch_cmd = ", sbatch_cmd)

    # submit SLURM job
    process = Popen(sbatch_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=submit_dir)
    
    # block until finished and output stdout, stderr
    stdout, stderr = process.communicate() 
    sbatch_out = stdout.decode("utf-8")
    sbatch_err = stderr.decode("utf-8")
    
    print("---- stdout ----")        
    print(sbatch_out)
    print("---- stderr ----")
    print(sbatch_err)
        
    if process.returncode != 0:
        raise IpyExit
    
    # get SLURM job id
    slurm_jobid = ''
    if sbatch_out:
        slurm_jobid = sbatch_out.split()[-1]
    print("slurm_jobid = ", slurm_jobid)

    # save SLURM job id to file
    if slurm_jobid:
        with open(os.path.join(submit_dir, slurm_jobfile + ".sbatchout"), "w") as ofile:
            print("jobid: {}".format(slurm_jobid), file=ofile)

In [15]:


submit_dir_ia = os.path.join(source_dir, 'src')
print("submit_dir_ia = ", submit_dir_ia)

if submit_job_ia:
    submit_job(slurm_jobfile_ia, submit_dir_ia, '-vv')
    
    # better wait for a few seconds to ensure slurm has processed the new job internally
    # this ensures, that squeue will show at least some information
    time.sleep(5)
else:
    print("NO submit of slurm ia job")

submit_dir_ia =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src
submitting slurm job
sbatch_cmd =  ['sbatch', '-vv', '/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_ia_window.slurm']
---- stdout ----
Submitted batch job 8409333

---- stderr ----
sbatch: defined options
sbatch: -------------------- --------------------
sbatch: account             : covid19dynstat
sbatch: array               : 1-100:4
sbatch: job-name            : 134_IA_ROUT
sbatch: licenses            : home@just,project@just,scratch@just
sbatch: nodes               : 1
sbatch: ntasks-per-node     : 4
sbatch: partition           : batch
sbatch: verbose             : 2
sbatch: -------------------- --------------------
sbatch: end of defined options
sbatch: debug:  propagating RLIMIT_CORE=0
sbatch: debug:  propagating SLURM_PRIO_PROCESS=0
sbatch: debug:  propagating UMASK=0022
sbatch: debug:  Munge authentication plugin loaded
sbatch: Cray/Aries node selection 

In [16]:
from subprocess import (Popen, PIPE)

def status_job(slurm_jobid):
                  
    # build squeue command
    squeue_args = ' -l -u ' + os.getenv('USER') + ' -j ' + slurm_jobid
    squeue_cmd = ['squeue'] + squeue_args.split()
    print("squeue_cmd = ", squeue_cmd)

    # show status
    squeue_out = ''
    process = Popen(squeue_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()
    if stderr:
        print(stdout.decode("utf-8"))
        print(stderr.decode("utf-8"))
        #raise IpyExit
    return stdout.decode("utf-8")

def get_slurm_jobid(sbatchout_file):

    # get slurm job id from file
    if os.path.exists(sbatchout_file):
        with open(sbatchout_file) as ifile:
            for line in ifile:
                if 'jobid:' in line:
                    slurm_jobid = line.split()[-1]
                    print("slurm_jobid = ", slurm_jobid)
                    return slurm_jobid
    else:
        print("ERROR: not found " + sbatchout_file)

# get job id
submit_dir_ia = os.path.join(source_dir, 'src')
sbatchout_file_ia = os.path.join(submit_dir_ia, slurm_jobfile_ia + ".sbatchout")
slurm_jobid_ia = get_slurm_jobid(sbatchout_file_ia)



# call 'squeue'
if submit_job_ia and slurm_jobid_ia:            
    squeue_out_ia = status_job(slurm_jobid_ia)
    
    print(squeue_out_ia)
    
else:
    print("NO submit of slurm ia job")

slurm_jobid =  8409333
squeue_cmd =  ['squeue', '-l', '-u', 'effenberger2', '-j', '8409333']
Mon Jul 20 11:02:52 2020
             JOBID PARTITION     NAME     USER    STATE       TIME TIME_LIMI  NODES NODELIST(REASON)
  8409333_[1-97:4]     batch 134_IA_R effenber  PENDING       0:00   1:00:00      1 (Priority)



----------------------------------------
## Simulation 2: Sample Posterior per Window
`sample_window.py` reads  
- ./data/counties/counties.pkl
- ./data/ia_effect_samples/${sample_date}/covid19_{}.pkl


and outputs:
- ../data/mcmc_samples_backup/

sample_window.py is called
- gridjob_sample_posterior.slurm 
  - `#SBATCH --array=1'
- gridjob_sample_posterior.slurm.sh
  - `THEANO_FLAGS="base_compiledir=${TASK_DIR}/,floatX=float32,device=cpu,openmp=True,mode=FAST_RUN,warn_float64=warn" python3 sample_window.py > ${TASK_DIR}/log.txt`
- sample_window.py

```
filename_params = "../data/mcmc_samples_backup/parameters_{}_{}".format(disease, sample_date_id)
filename_pred = "../data/mcmc_samples_backup/predictions_{}_{}.pkl".format(disease, sample_date_id)
filename_pred_trend = "../data/mcmc_samples_backup/predictions_trend_{}_{}.pkl".format(disease, sample_date_id)
filename_model = "../data/mcmc_samples_backup/model_{}_{}.pkl".format(disease, sample_date_id)```.ipynb_checkpoints/

### Check requirements

In [18]:
import os

# check for SLURM job file
slurm_jobfile_post = os.path.join(source_dir, 'src', "gridjob_sample_window_posterior.slurm")
if not os.path.exists(slurm_jobfile_post):
    print("ERROR: original SLURM job file does not exist")
    print("       please check for ", slurm_jobfile_post)
    raise IpyExit
else:
    print("slurm_jobfile_post = ", slurm_jobfile_post)

slurm_shfile_post_original = os.path.join(source_dir, 'src', "gridjob_sample_window_posterior.slurm.sh")

slurm_shfile_post = os.path.join(source_dir, 'src', "gridjob_sample_window_posterior_{}.slurm.sh".format(sample_date_id))
copyfile(slurm_shfile_post_original, slurm_shfile_post)

if not os.path.exists(slurm_shfile_post):
    print("ERROR: original SLURM sh file does not exist")
    print("       please check for ", slurm_shfile_post)
    raise IpyExit
print("slurm_shfile_post = ", slurm_shfile_post)
    
# check is shell-script has executable bit
if not os.access(slurm_shfile_post, os.X_OK):
    with open(slurm_shfile_post, "r") as fd:
        os.fchmod(fd.fileno(), 0o755)

if submit_job_post:
    
    # check required input
    file_counties = os.path.join(source_data_dir, 'counties', 'counties.pkl')
    if not os.path.isfile(file_counties):
        print("ERROR: file data/counties.pkl is missing")
        raise IpyExit
    else:
        print("file_counties = ", file_counties)

    if not slurm_chain:
        # check output of sample_ia (only if we slurm dependencies)
        dir_iaeffect = os.path.join(source_data_dir, 'ia_effect_samples')
        for i in range(100):
            file_iaeffect = os.path.join(dir_iaeffect, "covid19_{}.pkl".format(i))
            if not os.path.isfile(file_iaeffect):
                print("ERROR: file " + file_iaeffect + "  is missing")
                raise IpyExit
        print("file_iaeffect = " + dir_iaeffect + "/covid19_{}.pkl")
    
    # check output directory
    dir_mcmcsamples = os.path.join(source_data_dir, 'mcmc_samples_backup')
    if not os.path.exists(dir_mcmcsamples):
        os.mkdir(dir_mcmcsamples)
    if not os.path.isdir(dir_mcmcsamples):
        print("ERROR: " + dir_mcmcsamples + " not a directory")
        raise IpyExit
        
    #if len(os.listdir(dir_mcmcsamples)) > 1:
    #    print("ERROR: directory " + dir_mcmcsamples + " not empty")
    #    raise IpyExit
    print("dir_mcmcsamples = ", dir_mcmcsamples)

slurm_jobfile_post =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_window_posterior.slurm
slurm_shfile_post =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_window_posterior_134.slurm.sh
file_counties =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/counties/counties.pkl
file_iaeffect = /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/ia_effect_samples/covid19_{}.pkl
dir_mcmcsamples =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/mcmc_samples_backup


In [19]:
fileinput.close()

### Prepare SLURM job file (sample_posterior)

In [20]:
import fileinput

IA_DONE = False

def prepare_jobfile_post(slurm_jobfile_post):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_jobfile_post, inplace=1)
    try:
        for line in finput:
            line = "#SBATCH --job-name={}_POST_ROUT\n".format(sample_date_id) if "#SBATCH --job-name=" in line else line
            #line = "#SBATCH --array=1-{}\n".format(120) if "#SBATCH --array=" in line else line
            line = "#SBATCH --ntasks-per-node={}\n".format(1) if "#SBATCH --ntasks-per-node=" in line else line
            line = "#SBATCH --nodes=1\n"                          if "#SBATCH --nodes="       in line else line
            line = "#SBATCH --time={}\n".format(job_post_runtime) if "#SBATCH --time="        in line else line
            line = "#SBATCH --mail-type=ALL\n"                    if "# #SBATCH --mail-type=" in line else line
            line = "#SBATCH --mail-user=" + user_email + '\n'     if "# #SBATCH --mail-user=" in line else line
            line = "# mkdir -p ${PROJECT}/${USER}/runs/\n" if "mkdir -p ${PROJECT}/${USER}/runs/" in line else line
            if IA_DONE:
                line = "##SBATCH --dependency=afterany:\n" if "#SBATCH --dependency" in line else line
            else:
                line = "#SBATCH --dependency=afterany:{}\n".format(slurm_jobid_ia) if "#SBATCH --dependency" in line else line
            line = "srun --exclusive -n ${{SLURM_NTASKS}} gridjob_sample_window_posterior_{}.slurm.sh\n".format(sample_date_id) if "srun" in line else line
            print(line, end='')
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    finput.close()

if submit_job_post:
    prepare_jobfile_post(slurm_jobfile_post)
else:
    print("NO submit of slurm ia job")

In [21]:

def prepare_shfile_post(slurm_shfile_post):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_shfile_post, inplace=1)
    try:
        for line in finput:
            line = "DAT_ID={}\n".format(sample_date_id) if "DAT_ID=" in line else line
            print(line, end='')
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    finput.close()

if submit_job_ia:
    prepare_shfile_post(slurm_shfile_post)
else:
    print("NO submit of slurm ia job")

### Submit SLURM job (sample_posterior)

In [22]:
submit_dir_post = os.path.join(source_dir, 'src')
print("submit_dir_post = ", submit_dir_post)

if submit_job_post:
    
    sbatch_addargs = '-vv'

    if slurm_chain:
        sbatchout_file_ia = os.path.join(submit_dir_ia, slurm_jobfile_ia + ".sbatchout")
        slurm_jobid_ia = get_slurm_jobid(sbatchout_file_ia)
        #if slurm_jobid_ia:
        #    sbatch_addargs += " --dependency=aftercorr:{}".format(slurm_jobid_ia)
    
    submit_job(slurm_jobfile_post, submit_dir_post, sbatch_addargs)
    

    # better wait for a few seconds to ensure slurm has processed the new job internally
    # this ensures, that squeue will show at least some information
    time.sleep(5)
else:
    print("NO submit of slurm job")

submit_dir_post =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src
submitting slurm job
sbatch_cmd =  ['sbatch', '-vv', '/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_window_posterior.slurm']
---- stdout ----
Submitted batch job 8409343

---- stderr ----
sbatch: defined options
sbatch: -------------------- --------------------
sbatch: account             : covid19dynstat
sbatch: array               : 1
sbatch: dependency          : afterany:8409333
sbatch: error               : /p/project/covid19dynstat/%u/runs/%A_e.txt
sbatch: job-name            : 134_POST_ROUT
sbatch: licenses            : home@just,project@just,scratch@just
sbatch: mail-type           : BEGIN,END,FAIL,REQUEUE,STAGE_OUT
sbatch: mail-user           : leffenberger@uos.de
sbatch: nodes               : 1
sbatch: ntasks-per-node     : 1
sbatch: output              : /p/project/covid19dynstat/%u/runs/%A_o.txt
sbatch: partition           : batch
sbatch: time     

### Show status of submitted job (sample_ia)

In [23]:
# get job id
sbatchout_file_post = os.path.join(submit_dir_post, slurm_jobfile_post + ".sbatchout")
slurm_jobid_post = get_slurm_jobid(sbatchout_file_post)

# call 'squeue'
if slurm_jobid_post:
    squeue_out_post = status_job(slurm_jobid_post)
    print(squeue_out_post)
else:
    print("ERROR: no slurm job id found")

slurm_jobid =  8409343
squeue_cmd =  ['squeue', '-l', '-u', 'effenberger2', '-j', '8409343']
Mon Jul 20 11:03:36 2020
             JOBID PARTITION     NAME     USER    STATE       TIME TIME_LIMI  NODES NODELIST(REASON)
       8409343_[1]     batch 134_POST effenber  PENDING       0:00   8:00:00      1 (Dependency)



In [24]:
fileinput.close()

## PRODUCE THE PLOTS

In [25]:
import os

# check for SLURM job file
slurm_jobfile_plots = os.path.join(source_dir, 'src', "gridjob_produce_plots.slurm")
if not os.path.exists(slurm_jobfile_post):
    print("ERROR: original SLURM job file does not exist")
    print("       please check for ", slurm_jobfile_post)
    raise IpyExit
else:
    print("slurm_jobfile_post = ", slurm_jobfile_post)

slurm_shfile_plots_original = os.path.join(source_dir, 'src', "gridjob_produce_plots.slurm.sh")
slurm_shfile_plots = os.path.join(source_dir, 'src', "gridjob_produce_plots_{}.slurm.sh".format(sample_date_id))
copyfile(slurm_shfile_plots_original, slurm_shfile_plots )
if not os.path.exists(slurm_shfile_plots):
    print("ERROR: original SLURM sh file does not exist")
    print("       please check for ", slurm_shfile_plots)
    raise IpyExit
print("slurm_shfile_ia = ", slurm_shfile_plots)

    
# check is shell-script has executable bit
if not os.access(slurm_shfile_post, os.X_OK):
    with open(slurm_shfile_post, "r") as fd:
        os.fchmod(fd.fileno(), 0o755)

if submit_job_post:
    
    # check required input
    file_counties = os.path.join(source_data_dir, 'counties', 'counties.pkl')
    if not os.path.isfile(file_counties):
        print("ERROR: file data/counties.pkl is missing")
        raise IpyExit
    else:
        print("file_counties = ", file_counties)

    if not slurm_chain:
        # check output of sample_ia (only if we slurm dependencies)
        dir_iaeffect = os.path.join(source_data_dir, 'ia_effect_samples')
        for i in range(100):
            file_iaeffect = os.path.join(dir_iaeffect, "covid19_{}.pkl".format(i))
            if not os.path.isfile(file_iaeffect):
                print("ERROR: file " + file_iaeffect + "  is missing")
                raise IpyExit
        print("file_iaeffect = " + dir_iaeffect + "/covid19_{}.pkl")
    
    # check output directory
    dir_mcmcsamples = os.path.join(source_data_dir, 'mcmc_samples_backup')
    if not os.path.exists(dir_mcmcsamples):
        os.mkdir(dir_mcmcsamples)
    if not os.path.isdir(dir_mcmcsamples):
        print("ERROR: " + dir_mcmcsamples + " not a directory")
        raise IpyExit
        
    #if len(os.listdir(dir_mcmcsamples)) > 1:
    #    print("ERROR: directory " + dir_mcmcsamples + " not empty")
    #    raise IpyExit
    print("dir_mcmcsamples = ", dir_mcmcsamples)

slurm_jobfile_post =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_sample_window_posterior.slurm
slurm_shfile_ia =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_produce_plots_134.slurm.sh
file_counties =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/counties/counties.pkl
file_iaeffect = /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/ia_effect_samples/covid19_{}.pkl
dir_mcmcsamples =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/data/mcmc_samples_backup


In [26]:
fileinput.close()
import fileinput

def prepare_jobfile_plots(slurm_jobfile_plots):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_jobfile_plots, inplace=1)
    try:
        for line in finput:
            line = "#SBATCH --job-name={}_PLOT_ROUT\n".format(sample_date_id) if "#SBATCH --job-name=" in line else line
            line = "#SBATCH --dependency=afterany:{}\n".format(slurm_jobid_post) if "#SBATCH --dependency" in line else line
            line = "srun --exclusive -n ${{SLURM_NTASKS}} gridjob_produce_plots_{}.slurm.sh\n".format(sample_date_id) if "srun" in line else line
            print(line, end='')
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    finput.close()

if submit_job_post:
    prepare_jobfile_plots(slurm_jobfile_plots)
else:
    print("NO submit of slurm ia job")

In [27]:
def prepare_shfile_plots(slurm_shfile_plots):
    
    # replace lines with user-info
    finput = fileinput.input(slurm_shfile_plots, inplace=1)
    try:
        for line in finput:
            line = "DAT_ID={}\n".format(sample_date_id) if "DAT_ID=" in line else line
            print(line, end='')
    except:
        print("ERROR: could not prepare slurm job file")
        raise IpyExit
    finput.close()

if submit_job_ia:
    prepare_shfile_plots(slurm_shfile_plots)
else:
    print("NO submit of slurm ia job")
    
# check is shell-script has executable bit
if not os.access(slurm_shfile_plots, os.X_OK):
    with open(slurm_shfile_plots, "r") as fd:
        os.fchmod(fd.fileno(), 0o755)

In [28]:
submit_dir_post = os.path.join(source_dir, 'src')
print("submit_dir_post = ", submit_dir_post)

if submit_job_post:
    
    sbatch_addargs = '-vv'

    if slurm_chain:
        sbatchout_file_ia = os.path.join(submit_dir_ia, slurm_jobfile_ia + ".sbatchout")
        slurm_jobid_ia = get_slurm_jobid(sbatchout_file_ia)
        #if slurm_jobid_ia:
        #    sbatch_addargs += " --dependency=aftercorr:{}".format(slurm_jobid_ia)
    
    submit_job(slurm_jobfile_plots, submit_dir_post, sbatch_addargs)
    

    # better wait for a few seconds to ensure slurm has processed the new job internally
    # this ensures, that squeue will show at least some information
    time.sleep(5)
else:
    print("NO submit of slurm job")

submit_dir_post =  /p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src
submitting slurm job
sbatch_cmd =  ['sbatch', '-vv', '/p/project/covid19dynstat/effenberger2/BSTIM-Covid19_Window_Final/src/gridjob_produce_plots.slurm']
---- stdout ----
Submitted batch job 8409344

---- stderr ----
sbatch: defined options
sbatch: -------------------- --------------------
sbatch: account             : covid19dynstat
sbatch: array               : 1
sbatch: dependency          : afterany:8409343
sbatch: error               : /p/project/covid19dynstat/%u/runs/%A_e.txt
sbatch: job-name            : 134_PLOT_ROUT
sbatch: licenses            : home@just,project@just,scratch@just
sbatch: mail-type           : BEGIN,END,FAIL,REQUEUE,STAGE_OUT
sbatch: mail-user           : leffenberger@uos.de
sbatch: nodes               : 1
sbatch: ntasks-per-node     : 1
sbatch: output              : /p/project/covid19dynstat/%u/runs/%A_o.txt
sbatch: partition           : batch
sbatch: time               