##### **This notebook splits the conversation ids into job files and submits them as Slurm jobs**

In [1]:
import pandas as pd
import numpy as np
import warnings
import glob
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as pltc
from tqdm import tqdm
import sys
import os

import importlib

#### packages
import helper.strategy_helper as st
import helper.visualization as vz
import helper.helper as hp
import helper.file_helper as file_hp
import config.config as config_hp
import helper.pandas_helper as pd_hp

##### **Read config files**

In [2]:
# Load the project configuration and pull out the two sections used below.
config = config_hp.config()
path = config['PATHS']
poster_path = config['POSTER_PATH']

# Individual locations from the PATHS section.
derived_path, all_tweet_data = path['derived_path'], path['all_tweet_data']
plot_path, conversation_path = path['plot_path'], path['conversation_path']

# Folder that receives the split job files written further down.
reply_path_5 = os.path.join(conversation_path, '5_reply_count')
file_hp.create_folder(conversation_path, '5_reply_count')

# Full path of the conversation-id list.
conversation_ids_5 = path['conversation_ids_5']
conv_ids_5_path = os.path.join(conversation_path, conversation_ids_5)

# Read from the POSTER_PATH section.
poster_dead_tweet_file = poster_path['poster_dead_tweet_file']

##### **Split the conversation file**

In [3]:
def read_file(file):
    '''
    Return the lines of a text file as a list of strings,
    with the line terminators removed.
    :param file: file name with location
    '''
    with open(str(file)) as handle:
        content = handle.read()

    return content.splitlines()
    
    
# Load the id file; each line of the file becomes one entry of `ids`.
ids = read_file(conv_ids_5_path)

In [4]:
# Number of entries that were read from the id file.
print(len(ids))

96041


In [5]:
def keep_only_alive_conversations(ids, 
                                  file,
                                  save_path
                                 ):
    '''
    Remove the ids listed in ``file`` from ``ids``, save what is left and
    return it.

    Duplicate entries in ``ids`` are collapsed. The result keeps the order in
    which the ids first appear in ``ids``, so repeated runs on the same input
    produce the same list.

    :param ids: iterable of ids to filter
    :param file: path of the file holding the ids to exclude
                 (read with file_hp.read_file)
    :param save_path: location passed to file_hp.write_to_file_row_each_line
    :return: list of the unique ids of ``ids`` that are not in ``file``
    '''
    dead_ids = set(file_hp.read_file(file))

    # dict.fromkeys de-duplicates while keeping insertion order. A plain
    # set difference yields an arbitrary order that can change between
    # interpreter sessions for string ids, which would change the content
    # of every job file created from this list.
    alive_ids = [conv_id for conv_id in dict.fromkeys(ids)
                 if conv_id not in dead_ids]
    
    file_hp.write_to_file_row_each_line(save_path,
                                         None,
                                         alive_ids)
    return alive_ids

# The filtered id list is stored alongside the other conversation files.
save_path = os.path.join(conversation_path, 'alive_conversation_ids.txt')

alive_ids = keep_only_alive_conversations(
    ids=ids,
    file=poster_dead_tweet_file,
    save_path=save_path,
)

In [6]:
# A bare expression is only displayed when it is the last one in the cell,
# so the count is printed explicitly to make it visible.
print(len(alive_ids))
save_path

'/N/slate/potem/data/derived/conversation/alive_conversation_ids.txt'

##### **Test a few alive tweets**

In [7]:
# Read the saved file back and show its first ten entries.
test_alive = file_hp.read_file(save_path)
print(test_alive[:10])

['840268627353972736', '1174348484973142016', '930380638817046528', '902859409990320128', '1175143370064220160', '1212856206144016384', '1118070930780106752', '1000442215448104960', '1041773853142384640', '1059498356949573632']


In [12]:
# NOTE: job_1_0_10.txt is the first file written by the split cell when
# split_threshold is 10, so that cell must have been run with that value
# before this one can succeed.
first_job_file = os.path.join(reply_path_5, 'job_1_0_10.txt')
splitted_test = file_hp.read_file(first_job_file)

print(splitted_test)

['840268627353972736', '1174348484973142016', '930380638817046528', '902859409990320128', '1175143370064220160', '1212856206144016384', '1118070930780106752', '1000442215448104960', '1041773853142384640', '1059498356949573632']


In [14]:
# Count how many of the first ten saved ids also appear in the first job file.
len(set(test_alive[:10]) & set(splitted_test))

10

In [20]:
# Number of ids written to each job file.
split_threshold = 5

# The chunk boundaries and the slices must come from the same list:
# alive_ids is the list being sliced, so it also drives the range.
# NOTE: files left over from an earlier run with a different
# split_threshold (e.g. job_1_0_10.txt) are not removed by this cell and
# still match the job_*.txt pattern used by create_jobs.
chunk_starts = range(0, len(alive_ids), split_threshold)
for index, i in enumerate(chunk_starts, start=1):
    ids_split = alive_ids[i:i+split_threshold]
    # For the final chunk this can be larger than len(alive_ids); the
    # slice above simply stops at the end of the list.
    last = i + split_threshold
    
    #first is the index of job
    #second and third are the index of conversation ids
    filename = f'job_{index}_{i}_{last}.txt'
    
    file_hp.write_to_file_row_each_line(reply_path_5,
                                        filename,
                                        ids_split
                                       )

##### **Send the job to get conversations**

In [15]:
# Root folder for sbatch files and the sub-folder that receives the
# generated job scripts.
slurm_path_og = '/N/u/potem/Quartz/sbatch'
slurm_path = os.path.join(slurm_path_og, 'multiple_jobs')

file_hp.create_folder(slurm_path_og, 'multiple_jobs')

In [22]:
def create_jobs(path_conversation,
                python_script,
                path_slurm,
               ):
    '''
    Write one slurm batch script per split conversation-id file.

    Every file matching ``job_*.txt`` in ``path_conversation`` gets a script
    ``<file name without extension>.sh`` in ``path_slurm``. The script runs
    ``python <python_script> --file=<id file>`` through ``srun``.

    :param path_conversation: folder holding the split job_*.txt id files
    :param python_script: path of the python script each job executes
    :param path_slurm: path where slurm script to be saved
    '''
    pattern = os.path.join(path_conversation, 'job_*.txt')

    # glob returns matches in arbitrary order; sorting makes the generation
    # order the same on every run.
    for file in sorted(glob.glob(pattern)):
        # Strip the directory and only the final extension to get the job name.
        job_name = os.path.splitext(os.path.basename(file))[0]
        command = f'python {python_script} --file={file}'

        # The template starts at column 0 on purpose: any indentation here
        # would end up inside the generated shell script.
        script = \
f'''#!/bin/bash -i

#SBATCH --job-name={job_name}
#SBATCH -p general
#SBATCH -A general
#SBATCH -o {job_name}_%j.txt
#SBATCH -e {job_name}_%j.err
#SBATCH --mail-type=ALL
#SBATCH --mail-user=potem@iu.edu
#SBATCH --nodes=1
#SBATCH --time=2-00:00:00
#SBATCH --cpus-per-task=2 # Request that ncpus be allocated per process.
#SBATCH --mem=150GB


#Load any modules that your program needs
source /N/u/potem/Quartz/miniconda3/etc/profile.d/conda.sh
conda activate

#cd /N/u/potem/Quartz/sbatch/multiple_jobs/logs
#Run your program
srun {command}
'''
        new_slurm_path = os.path.join(path_slurm, f'{job_name}.sh')

        with open(new_slurm_path, 'w') as f:
            f.write(script)

            
# Python script that every generated slurm job runs (passed to create_jobs
# as python_script). The call is left commented out so the job scripts are
# only regenerated on purpose.
script_path = '/N/u/potem/Quartz/project/infoOps-strategy/script/py_scripts/data_sourcing/get_conversation.py'
# create_jobs(reply_path_5, script_path,
#             slurm_path)

In [10]:
# abs_path = os.path.dirname(os.path.abspath(__file__))

In [11]:
# python_command = f'python get_conversation.py --file={file}

##### **Despatch job**

In [28]:
def despatch_job(path, logs_path, max_job_index=100):
    '''
    Submit the generated slurm scripts with ``sbatch``.

    Scripts are expected to be named ``job_<index>_...``; files matching
    ``path`` that do not follow this naming are ignored. Scripts are
    submitted in ascending job-index order.

    :param path: glob pattern matching the job scripts
    :param logs_path: directory ``sbatch`` is executed from
    :param max_job_index: scripts with a job index above this value are
                          skipped (default 100)
    '''
    import subprocess

    jobs = []
    for file in glob.glob(path):
        try:
            job_index = int(os.path.basename(file).split('_')[1])
        except (IndexError, ValueError):
            # Not a job_<index>_... script; skip it instead of aborting
            # the whole submission loop.
            continue

        if job_index > max_job_index:
            continue

        # Resolve the script path now: sbatch runs with logs_path as its
        # working directory, so a relative path would no longer be valid.
        jobs.append((job_index, os.path.abspath(file)))

    for _, file in sorted(jobs):
        # cwd= runs sbatch from logs_path without changing the working
        # directory of the notebook itself. Passing an argument list avoids
        # shell quoting problems with the script path. check=False: a failed
        # submission does not stop the remaining ones.
        subprocess.run(['sbatch', file], cwd=logs_path, check=False)

        
slurm_path_og='/N/u/potem/Quartz/sbatch'
slurm_path = os.path.join(slurm_path_og, 
                        'multiple_jobs')

# Create the logs folder once and build its path with os.path.join, as the
# other cells do, instead of relying on create_folder's return value
# (which this notebook does not otherwise use).
file_hp.create_folder(slurm_path, 'logs')
logs_path = os.path.join(slurm_path, 'logs')

# Pattern matching every generated job script.
new_path = os.path.join(slurm_path, '*.sh')

## Do not forget to change get_conversation file
# despatch_job(new_path, logs_path)

In [26]:
import helper.slurm_helper as slurm_hp

# slurm_hp.cancel_jobs(1473957, 1474056)