# Output Checker

#### biohazardCleanUp Function

In [1]:
# REQUIREMENTS
from os import listdir
from os.path import basename, join

# LISTDIR_NH FUNCTION
def listdir_nh(path):
    '''
    List the non-hidden entries of a directory.

    Input: Path of directory (string)
    Output: Names of the entries that do not begin with "." (list of
            strings), in the order os.listdir returned them
    '''
    visible = []
    for entry in listdir(path):
        if entry.startswith("."):
            # Dot-prefixed (hidden) entry -- leave it out
            continue
        visible.append(entry)
    return visible

In [2]:
# LIBRARIES
import pandas as pd

## 1. Set Parent Directory, Collect Files Inside

In [3]:
# MY PARENT DIRECTORY
# Absolute path of the folder whose non-hidden entries are scanned below;
# edit it to match your machine. It ends with "/" so that it can be used
# directly as a string prefix in front of a file name.
padre = "/Volumes/EDIE ESPEJO/job_log_20171003/"

In [4]:
# Full path of every non-hidden entry in `padre`. os.path.join is used so the
# result is correct whether or not `padre` ends with a path separator.
file_paths = [join(padre, x) for x in listdir_nh(padre)]

In [5]:
# PREVIEW OF FILE PATHS
# Every 5th path among the first 30 (indices 0, 5, ..., 25)
file_paths[0:30:5]

['/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_117_117_aic_Rev',
 '/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_196_8_196_8_aic_Rev',
 '/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_21_21_aic_Rev',
 '/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_28_28_aic_Rev',
 '/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_45_45_aic_Rev',
 '/Volumes/EDIE ESPEJO/job_log_20171003/scripts_RevBayes_4_6_4_6_aic_Rev']

In [6]:
# GETTING THE DATASET NAMES
def getAnalysisName(script_path):
    '''
    Input: Path of a job log file (string)
    Output: Analysis name (string): the file name with its first three
            "_"-separated fields removed. If the file name has fewer than
            four fields, its last field is returned.
    '''
    # Work on the file name only, so that underscores in the parent
    # directory cannot shift which fields get dropped.
    file_name = basename(script_path)
    return file_name.split("_", 3)[-1]

In [7]:
# Analysis name for each path, in the same order as file_paths
names = list(map(getAnalysisName, file_paths))

## 2. Outcome Checker

In [8]:
def checkFailures(script_path):
    '''
    Input: Path of a job log file (string)
    Output: "Failure" if the text contains "FAILURE"; otherwise "Error" if
            it contains "Error:"; otherwise "" (string)
    '''
    # Load the entire file into memory as one string
    with open(script_path, "r") as log_file:
        contents = log_file.read()

    # Markers in priority order: the first one present decides the label
    for marker, label in (("FAILURE", "Failure"), ("Error:", "Error")):
        if marker in contents:
            return label

    return ""

In [9]:
# Outcome label ("Failure", "Error" or "") for each path, in file_paths order
outcomes = list(map(checkFailures, file_paths))

In [10]:
# One row per path: analysis name alongside its outcome label.
# The lists are loaded as two rows and then transposed into two columns.
outcome_table = pd.DataFrame(
    data=[names, outcomes],
    index=["names", "outcomes"],
).transpose()

In [11]:
# Descending string sort on "outcomes": rows labelled "Failure" come first,
# then "Error", then the rows with an empty label.
# NOTE(review): none of the cells shown here read outcome_table_sorted --
# confirm whether the preview below was meant to use it.
outcome_table_sorted = outcome_table.sort_values("outcomes", ascending=False)

In [12]:
# First 30 rows of the unsorted table (original file_paths order)
outcome_table.head(30)

Unnamed: 0,names,outcomes
0,117_aic_Rev,
1,117_all_apart_Rev,
2,117_bic_Rev,
3,117_gene_Rev,
4,117_together_Rev,
5,8_196_8_aic_Rev,
6,8_196_8_all_apart_Rev,
7,8_196_8_bic_Rev,Failure
8,8_196_8_gene_Rev,
9,8_196_8_together_Rev,


In [13]:
# Names of the analyses whose outcome label is not empty
issues = outcome_table.loc[outcome_table["outcomes"] != "", "names"].tolist()

In [14]:
# Show the names of all flagged analyses
issues

['8_196_8_bic_Rev',
 '21_bic_Rev',
 '28_bic_Rev',
 '45_bic_Rev',
 '2013_fishes_Bloom_2013_fishes_bic_Rev',
 '2012_Brown_2012_aic_Rev',
 '2012_Brown_2012_all_apart_Rev',
 '2012_Brown_2012_bic_Rev',
 '2012_Brown_2012_gene_Rev',
 '2012_Brown_2012_together_Rev',
 '2013_birds_Cibois_2013_birds_bic_Rev',
 '2001_Cognato_2001_bic_Rev',
 '2012_butterflies_Condamine_2012_butterflies_bic_Rev',
 '2015_crabs_Daniels_2015_crabs_bic_Rev',
 '2013_Day_2013_bic_Rev',
 '2012_DORNBURG_2012_bic_Rev',
 '2010_Ekrem_2010_bic_Rev',
 '2001_Fishbein_2001_bic_Rev']

In [15]:
# Report how many analyses were flagged
print("There were {} issues that either had FAILURE or an ERROR.".format(len(issues)))

There were 18 issues that either had FAILURE or an ERROR.


## 3. Total Processing Time Checker

In [16]:
def checkProcessingTime(script_path):
    '''
    Input: Path of a job log file (string)
    Output: The second ": "-separated field of the last line containing
            "Total processing time" (string); "" if no line contains that
            phrase or if that line has no ": " separator
    '''
    # READ FILE AS LIST OF LINES
    with open(script_path, "r") as this_file:
        lines = this_file.read().split("\n")

    # KEEP THE LAST LINE THAT REPORTS THE TIME
    final_line = None
    for line in lines:
        if "Total processing time" in line:
            final_line = line

    # No such line: report an empty time instead of relying on an exception
    if final_line is None:
        return ""

    # Explicit length check replaces the former bare `except:`, which also
    # swallowed unrelated exceptions such as KeyboardInterrupt.
    fields = final_line.split(": ")
    if len(fields) < 2:
        return ""
    return fields[1]

In [17]:
# Reported processing time ("" when absent) for each path, in file_paths order
times = list(map(checkProcessingTime, file_paths))

In [18]:
# PREVIEW SOME OF THE TIMES
# Every 2nd entry among the first 30 (indices 0, 2, ..., 28)
times[0:30:2]

['03:27:09',
 '03:08:16',
 '03:34:02',
 '03:05:18',
 '03:56:15',
 '15:08:29',
 '00:00:00',
 '16:14:04',
 '21:23:00',
 '22:22:16',
 '05:14:06',
 '00:00:00',
 '05:09:27',
 '10:18:40',
 '11:11:32']

In [19]:
# One row per path: analysis name, outcome label and reported processing time.
# The lists are loaded as three rows and then transposed into three columns.
outcome_time_table = pd.DataFrame(
    data=[names, outcomes, times],
    index=["names", "outcomes", "hh:mm:ss"],
).transpose()

In [21]:
# First 30 rows of the combined name / outcome / time table
outcome_time_table.head(30)

Unnamed: 0,names,outcomes,hh:mm:ss
0,117_aic_Rev,,03:27:09
1,117_all_apart_Rev,,03:16:56
2,117_bic_Rev,,03:08:16
3,117_gene_Rev,,03:31:41
4,117_together_Rev,,03:34:02
5,8_196_8_aic_Rev,,04:09:38
6,8_196_8_all_apart_Rev,,03:05:18
7,8_196_8_bic_Rev,Failure,00:00:00
8,8_196_8_gene_Rev,,03:56:15
9,8_196_8_together_Rev,,04:26:22
