In [None]:
# Parameters
datasetPath = "../datasets/"          # not referenced by the code in this notebook section
dataPath = "./data/"                  # folder scanned for input CSV files; the min/max CSV is written here too
resultPath = "./results/heatmaps/"    # root folder under which one sub-folder per base filename is created
filenames = ["SQLite"]                # base filenames to process; each is used as a file-name prefix
perf = "perf"                         # not referenced by the code in this notebook section


# Names of the metrics whose min/max are computed (one column each in the min/max CSV)
metrics = [
    "accuracy",
    "specificity",
    "precision",
    "recall",
    "npv",
    "negNPV",
    "negAccuracy",
]

In [None]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np

# Select the files you will use
def getFiles(baseFilename, directory=None):
    """Return the paths of the data files whose name starts with baseFilename.

    Args:
        baseFilename: prefix that a file name must start with to be selected.
        directory: folder to scan. When omitted, the module-level ``dataPath``
            is used, which is what existing callers rely on.

    Returns:
        A list of paths (folder joined with file name), ordered by file name.
        Entries that are not regular files (e.g. sub-folders) are skipped.
    """
    if directory is None:
        directory = dataPath

    returnFiles = []
    for filename in sorted(listdir(directory)):
        if not filename.startswith(baseFilename):
            continue

        # join() copes with a folder given with or without a trailing separator
        path = join(directory, filename)
        if not isfile(path):
            continue

        # Only files starting with the value in baseFilename reach this point.
        # Add as many extra filters as you want here.
        #
        # Example: exclude a file with a certain version number
        #     splitted = filename.split("-")
        #     num = splitted[len(splitted)-1].split(".")[0]
        #     if num == "0001":
        #         continue
        #
        # Example: exclude a file by its name
        #     if filename == "SQLite-0002.csv":
        #         continue

        returnFiles.append(path)

    return returnFiles

# Get the min and max value of each metric across all the selected files.
# The heatmaps need a shared scale so that they are easier to compare.
def _addMetricColumns(df):
    """Add the derived metric columns to df (in place) and return it.

    Reads the 'TP', 'TN', 'FP', 'FN' and 'sr' columns. A zero denominator
    yields NaN for that row, which the callers have to ignore.
    """
    tp, tn, fp, fn = df['TP'], df['TN'], df['FP'], df['FN']

    df['accuracy'] = (tp + tn) / (tp + tn + fn + fp)
    df['specificity'] = tn / (tn + fp)
    df['precision'] = tp / (tp + fp)
    df['recall'] = tp / (tp + fn)
    df['npv'] = tn / (tn + fn)

    # 1-based position of each row inside its 'sr' group, rescaled so that
    # the largest position found in the file maps to 100
    df['index'] = df.groupby('sr').cumcount() + 1
    df['frequency'] = df['index'].max()
    df['index'] = df['index'] * 100 / df['frequency']

    df['negNPV'] = df['npv'] - (df['index'] / 100)
    df['negAccuracy'] = df['accuracy'] - (df['index'] / 100)
    return df


def getMinMax(files):
    """Compute, for every name in ``metrics``, its min and max over all files.

    Args:
        files: paths of CSV files providing the 'TP', 'TN', 'FP', 'FN' and
            'sr' columns.

    Returns:
        A two-row DataFrame with one column per metric: row 0 holds the
        minimums, row 1 the maximums. Minimums start at 1 and maximums at 0,
        so a reported minimum is never above 1 and a maximum never below 0.
    """
    minArray = {m: 1 for m in metrics}
    maxArray = {m: 0 for m in metrics}

    for filename in files:
        df = _addMetricColumns(pd.read_csv(filename))

        for m in metrics:
            # Series.max()/min() skip NaN; for an all-NaN column they return
            # NaN, and every comparison with NaN is False, so the bounds are
            # simply left untouched in that case.
            columnMax = df[m].max()
            columnMin = df[m].min()
            if columnMax > maxArray[m]:
                maxArray[m] = columnMax
            if columnMin < minArray[m]:
                minArray[m] = columnMin

    return pd.DataFrame([minArray, maxArray])

In [None]:

for baseFilename in filenames:
    
    if not os.path.exists(resultPath+baseFilename):
        try:
            os.makedirs(resultPath+baseFilename)
        except OSError as exc: # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
            
    files = getFiles(baseFilename)
    # Put min and max values in a csv file to be used by the R script for heatmaps
    getMinMax(files).to_csv(dataPath+"minmax-"+baseFilename+".csv", index=False)
    
    #Create heatmaps for each file for each metric
    for filename in files:
        !Rscript ./helpers/2.calculateMetricsV2.R {filename} {resultPath}{baseFilename}"/" {dataPath}"minmax-"{baseFilename}".csv"