In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from PIL import Image, ImageDraw, ImageFont
from library.analysis import testSets, generators

# Constants

In [None]:
# Keys under which the individual scores are stored in the statistics.
kScore = "cohens kappa score"
f1Score = "f1 score"
gScore = "G-mean score"
aScore = "average precision score"

# Keys for the standard deviation that belongs to each score.
kScoreSd = f"{kScore} - SD"
f1ScoreSd = f"{f1Score} - SD"
gScoreSd = f"{gScore} - SD"
aScoreSd = f"{aScore} - SD"

# Settings

In [None]:
# Presumably dataset names to exclude.
# NOTE(review): `ignoreSet` is not referenced again in the visible part of
# this notebook (later cells use a separate `ignore` list) - confirm it is
# still needed.
ignoreSet = ["ozone_level", "yeast_me2"]

# Generator names taken from the keys of `generators`, with "SimpleGAN"
# shortened to "GAN".
gans = [g.replace("SimpleGAN", "GAN") for g in generators.keys()]
# Block names looked up in the statistics for every generator/dataset pair.
algs = ["LR", "RF", "GB", "KNN", "DoC"]

# Show the generator list as the cell output.
gans

In [None]:
# Keep only the test sets whose name starts with "folding".
testSets = [t for t in testSets if t.startswith("folding")]

In [None]:
def cleanupName(name):
    """Return `name` with the markers "folding_", "imblearn_" and "kaggle_" removed."""
    for marker in ("folding_", "imblearn_", "kaggle_"):
        name = name.replace(marker, "")
    return name

In [None]:
# Display names of all selected test sets.
dataset = list(map(cleanupName, testSets))

In [None]:
def ganName(name):
    """Replace the internal ConvGeN variant names in `name` by their display names."""
    displayNames = (
        ("ConvGeN-majority-5", "ConvGeN(5,maj)"),
        ("ConvGeN-majority-full", "ConvGeN(min,maj)"),
        ("ConvGeN-proximity-5", "ConvGeN(5,prox)"),
        ("ConvGeN-proximity-full", "ConvGeN(min,prox)"),
    )
    for (internal, shown) in displayNames:
        name = name.replace(internal, shown)
    return name

In [None]:
def filterConvGen(name):
    """Return True for every non-ConvGeN name and for "ConvGeN(min,maj)" only."""
    if name == "ConvGeN(min,maj)":
        return True
    return not name.startswith("ConvGeN")

# Load data from CSV files

In [None]:
def loadDiagnoseData(ganType, datasetName):
    """Read the result file "data_result/<ganType>/<datasetName>.csv".

    The file is parsed as a sequence of blocks separated by a line "---".
    The first line of a block is its name ("GAN" and "DoG" are both stored
    as "DoC"). The remaining lines are split at ";":

    * lines whose first field is one of "0" .. "99" contribute one sample per
      score (fields 5-8: f1, kappa, average precision, G-mean),
    * a line whose first field is "avg" supplies the mean of each score; the
      standard deviation is computed from the samples collected so far in
      the block, relative to that mean,
    * every other line is ignored.

    Returns a dict {block name: {score name: mean, score name + " - SD": sd}}.
    A missing file is reported on stdout and yields an empty dict.
    """
    # (score key, index of the field holding that score)
    columns = ((f1Score, 5), (kScore, 6), (aScore, 7), (gScore, 8))
    foldIds = {str(x) for x in range(100)}

    def spread(avg, items):
        # An "avg" row without preceding fold rows used to raise
        # ZeroDivisionError; report a deviation of 0.0 instead.
        if not items:
            return 0.0
        return math.sqrt(sum([(avg - x)*(avg - x) for x in items]) / len(items))

    def emptySamples():
        return {name: [] for (name, _) in columns}

    fileName = f"data_result/{ganType}/{datasetName}.csv"
    r = {}
    try:
        with open(fileName) as f:
            newBlock = True
            n = ""
            samples = emptySamples()
            for line in f:
                line = line.strip()
                if newBlock:
                    n = "DoC" if line in ("GAN", "DoG") else line
                    newBlock = False
                elif line == "---":
                    newBlock = True
                    samples = emptySamples()
                else:
                    parts = line.split(";")
                    if parts[0] == "avg":
                        r[n] = {}
                        for (name, col) in columns:
                            avg = float(parts[col])
                            r[n][name] = avg
                            r[n][name + " - SD"] = spread(avg, samples[name])
                    elif parts[0] in foldIds:
                        for (name, col) in columns:
                            samples[name].append(float(parts[col]))
    except FileNotFoundError:
        print(f"Missing file: {fileName}")
    return r

In [None]:
# statistic[generator display name][dataset display name] -> parsed result file
statistic = {}

for gan in gans:
    perGan = statistic.setdefault(ganName(gan), {})

    for ds in testSets:
        if ds == "Average":
            continue

        perGan[cleanupName(ds)] = loadDiagnoseData(gan, ds)

        d = cleanupName(ds)
        if d not in dataset:
            dataset.append(d)

# From here on `gans` holds the display names.
gans = list(statistic.keys())

In [None]:
scoreNames = [f1Score, f1ScoreSd, kScore, kScoreSd, aScore, aScoreSd, gScore, gScoreSd]

# Add a pseudo dataset "Average" to every generator. Each value is the sum
# over all real datasets that have results for the algorithm, divided by the
# number of ALL real datasets.
for gan in gans:
    perGan = statistic[gan]
    realSets = [ds for ds in dataset if ds != "Average"]
    c = len(realSets)

    avg = {}
    for n in algs:
        totals = {scoreName: 0.0 for scoreName in scoreNames}
        for ds in realSets:
            if n not in perGan[ds]:
                continue
            for scoreName in scoreNames:
                totals[scoreName] += perGan[ds][n][scoreName]
        avg[n] = {scoreName: totals[scoreName] / c for scoreName in scoreNames}

    perGan["Average"] = avg

In [None]:
def checkValues():
    """Print the four LR scores of generator "Repeater" for every stored dataset."""
    shown = ["f1 score", "cohens kappa score", "average precision score", "G-mean score"]
    lrScores = [perDataset['LR'] for perDataset in statistic["Repeater"].values()]
    for c in lrScores:
        print([c[k] for k in shown])

checkValues()

# Show Statistics

In [None]:
def drawTransparentRect(img, rect, color, opacity=1.0):
    """Blend `color` over the pixels of `img` inside `rect`.

    `rect` is ((x0, y0), (x1, y1)); the end coordinates are exclusive.
    Every pixel becomes opacity * color + (1 - opacity) * old pixel, computed
    per channel for the first three channels, truncated to int and clamped
    to 0..255. The image is modified in place through getpixel/putpixel.
    """
    (left, top), (right, bottom) = rect
    keep = 1.0 - opacity

    for row in range(top, bottom):
        for col in range(left, right):
            pos = (col, row)
            old = img.getpixel(pos)
            blended = tuple(
                max(0, min(255, int((opacity * color[k]) + (keep * old[k]))))
                for k in range(3)
            )
            img.putpixel(pos, blended)
    

In [None]:
def drawDiagram(size, rowNames, data, colNames=[], colors=None, border=20, barIndent=10, fontSize=20, markers=[0.25, 0.5, 0.75, 1.00], maxY=1.0):
    """Render a grouped bar chart with PIL and return it as an image.

    Row labels, reference lines, bars, whiskers and the axes are drawn on a
    square canvas first. The canvas is then rotated by 90 degrees, the marker
    labels and the legend are drawn on the rotated image, and the result is
    cropped to `size`.

    Parameters:
        size: (width, height) of the returned image in pixels.
        rowNames: one label per entry of `data`.
        data: non-empty list of rows (`data[0]` is accessed). Every row holds
            one entry per column; an entry is either a number or a tuple
            (value, lower, upper). For tuples a whisker from `lower` to
            `upper` is drawn on top of the bars.
        colNames: legend labels, one per column; no legend is drawn if empty.
        colors: RGB tuples indexed by column; a built-in palette of nine
            colors is used when None.
        border: basic spacing in pixels.
        barIndent: NOTE(review): has no effect, the value is overwritten
            before it is used.
        fontSize: size of the "FreeSans" font.
        markers: values at which reference lines and their labels are drawn.
        maxY: value that corresponds to the full bar length.

    Returns:
        The rotated canvas cropped to (0, 0, size[0], size[1]).
    """
    silver = (204, 204, 204)
    black = (0,0,0)
    white = (255, 255, 255)
    
    # NOTE(review): this first palette is dead code - it is replaced by the
    # second `defaultColors` assignment right below.
    defaultColors = [ (31,119,180)
                    , (255,127,14)
                    , (44,160,44)
                    , (214,40,40)
                    , (148,103,189)
                    , (140,86,75)
                    , (227,119,194)
                    , (127,127,127)
                    , (40,40,214)
                    ]

    defaultColors = [ (209,188,115) # Gold
                    , (145,196,223) # bright blue
                    , (83,119,202)  # blue
                    , (224,138,86)  # orange
                    , (131,202,112) # green
                    , (199,102,99)  # red
                    , (143,110,176) # violet
                    , (134,99,66)   # brown
                    , (207,131,189) # pink
                    ]
    
    # Scale a data value to a fraction of the bar length.
    def y(v):
        return v / maxY

    # Bar value of an entry: first element of a tuple, otherwise the entry itself.
    def med(v):
        if type(v) is tuple:
            return v[0]
        return v

    
    if colors is None:
        colors = defaultColors

    # Debug output: (columns in first row, legend entries, rows, row labels).
    print((len(data[0]), len(colNames), len(data), len(rowNames)))

    # NOTE(review): `font.getsize` (used below) was removed in Pillow 10 -
    # confirm the installed Pillow version still provides it.
    font = ImageFont.truetype("FreeSans", fontSize)
    
    # Width of the widest marker label.
    markerSize = 0
    for m in markers:
        markerSize = max(markerSize, font.getsize(f"{m:0.2f}")[0])

    areaTop = 2 * border + markerSize

    # Space per row of `data` and the part of it that is covered by bars.
    barStep = (size[0] - border - areaTop) // len(data)
    barSize = max(border, barStep - border)
    # The first assignment is immediately replaced by the second one, so the
    # `barIndent` argument is never used.
    barIndent = min(barIndent, barSize / (1 + len(data[0])))
    barIndent = barSize / (2 + len(data[0]))
    
    # Debug output.
    print((size[0], barSize, barSize * len(data)))
    
    # Create new Image
    w = max(size[0], size[1])
    img = Image.new("RGB", (w,w))
    d = ImageDraw.Draw(img)
    
    # Set background to white.
    d.rectangle(((0,0), (w,w)), fill=white)
    
    # draw row names
    height = size[1]
    left = w - height
    textSize = 0
    for (n, name) in enumerate(rowNames):
        s = font.getsize(name)
        offset = int(border + barSize - s[1] + 1.5) // 2
        textSize = max(textSize, s[0])
        pos = (left + border, areaTop + offset + (barStep * n))
        d.text(pos, name, fill=black, font=font)
        
        
    # Calculate sizes for bar drawing.
    barLength = height - (4 * border) - textSize
    areaSize = (barLength, barSize)
    areaLeft = left + (2 * border) + textSize
    
    # Draw Lines for bar height comparing.
    markerPos = [areaLeft + int(y(v) * barLength) for v in markers]
    for p in markerPos:
        d.line(((p, border), (p, size[0] - border)), fill=silver)
        
    # Draw bars.
    for (n, row) in enumerate(data):
        area = ((areaLeft, areaTop + (n * barStep) + (border // 2)), areaSize)

        # Column indices ordered by descending value: the longest bar is
        # drawn first, shorter bars are drawn on top of it.
        indices = list(range(len(row)))
        indices.sort(key= lambda i: 1.0 - med(row[i]))

        # Note: `n` is reused here and now counts the position in the sorted
        # order; every further bar is shifted by `barIndent`.
        for (n, i) in enumerate(indices):
            v = y(med(row[i]))
            c = colors[i]
            offset = barIndent * n
            tl = (area[0][0], area[0][1] + offset)
            br = (tl[0] + int(v * area[1][0]), area[1][1] + tl[1] - offset)
            rect = (tl, br)
            d.rectangle(rect, fill=c, outline=black)

    # Second pass with the same geometry: draw whiskers for tuple entries on
    # top of all bars.
    for (n, row) in enumerate(data):
        area = ((areaLeft, areaTop + (n * barStep) + (border // 2)), areaSize)

        indices = list(range(len(row)))
        indices.sort(key= lambda i: 1.0 - med(row[i]))

        for (n, i) in enumerate(indices):
            v = y(med(row[i]))
            c = colors[i]
            offset = barIndent * n
            tl = (area[0][0], area[0][1] + offset)
            br = (tl[0] + int(v * area[1][0]), area[1][1] + tl[1] - offset)

            # Skip tuples that are all zero.
            if type(row[i]) is tuple and (row[i][0] != 0.0 or row[i][1] != row[i][0] or row[i][2] != row[i][0]):
                vLower = y(row[i][1])
                vUpper = y(row[i][2])
                # On the unrotated canvas `xCenter` is used as a vertical and
                # `yLower`/`yUpper` as horizontal coordinates; the names
                # presumably refer to the rotated result.
                xCenter = (tl[1] + br[1]) // 2
                yUpper = tl[0] + int(vUpper * area[1][0])
                yLower = tl[0] + int(vLower * area[1][0])

                # Wide white lines first as a halo for the colored lines below.
                #d.line(((br[0], tl[1]), (br[0], br[1])), fill=black, width=5)
                d.line(((yLower, xCenter), (yUpper, xCenter)), fill=white, width=5)
                d.line(((yLower, xCenter - border), (yLower, xCenter + border)), fill=white, width=5)
                d.line(((yUpper, xCenter - border), (yUpper, xCenter + border)), fill=white, width=5)

                #d.line(((br[0], tl[1]), (br[0], br[1])), fill=c, width=3)
                d.line(((yLower, xCenter), (yUpper, xCenter)), fill=c, width=3)
                d.line(((yLower, xCenter - border), (yLower, xCenter + border)), fill=c, width=3)
                d.line(((yUpper, xCenter - border), (yUpper, xCenter + border)), fill=c, width=3)


    # Draw axis.
    d.line(((areaLeft, areaTop), (areaLeft, size[0] - border)), fill=black)
    d.line(((areaLeft, areaTop), (w - border, areaTop)), fill=black)
    
    # Draw y-axis text.
    # The canvas is rotated first; a new drawing context is needed because
    # `rotate` returns a new image.
    img = img.rotate(90)
    d = ImageDraw.Draw(img)

    for (m, p) in zip(markers, markerPos):
        d.text((border, size[1] - (p - left)), f"{m:0.2f}", fill=black, font=font)
    
    # Draw legend.
    if len(colNames) > 0:
        colNameWidth = 0
        colNameHeight = fontSize * len(colNames)
        for c in colNames:
            colNameWidth = max(colNameWidth, font.getsize(c)[0])
        
        # Size and vertical padding of the color swatches.
        rWidth = int(fontSize * 0.75)
        rHeight = fontSize // 2
        rPadd = (fontSize - rHeight) // 2
        
        # Lighten the area behind the legend (75% white) in the top right corner.
        tl = (size[0] - (int(2.5 * border) + colNameWidth + rWidth), 0)
        br = (size[0], int(1.2 * border) + colNameHeight)
        drawTransparentRect(img, (tl, br), white, 0.75)

        for (n, c) in enumerate(colNames):
            t = border + (fontSize * n)
            l = size[0] - border - colNameWidth
            d.rectangle(((l - border - rWidth, t + rPadd - 1), (l - border, t + rPadd + rHeight)), fill=colors[n], outline=black)
            d.text((l, t), c, fill=black, font=font)
    
    return img.crop((0, 0, size[0], size[1]))

In [None]:
def showDiagnoseByAlgo(algo, score):
    """Plot `score` of classifier `algo` for every test set and generator.

    One group of bars per test set, one bar per generator. Missing values are
    reported on stdout and plotted as 0.0. The diagram is saved to
    data_result/statistics/byAlgorithm/.
    """
    def valueOf(d, g):
        d = cleanupName(d)
        perGan = statistic[g]
        if d not in perGan:
            print(f"Missing '{d}' in '{g}'")
            return 0.0

        if algo not in perGan[d]:
            print(f"Missing '{algo}' in ('{g}', '{d}')")
            return 0.0

        return perGan[d][algo][score]

    print(f"{algo}: {score}")

    data = []
    for d in testSets:
        data.append([valueOf(d, g) for g in gans])
    rowNames = [cleanupName(d) for d in testSets]

    img = drawDiagram((1024, 1024), rowNames, data, colNames=gans)
    img.save(f"data_result/statistics/byAlgorithm/statistic-{algo}-{score}.pdf")

def showDiagnoseByAlgoAverage(score):
    """Plot `score` averaged over all available classifiers.

    One group of bars per test set, one bar per generator. A generator
    without any classifier result for a test set is plotted as 0.0. The
    diagram is saved to data_result/statistics/byAlgorithm/ and displayed.
    """
    def valueOf(d, g):
        d = cleanupName(d)
        perGan = statistic[g]
        if d not in perGan:
            print(f"Missing '{d}' in '{g}'")
            return 0.0

        found = [perGan[d][algo][score] for algo in algs if algo in perGan[d]]
        if not found:
            return 0.0

        # Explicit left-to-right accumulation starting at 0.0.
        total = 0.0
        for value in found:
            total += value
        return total / len(found)

    print(f"showDiagnoseByAlgoAverage({score})")

    data = []
    for d in testSets:
        data.append([valueOf(d, g) for g in gans])
    rowNames = [cleanupName(d) for d in testSets]

    img = drawDiagram((1024, 1024), rowNames, data, colNames=gans)
    img.save(f"data_result/statistics/byAlgorithm/statistic-Average-{score}.pdf")
    plt.imshow(img)
    plt.show()

In [None]:
def showDiagnoseAverage(score, onlyOneBar=False, maxY=0.75):
    """Plot the dataset average of `score` per classifier and generator.

    One group of bars per classifier in `algs`, one bar per generator in
    `gans`; values come from the "Average" entry of `statistic`. The diagram
    is saved to data_result/statistics/average/.

    Parameters:
        score: key of the score to plot.
        onlyOneBar: unused; kept so existing calls keep working.
        maxY: value that corresponds to the full bar length.
    """
    def valueOf(g, algo):
        perAlgo = statistic[g]["Average"]
        # The former `if True: ... else: ...` had an unreachable branch that
        # returned (avg, avg - sd, avg + sd). drawDiagram draws whiskers for
        # such tuples, should error bars be wanted again.
        if algo in perAlgo:
            return perAlgo[algo][score]
        return 0.0

    print(f"Average: {score}")

    data = [[valueOf(g, algo) for g in gans] for algo in algs]
    img = drawDiagram((1024, 1024), algs, data, colNames=gans, maxY=maxY)
    img.save(f"data_result/statistics/average/statistic-Algo-Average-{score}.pdf")

In [None]:
def showDiagnoseDataset(dataset):
    """Plot f1 and kappa score of one dataset per generator and classifier.

    One group of bars per generator in `gans`, one bar per classifier in
    `algs`; missing values are plotted as 0.0. One diagram per score is saved
    to data_result/statistics/byDataset/.
    """
    print(f"{dataset}")

    def valueOf(algo, score, g):
        perDataset = statistic[g]
        if dataset not in perDataset:
            return 0.0
        perAlgo = perDataset[dataset]
        if algo not in perAlgo:
            return 0.0
        if score not in perAlgo[algo]:
            return 0.0
        return perAlgo[algo][score]

    for score in [f1Score, kScore]:
        data = []
        for g in gans:
            data.append([valueOf(algo, score, g) for algo in algs])
        img = drawDiagram((1024, 1024), gans, data, colNames=algs, maxY=0.75)
        img.save(f"data_result/statistics/byDataset/statistic-Classifier-{dataset}-{score}.pdf")


In [None]:
def showDiagnoseDatasetAverage():
    """Plot f1 and kappa score of the pseudo dataset "Average".

    The former body duplicated showDiagnoseDataset with dataset = "Average"
    and carried an unreachable branch behind `if True:`. Delegating prints
    the same line ("Average") and writes the same files
    (data_result/statistics/byDataset/statistic-Classifier-Average-<score>.pdf).
    """
    showDiagnoseDataset("Average")


In [None]:
# The show* functions read the global `gans`. Restrict it to the generators
# accepted by filterConvGen while the diagrams are created.
gans = [g for g in statistic.keys() if filterConvGen(g)]

# One set of diagrams per score; `maxY` is only passed to showDiagnoseAverage.
for (score, maxY) in [(f1Score, 0.75), (kScore, 0.75), (aScore, 0.75), (gScore, 1.0)]:
    for a in algs:
        showDiagnoseByAlgo(a, score)
    
    showDiagnoseByAlgoAverage(score)
    showDiagnoseAverage(score, maxY=maxY)

# Per-dataset diagrams, followed by the one for the "Average" pseudo dataset.
for t in testSets:
    showDiagnoseDataset(cleanupName(t))

showDiagnoseDatasetAverage()

# Restore the full generator list for the following cells.
gans = list(statistic.keys())

In [None]:
# Print the "Repeater"/LR scores again after the plotting cell.
checkValues()

In [None]:
def getValueOf(gan, dataset, algo, score):
    """Look up statistic[gan][dataset][algo][score].

    Returns None if the dataset, the algorithm or the score is missing.
    An unknown `gan` raises KeyError.
    """
    perDataset = statistic[gan]
    if dataset in perDataset:
        perAlgo = perDataset[dataset]
        if algo in perAlgo:
            scores = perAlgo[algo]
            if score in scores:
                return scores[score]
    return None
    
    
    
def calcTable(algo, score, ignore=[]):
    """Count pairwise wins between generators.

    Entry [i][j] of the result is the number of test sets (not listed in
    `ignore`) on which generator gans[i] reaches a `score` at least as high
    as generator gans[j] with classifier `algo`. Test sets where one of the
    two values is missing are not counted.
    """
    names = [cleanupName(d) for d in testSets]
    names = [d for d in names if d not in ignore]

    def calc(gc, g):
        wins = 0
        for d in names:
            vc = getValueOf(gc, d, algo, score)
            v = getValueOf(g, d, algo, score)
            if vc is None or v is None:
                continue
            if vc >= v:
                wins += 1
        return wins

    return [[calc(gc, g) for g in gans] for gc in gans]

In [None]:
# Win-count tables, keyed by "<algorithm> - <score name>".
tables = {}
ignore = [# "webpage"
         #, "mammography"
         #, "protein_homo"
         #, "ozone_level"
         #, "creditcard"
         ]
for a in algs:
    for scoreLabel in (f1Score, kScore, aScore, gScore):
        tables[f"{a} - {scoreLabel}"] = calcTable(a, scoreLabel, ignore)

# Show the first table as the cell output.
tables[f"{algs[0]} - {f1Score}"]

In [None]:
# Print the "Repeater"/LR scores again after the win-count tables were built.
checkValues()

In [None]:
# Color map with 512 entries: the first 256 run from red (1, 0, 0) to
# yellow (1, 1, 0), the second 256 from white (1, 1, 1) to blue (0, 0, 1).
cmap = matplotlib.colors.ListedColormap([
    (1.0, x / 255.0, 0.0)
    for x in range(256)
    ] + [
    ((255 - x) / 255.0, (255 - x) / 255.0, 1.0) # x / 255.0)
    for x in range(256)
    ])

#cmap.set_extremes(bad=cmap(0.0), under=cmap(0.0), over=cmap(1.0))

# One heat map per win-count table.
for k in tables.keys():
    print(k)
    labels = list(gans)
    t = tables[k]
    if k[0:3] == "DoC":
        #continue
        # For DoC tables only the last four generators (rows and columns)
        # are shown, in a smaller figure.
        labels = labels[-4:]
        t = [r[-4:] for r in t[-4:]]
        f = plt.figure(figsize=(5, 4))
        f.add_axes([0.4, 0.45, 0.6, 0.5])
    else:
        f = plt.figure(figsize=(7, 6))
        f.add_axes([0.27, 0.25, 0.7, 0.74])
    # NOTE(review): `p` is also the name of a formatting function defined in
    # a later cell; re-running this cell afterwards would overwrite it.
    p = plt.imshow(t, cmap=cmap)
    plt.colorbar(p)
    plt.xticks(range(len(labels)), labels, rotation="vertical")
    plt.yticks(range(len(labels)), labels)
    plt.savefig(f"data_result/statistics/successCount/statistic-{k}.pdf")
    plt.show()

In [None]:
# Print the "Repeater"/LR scores again after the heat maps were drawn.
checkValues()

In [None]:
class Table:
    """Small text table that can be printed as plain text or as LaTeX.

    All cells are converted to strings when they are stored. `sizes` holds
    the current width of every column for the plain-text output.
    """

    def __init__(self, heading):
        """Create an empty table with the given column headings."""
        self.heading = [str(h) for h in heading]
        self.sizes = [len(h) for h in self.heading]
        self.rows = []

    def add(self, row):
        """Append a row and widen the columns where necessary."""
        row = [str(r) for r in row]
        self.rows.append(row)
        self.sizes = [max(a, len(b)) for (a, b) in zip(self.sizes, row)]

    def separator(self):
        """Return the line that separates heading and body in the text output."""
        return "|".join(["-" * n for n in self.sizes])

    def showRow(self, row):
        """Return `row` with every cell padded with spaces to its column width."""
        return "|".join([t.ljust(n) for (n, t) in zip(self.sizes, row)])

    def show(self):
        """Print the table as plain text."""
        print(self.showRow(self.heading))
        print(self.separator())
        for row in self.rows:
            print(self.showRow(row))

    def showLatex(self, caption, key):
        """Return the table as a LaTeX `table*` environment.

        `caption` and `key` are inserted unescaped as caption and label. The
        first column is left aligned, all others are centered.
        """
        columnConfig = "|".join(["l"] + ["@{\\hskip3pt}c@{\\hskip3pt}" for h in self.heading[1:]])

        parts = [
            "\\begin{table*}[ht]\\scriptsize",
            "\\caption{" + caption + "}\\label{" + key + "}",
            "\\centering\\tabularnewline\n",
            "\\begin{tabular}{" + columnConfig + "}\\hline\n",
            " & ".join(["\\textbf{" + h + "}" for h in self.heading]),
            "\n\\tabularnewline\n\\hline\n",
        ]

        for row in self.rows:
            parts.append(" & ".join(row))
            parts.append("\n\\tabularnewline\n")

        # The backslashes are escaped explicitly here; the former "\h" and
        # "\e" were invalid escape sequences.
        parts.append("\\hline\\end{tabular}\\end{table*}\n")

        return "".join(parts)

In [None]:
scoreNames = [f1Score, kScore, aScore, gScore]

def tableRow(algo, dataset, myGans):
    """Return, for every generator in `myGans`, the list of all `scoreNames`
    values of (`dataset`, `algo`); missing values are None."""
    result = []
    for gan in myGans:
        values = []
        for score in scoreNames:
            values.append(getValueOf(gan, dataset, algo, score))
        result.append(values)
    return result

In [None]:
def p(f, bold=False):
    """Format a score for a LaTeX cell.

    The value is shown with three decimals ("?" for None), surrounded by
    single spaces and wrapped in \\textbf{...} when `bold` is set.
    """
    text = "?" if f is None else f"{f:0.3f}"
    if bold:
        text = "\\textbf{" + text + "}"
    return " " + text + " "

def latex(text):
    """Return `text` with every "_" and "-" replaced by a space."""
    return "".join(" " if ch in ("_", "-") else ch for ch in text)

def pairMax(row):
    """Return the column-wise maximum of a list of equally long value lists.

    `row` holds one list of scores per generator. None entries (missing
    scores, as returned by getValueOf) are skipped instead of raising
    TypeError. An empty `row` yields an empty list.

    NOTE(review): the maximum starts at 0.0, so a column that contains only
    negative values (or only None) reports 0.0 - kept unchanged for
    compatibility.
    """
    if not row:
        return []

    best = [0.0 for _ in row[0]]

    for v in row:
        for i, x in enumerate(v):
            if x is not None and x > best[i]:
                best[i] = x

    return best

def createTable(useConvGeN, fileName, scoreTitle, scoreSelection):
    """Write one LaTeX table per classifier to `fileName`.

    Every table has one row per test set that is not listed in the global
    `ignore`, plus an "Average" row, and one column per generator. A cell
    shows two scores as "a / b"; the best value of each score within a row is
    printed in bold.

    Parameters:
        useConvGeN: if true, compare only the generators whose name starts
            with "ConvGeN". Otherwise compare the generators accepted by
            filterConvGen and skip the classifier "DoC" (only its header
            comment is written).
        fileName: path of the LaTeX file to (over)write.
        scoreTitle: text shown in the heading of the first column.
        scoreSelection: two indices into the score list returned by tableRow.
    """
    with open(fileName, "w") as latexFile:
        for algo in algs:
            latexFile.write("\n")
            latexFile.write("% ### " + algo + "\n")
            latexFile.write("\n")

            if useConvGeN:
                myGans = [x for x in gans if x.startswith("ConvGeN")]
            else:
                if algo[0:3] == "DoC":
                    continue
                myGans = [x for x in gans if filterConvGen(x)]

            table = Table([f"dataset ({scoreTitle})"] + [latex(g) for g in myGans])

            # Running sums of both scores, one pair per generator.
            avg = [[0.0, 0.0] for _ in myGans]
            cnt = 0

            for d in testSets:
                d = cleanupName(d)
                if d in ignore:
                    continue

                cnt += 1
                row = [[r[i] for i in scoreSelection] for r in tableRow(algo, d, myGans)]

                print(f"{cnt}: {row[0]}")

                m = pairMax(row)

                line = [latex(d)]
                for (n, r) in enumerate(row):
                    line.append(f"{p(r[0], r[0] == m[0])} / {p(r[1], r[1] == m[1])}")
                    # Missing scores (None) count as 0.0 in the average.
                    avg[n][0] += r[0] or 0.0
                    avg[n][1] += r[1] or 0.0
                table.add(line)

            m = pairMax(avg)
            if cnt > 0:
                averages = [f"{p(a / cnt, a == m[0])} / {p(b / cnt, b == m[1])}" for (a, b) in avg]
            else:
                # No dataset left after filtering: there is nothing to
                # average, so show "?" instead of dividing by zero.
                averages = [f"{p(None)} / {p(None)}" for _ in avg]
            table.add(["\\hline Average"] + averages)

            latexFile.write(table.showLatex(algo, "tab:results:" + algo + ":A") + "\n")



In [None]:
# Tables with f1 score (index 0) and kappa score (index 1); useConvGeN=False.
createTable(False, "data_result/statistics/Tables-f1-kappa.tex", "$f_1~$score$~/~\\kappa~$score", [0,1])

In [None]:
# Tables with average precision (index 2) and G-mean score (index 3); useConvGeN=False.
createTable(False, "data_result/statistics/Tables-a-g.tex", "avg.~prec.~score$~/~$G-mean~score", [2,3])

In [None]:
# The same two pairs of scores with useConvGeN=True (ConvGeN variants only).
createTable(True, "data_result/statistics/TablesConvGeN-f1-kappa.tex", "$f_1~$score$~/~\\kappa~$score", [0,1])
createTable(True, "data_result/statistics/TablesConvGeN-a-g.tex", "avg.~prec.~score$~/~$G-mean~score", [2,3])

In [None]:
# File name suffix for every score, in the order in which tableRow returns them.
scoreFileExtensions = ["f1", "kappa", "avg-prec", "gMean"]

# Write one CSV per classifier and score: a row per test set (plus "Average"),
# a column per generator accepted by filterConvGen.
for algo in algs:
    print("% ### " + algo)
    heading = ["dataset"] + [g for g in gans if filterConvGen(g)]
    columns = heading[1:]
    nScores = len(scoreFileExtensions)

    table = []
    avg = [[0.0] * nScores for _ in columns]
    cnt = 0

    for d in map(cleanupName, testSets):
        if d in ignore:
            continue

        cnt += 1
        row = tableRow(algo, d, columns)
        # The first cell repeats the dataset name once per score so that
        # every cell can be indexed by the score number.
        table.append([[d] * nScores] + row)

        for (n, r) in enumerate(row):
            for k in range(nScores):
                avg[n][k] += r[k] or 0.0

    table.append([["Average"] * nScores] + [[v / cnt for v in a] for a in avg])

    for ri, name in enumerate(scoreFileExtensions):
        with open(f"data_result/statistics/{algo}-{name}.csv", "w") as f:
            f.write(";".join(heading) + "\n")
            for row in table:
                cells = [str(x[ri]) for x in row]
                f.write(";".join(cells) + "\n")


In [None]:

# Print, for every generator, the mean of each score over all test sets and
# classifiers. Generators rejected by filterConvGen are listed with "-".
for gan in gans:
    scoreNames = [f1Score, kScore, aScore, gScore]
    sums = {score: 0.0 for score in scoreNames}
    counts = {score: 0 for score in scoreNames}

    if filterConvGen(gan):
        for t in testSets:
            for alg in ["LR", "RF", "GB", "KNN", "DoC"]:
                perAlgo = statistic[gan][cleanupName(t)]
                if alg not in perAlgo:
                    continue
                for score in scoreNames:
                    sums[score] += perAlgo[alg][score]
                    counts[score] += 1

    print("----")
    print(f"{gan}:")
    for score in scoreNames:
        shown = sums[score] / counts[score] if counts[score] > 0 else "-"
        print(f"{score}: {shown}")

In [None]:
scoreNames = [f1Score, kScore, aScore, gScore]

def csvForScore(f, score, withDoC):
    """Write the per-dataset classifier average of `score` as CSV and return
    the same data as a LaTeX table.

    For every test set and every generator accepted by filterConvGen the
    score is averaged over the classifiers that have a result. A final row
    "average" holds the mean over all test sets.

    Parameters:
        f: writable text file that receives the ";"-separated CSV.
        score: key of the score to export.
        withDoC: include the classifier "DoC" in the average if true.

    Returns:
        LaTeX source of the table; the best value of each row is bold.
    """
    def fmt(value):
        return f"{value:0.3f}"

    def emit(label, values):
        # One CSV line plus the matching LaTeX row with the maximum in bold.
        f.write(label + ";" + ";".join([fmt(v) for v in values]) + "\n")
        best = max(values, default=None)
        cells = ["\\textbf{" + fmt(v) + "}" if v == best else fmt(v) for v in values]
        table.add([latex(label)] + cells)

    print(score)
    myGans = [g for g in gans if filterConvGen(g)]
    heading = ["dataset"] + myGans
    table = Table([latex(r) for r in heading])

    f.write((";".join(heading)) + "\n")
    sums = [0.0 for _ in myGans]
    nSets = 0
    for t in testSets:
        name = cleanupName(t)
        nSets += 1
        values = []

        for n, gan in enumerate(myGans):
            ganTsStat = statistic[gan][name]
            s = 0.0
            c = 0

            for alg in ["LR", "RF", "GB", "KNN", "DoC"]:
                if alg == "DoC" and not withDoC:
                    continue
                if alg in ganTsStat and score in ganTsStat[alg]:
                    s += ganTsStat[alg][score]
                    c += 1

            # No classifier result at all (e.g. missing result file): report
            # 0.0 like showDiagnoseByAlgoAverage instead of dividing by zero.
            s = s / c if c > 0 else 0.0
            sums[n] += s
            values.append(s)

        emit(name, values)

    averages = [s / nSets if nSets > 0 else 0.0 for s in sums]
    print(["average"] + averages)
    emit("average", averages)

    if withDoC:
        ext = ":A"
    else:
        ext = ":B"
    return table.showLatex(score, "tab:results:" + score + ext)

# Export every score twice (classifier average with and without DoC) as CSV
# and collect the matching LaTeX tables in one file per variant.
separatorLine = "% -------------------------------\n"

texW = ""
texWO = ""
for score in scoreNames:
    banner = separatorLine + "% " + score + "\n" + separatorLine

    with open(f"data_result/statistics/average-with_DoC-{score}.csv", "w") as f:
        texW += banner + csvForScore(f, score, True) + "\n\n"

    with open(f"data_result/statistics/average-without_DoC-{score}.csv", "w") as f:
        texWO += banner + csvForScore(f, score, False) + "\n\n"

with open("data_result/statistics/average-by_score-with_DoC.tex", "w") as f:
    f.write(texW)

with open("data_result/statistics/average-by_score-without_DoC.tex", "w") as f:
    f.write(texWO)
