In [22]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
def results_df(i, models = ["SVM", "KNN", "RF", "LR"], param_n = [8, 16, 24, 32, 64, 96, 128]):
    results_filename = '../results/{}_out.txt'.format(i)
    df_dict = {model: pd.DataFrame() for model in models}

    with open(results_filename, 'r') as f:   
        df = pd.DataFrame(columns=["train", "test"])

        for line in f.readlines():
            
            if line.strip().lstrip('=') in models:
                model = line.strip().lstrip('=')
                master_df = df_dict[model]

            elif line.startswith("{"):
                params = line.replace("'", "\"")
                params = json.loads(params)
                df = df.append(params, ignore_index=True)

            elif line.startswith("n="):
                train, test = line.split("-->")[1].split("/")
                train, test = "%.2f" % float(train), "%.2f" % float(test)
                train, test = train.strip('0').rstrip('.'), test.strip('0').rstrip('.')         
                df.loc[len(df)-1,["train", "test"]] = [train, test]

                if len(df) == len(param_n):
                    df["k"] = param_n
                    master_df = master_df.append(df, ignore_index=True)
                    df_dict[model] = master_df
                    df = pd.DataFrame(columns=["train", "test"])

    master_df = df_dict["SVM"]
    master_df = master_df.drop(['class_weight'], axis=1)
    master_df['gamma'] = master_df['gamma'].apply(lambda x: "%.3f" % x)
    master_df = master_df.rename(columns={'gamma': '$\gamma$'})
    df_dict["SVM"] = master_df          

    master_df = df_dict["KNN"]
    master_df['metric'] = master_df['metric'].apply(lambda x: x[0].upper())
    master_df['weights'] = master_df['weights'].apply(lambda x: x[0].upper())
    master_df = master_df.rename(columns={'n_neighbors': 'n'})
    master_df['n'] = master_df['n'].astype('int')
    df_dict["KNN"] = master_df

    master_df = df_dict["RF"]
    master_df = master_df.rename(columns={'ccp_alpha': '$\\alpha$', 'class_weight': 'weights', 'n_estimators': 'n'})
    master_df['weights'] = master_df['weights'].apply(lambda x: "".join([i[0].upper() for i in x.split("_")]))
    master_df['criterion'] = master_df['criterion'].apply(lambda x: x[0].upper())
    master_df['n'] = master_df['n'].astype('int')
    df_dict["RF"] = master_df

    master_df = df_dict["LR"]
    master_df = master_df.drop(['class_weight'], axis=1)
    df_dict["LR"] = master_df
    
    return df_dict

In [31]:
def results_latex_table(i, model_1, model_2, df_dict, param_n = [8, 16, 24, 32, 64, 96, 128]):
    """Render the results of two models side by side as a LaTeX table.

    The frames ``df_dict[model_1]`` and ``df_dict[model_2]`` are joined on
    their index. The ``k`` column of the first frame is dropped and the one
    of the second frame (expected to be its last column) is shown once, next
    to a running group number that is printed on the first row of every
    ``len(param_n)`` rows.

    Parameters
    ----------
    i : experiment identifier used in the caption and the label.
    model_1, model_2 : keys of ``df_dict``; also used as group headings.
    df_dict : mapping of model name to results DataFrame (not modified).
    param_n : its length is the number of rows per group.

    Returns
    -------
    str : the LaTeX source of the table.
    """
    group_size = len(param_n)

    left = df_dict[model_1].drop(columns=["k"])
    right = df_dict[model_2].copy()
    # Columns present in both frames are the only ones join() will suffix.
    shared = set(left.columns) & set(right.columns)

    joined = left.join(right, lsuffix='_1', rsuffix='_2')
    n_groups = len(joined) // group_size
    joined["№"] = np.hstack(
        [[group] + [""] * (group_size - 1) for group in range(1, n_groups + 1)]
    )
    # Move the two trailing columns ("k", "№") to the front as "№", "k".
    cols = joined.columns.tolist()
    joined = joined[cols[-2:][::-1] + cols[:-2]]

    c1 = len(left.columns)
    c2 = len(right.columns) - 1  # "k" is shown in the shared leading columns

    def _display_name(col):
        # Remove only a suffix that join() added. str.rstrip("_12") would
        # strip a character set and mangle names such as "l1".
        if col[-2:] in ("_1", "_2") and col[:-2] in shared:
            return col[:-2]
        return col

    table = joined.to_latex(index=False,
                            column_format='|c|c|{}||{}|'.format('c'*c1, 'c'*c2),
                            escape=False,
                            header=[_display_name(c) for c in joined.columns],
                            caption="{} and {} parameters for experiments {}.".format(model_1, model_2, i),
                            label="tab:params_{}_{}_{}".format(i, model_1.lower(), model_2.lower())
                            )

    # Replace the top rule with a heading row naming the two models and drop
    # the remaining booktabs rules.
    table = table.replace(
        "\\toprule\n",
        "& & \\multicolumn{%d}{c||}{\\underline{%s}} & \\multicolumn{%d}{c|}{\\underline{%s}}\\\\\n"
        % (c1, model_1, c2, model_2)
    )
    table = table.replace("\\midrule\n", "")
    table = table.replace("\\bottomrule\n", "")

    lines = table.split("\n")
    # Locate the tabular body by content: the number of preamble lines that
    # to_latex emits before it is not the same in every pandas version.
    tabular_start = next(n for n, text in enumerate(lines) if text.startswith("\\begin{tabular}"))
    tabular_end = next(n for n, text in enumerate(lines) if text.startswith("\\end{tabular}"))
    header_line = tabular_start + 2  # model heading row, then column names

    lines[tabular_start] += "\\hline"
    # Rule under the column names and under the last row of every group.
    for n in range(header_line, tabular_end, group_size):
        lines[n] += "\\hline"
    return "\n".join(lines)

In [33]:
# For every experiment, parse its results once and write two LaTeX tables:
# one pairing SVM with KNN and one pairing RF with LR.
for i in range(1, 5):
    df_dict = results_df(i)
    for path_template, first_model, second_model in (
        ("../tex/{}_1.tex", "SVM", "KNN"),
        ("../tex/{}_2.tex", "RF", "LR"),
    ):
        with open(path_template.format(i), "w") as text_file:
            text_file.write(
                results_latex_table(i, first_model, second_model, df_dict)
            )

In [43]:
# Stack the "test" column of every model across experiments 1-4 into one
# table (one column per model) and save it as CSV.
stat_frames = []
for i in range(1, 5):
    # Load experiment i; a hard-coded 1 here would stack the first experiment
    # four times.
    df_dict = results_df(i)
    temp_stat_df = pd.DataFrame({model: df_dict[model]["test"] for model in df_dict})
    stat_frames.append(temp_stat_df)

# Concatenate once instead of appending inside the loop (DataFrame.append no
# longer exists in pandas 2) and keep the fixed column order.
stat_df = pd.concat(stat_frames, ignore_index=True).reindex(columns=["SVM", "KNN", "RF", "LR"])

stat_df.to_csv("test.csv")