# Experiments Summaries Analysis

In [1]:
import glob
import numpy as np
import pandas as pd
from typing import Dict, List, Optional, Tuple, Union
from utils import df_to_latex, summarize_tables, sum_table_to_df

<div class="alert alert-info">
    <b>ACHTUNG</b>: `experiments_path` ändern für verschiedene Zusammenfassungen.


</div>

In [2]:
experiments_path = "../data/analysis/experiments_summaries/experiment_summary_13"
vectorization_methods = ["bow", "zscore", "tfidf", "cos", "zcos"]
drop_not_tuned = True

## Speeches corpus (only tuned classification methods)

In [3]:
speeches_path = experiments_path + "/speeches/all_classification_tables/"
speeches_clf_tables = glob.glob(speeches_path + "/*.csv")

In [4]:
speeches_dict = {}

for vectorization_method in vectorization_methods:
    speeches_dict[vectorization_method] = sum_table_to_df(summarize_tables(speeches_clf_tables, 
                                                                           speeches_path, 
                                                                           vectorization_method,
                                                                           drop_not_tuned = drop_not_tuned))

In [5]:
speeches_bow_df = speeches_dict["bow"]
speeches_zscore_df = speeches_dict["zscore"]
speeches_tfidf_df = speeches_dict["tfidf"]
speeches_cos_df = speeches_dict["cos"]

speeches_zcos_df = speeches_dict["zcos"]

### Speeches DataFrames to latex tables (remove comment for desired table)

In [6]:
#print(df_to_latex(speeches_bow_df))
#print(df_to_latex(speeches_zscore_df))
#print(df_to_latex(speeches_tfidf_df))
#print(df_to_latex(speeches_cos_df))
print(df_to_latex(speeches_zcos_df))

\small
\begin{tabular}{c|cccc}
\hline
& \textbf{2000} & \textbf{3000} & \textbf{4000} & \textbf{5000}\\\hline
\textbf{SD-tKNN} & 0.508 (0.528) & 0.504 (0.587) & 0.535 (0.581) & 0.527 (0.573)\\
\textbf{SD-tNSC} & 0.542 (0.526) & 0.485 (0.559) & 0.531 (0.555) & 0.569 (0.572)\\
\textbf{tLSVM} & 0.727 (0.721) & 0.727 (0.723) & 0.704 (0.7) & 0.727 (0.697)\\
\end{tabular}


## Reduced prose corpus (only tuned classification methods)

In [9]:
red_prose_path = experiments_path + "/red_prose/all_classification_tables/"
red_prose_clf_tables = glob.glob(red_prose_path + "/*.csv")

In [10]:
red_prose_dict = {}

for vectorization_method in vectorization_methods:
    red_prose_dict[vectorization_method] = sum_table_to_df(summarize_tables(red_prose_clf_tables, 
                                                                           red_prose_path, 
                                                                           vectorization_method,
                                                                           drop_not_tuned = drop_not_tuned))

In [11]:
red_prose_bow_df = red_prose_dict["bow"]
red_prose_zscore_df = red_prose_dict["zscore"]
red_prose_tfidf_df = red_prose_dict["tfidf"]
red_prose_cos_df = red_prose_dict["cos"]

red_prose_zcos_df = red_prose_dict["zcos"]

In [19]:
#print(df_to_latex(red_prose_bow_df))
#print(df_to_latex(red_prose_zscore_df))
#print(df_to_latex(red_prose_tfidf_df))
#print(df_to_latex(red_prose_cos_df))
print(df_to_latex(red_prose_zcos_df))

\small
\begin{tabular}{c|cccc}
\hline
& \textbf{2000} & \textbf{3000} & \textbf{4000} & \textbf{5000}\\\hline
\textbf{SD-tKNN} & 0.96 (0.963) & 0.972 (0.963) & 0.957 (0.965) & 0.967 (0.967)\\
\textbf{SD-tNSC} & 0.977 (0.972) & 0.99 (0.98) & 0.967 (0.987) & 0.98 (0.979)\\
\textbf{tLSVM} & 0.99 (0.98) & 0.995 (0.983) & 0.997 (0.992) & 0.99 (0.985)\\
\end{tabular}


## Prose corpus (only tuned classification methods)

In [6]:
prose_path = experiments_path + "/prose/all_classification_tables/"
prose_clf_tables = glob.glob(prose_path + "/*.csv")

In [7]:
prose_dict = {}

for vectorization_method in vectorization_methods:
    prose_dict[vectorization_method] = sum_table_to_df(summarize_tables(prose_clf_tables, 
                                                                        prose_path, 
                                                                        vectorization_method,
                                                                        drop_not_tuned = drop_not_tuned))

ValueError: Length mismatch: Expected axis has 1 elements, new values have 3 elements

In [10]:
prose_bow_df = prose_dict["bow"]
prose_zscore_df = prose_dict["zscore"]
prose_tfidf_df = prose_dict["tfidf"]
prose_cos_df = prose_dict["cos"]

In [11]:
prose_bow_df

Unnamed: 0,200,300,500,1000,2000,3000
tKNN,0.731 (0.615),0.74 (0.641),0.767 (0.674),0.809 (0.69),0.833 (0.72),0.829 (0.738)
tNSC,0.529 (0.499),0.581 (0.521),0.62 (0.56),0.672 (0.578),0.731 (0.621),0.739 (0.652)
tMNB,0.899 (0.845),0.935 (0.878),0.941 (0.896),0.949 (0.913),0.966 (0.915),0.97 (0.926)
tLSVM,0.95 (0.883),0.956 (0.888),0.963 (0.899),0.963 (0.899),0.969 (0.895),0.96 (0.902)
tLR,0.968 (0.945),0.986 (0.955),0.979 (0.953),0.96 (0.9),0.938 (0.834),0.907 (0.793)


### Prose DataFrames to latex tables (remove comment for desired table)

In [16]:
#print(df_to_latex(prose_bow_df))
#print(df_to_latex(prose_zscore_df))
#print(df_to_latex(prose_tfidf_df))
#print(df_to_latex(prose_cos_df))