# Create Figures for Paper

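This notebook creates the figures (and the LaTeX summary table) for the paper from the topic-classification metrics saved under `results/topic_classification`.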

In [1]:
# ---------------------------- PREPARING NOTEBOOK ---------------------------- #
# Autoreload: load the IPython extension and use mode 2 so imported modules
# are reloaded automatically when their source changes.
%load_ext autoreload
%autoreload 2

# Random seed: seed NumPy's legacy global RNG so random draws are repeatable.
import numpy as np
np.random.seed(42)

# External modules
import os
from IPython.display import display, Markdown, Latex, clear_output
# Note: this binds the `tqdm.notebook` *module* to the name `tqdm`.
from tqdm import notebook as tqdm

# Set global log level (root logger at INFO)
import logging
logging.basicConfig(level=logging.INFO)
# Sets TOKENIZERS_PARALLELISM to 'false' for this process.
# NOTE(review): presumably to avoid Hugging Face tokenizers parallelism warnings — confirm.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Define PWD as the root of the current git repository.
# `search_parent_directories=True` lets the notebook be started from any
# sub-directory of the repository.
import git
repo = git.Repo('.', search_parent_directories=True)
pwd = repo.working_dir
# Work from the repository root; later cells build their paths from `pwd`.
os.chdir(pwd)

## Loading Data

### Zero-shot

In [2]:
import pandas as pd

# Per-topic top-1/3/5 accuracy of the zero-shot FlauBERT run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "ZeroShot"),
    "flaubert_metrics.csv",
)
flaubert_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
flaubert_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Food,0.095238,0.261905,0.666667
1,Business,0.107143,0.285714,0.607143
2,Science,0.276596,0.716312,0.829787
3,Music,0.387755,0.92517,0.945578
4,Travel,0.477157,0.903553,0.974619
5,World,0.491935,0.814516,0.919355
6,Technology,0.544304,0.683544,0.797468
7,Health,0.606195,0.845133,0.986726
8,Culture,0.630662,0.864111,0.944251
9,Politics,0.672131,0.803279,0.868852


In [3]:
import pandas as pd

# Overall accuracy / precision / recall / F1 of the zero-shot FlauBERT run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "ZeroShot"),
    "Flaubert-pretrained_classification_metrics.csv",
)
flaubert_classification_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
flaubert_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,Flaubert-pretrained,0.61917,0.61917,0.61917,0.61917


In [4]:
import pandas as pd

# Per-topic top-1/3/5 accuracy of the zero-shot mDeBERTa run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "ZeroShot"),
    "mDeBERTa-v3-base-mnli-xnli_metrics.csv",
)
mDeBERTa_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
mDeBERTa_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Science,0.099291,0.276596,0.468085
1,World,0.153226,0.725806,0.862903
2,Business,0.25,0.571429,0.928571
3,Culture,0.275261,0.745645,0.919861
4,Technology,0.367089,0.746835,0.898734
5,Health,0.424779,0.778761,0.880531
6,Sport,0.439068,0.795699,0.915771
7,Travel,0.492386,0.837563,0.954315
8,Food,0.547619,0.857143,0.97619
9,Music,0.70068,0.952381,0.979592


In [5]:
import pandas as pd

# Overall accuracy / precision / recall / F1 of the zero-shot mDeBERTa run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "ZeroShot"),
    "mDeBERTa-v3-base-mnli-xnli_classification_metrics.csv",
)
mDeBERTa_classification_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
mDeBERTa_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,mDeBERTa-v3-base-mnli-xnli,0.419272,0.419272,0.419272,0.419272


### OpenAI Topic Classifier

In [6]:
import pandas as pd

# Per-topic top-1/3/5 accuracy of the zero-shot davinci-002 run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification"),
    "ZeroShot_davinci-002_metrics.csv",
)
davinci_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
davinci_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,World,0.0,0.056452,0.072581
1,Business,0.0,0.0,0.035714
2,Food,0.0,0.0,0.02381
3,Culture,0.003484,0.017422,0.832753
4,Science,0.007092,0.744681,0.751773
5,Health,0.039823,0.088496,0.778761
6,Technology,0.050633,0.050633,0.063291
7,Music,0.054422,0.884354,0.891156
8,Politics,0.131148,0.131148,0.131148
9,Sport,0.374552,0.413978,0.415771


In [7]:
import pandas as pd

# Overall accuracy / precision / recall / F1 of the zero-shot davinci-002 run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification"),
    "ZeroShot_davinci-002_classification_metrics.csv",
)
davinci_classification_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
davinci_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,ZeroShot_davinci-002,0.228088,0.228088,0.228088,0.228088


In [8]:
import pandas as pd

# Per-topic top-1/3/5 accuracy of the zero-shot gpt-3.5-turbo-1106 run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification"),
    "ZeroShot_gpt-3.5-turbo-1106_metrics.csv",
)
gpt_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
gpt_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Business,0.142857,0.821429,1.0
1,Science,0.248227,0.468085,0.574468
2,World,0.274194,0.983871,1.0
3,Technology,0.56962,0.848101,0.936709
4,Food,0.571429,0.833333,0.880952
5,Culture,0.616725,0.926829,0.965157
6,Travel,0.639594,0.93401,0.979695
7,Health,0.654867,0.90708,0.960177
8,Music,0.673469,0.952381,0.972789
9,Sport,0.822581,0.9319,0.958781


In [9]:
import pandas as pd

# Overall accuracy / precision / recall / F1 of the zero-shot gpt-3.5-turbo-1106 run.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification"),
    "ZeroShot_gpt-3.5-turbo-1106_classification_metrics.csv",
)
gpt_classification_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
gpt_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,ZeroShot_gpt-3.5-turbo-1106,0.64736,0.64736,0.64736,0.64736


### Flaubert fine-tuned

In [10]:
import pandas as pd

# Per-topic top-1/3/5 accuracy of the fine-tuned FlauBERT model.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "FlaubertFineTuned"),
    "flaubert_fine_tuned_metrics.csv",
)
flaubert_fine_tuned_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
flaubert_fine_tuned_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Science,0.453901,0.964539,1.0
1,Business,0.535714,0.785714,0.785714
2,Food,0.714286,0.880952,0.880952
3,Politics,0.745902,0.909836,0.967213
4,Culture,0.749129,0.965157,0.996516
5,Travel,0.796954,0.974619,0.994924
6,Music,0.802721,0.945578,0.993197
7,Health,0.809735,0.986726,0.995575
8,World,0.814516,0.991935,1.0
9,Technology,0.835443,0.962025,0.974684


In [11]:
import pandas as pd

# Overall accuracy / precision / recall / F1 of the fine-tuned FlauBERT model.
path = os.path.join(
    os.path.join(pwd, "results", "topic_classification", "FlaubertFineTuned"),
    "flaubert_fine_tuned_classification_metrics.csv",
)
flaubert_fine_tuned_classification_metrics = pd.read_csv(filepath_or_buffer=path)

# Last expression of the cell: rich display of the loaded table.
flaubert_fine_tuned_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,FlaubertFineTuned,0.791389,0.791389,0.791389,0.791389


## Figures Creation

In [14]:
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Switch to the PGF backend: figures are saved as .pgf files further down.
matplotlib.use("pgf")

# rcParams for the PGF export, gathered in one mapping and applied in a
# single update call.
pgf_rc_params = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
    "font.size": 10,
}
matplotlib.rcParams.update(pgf_rc_params)


# Per-topic accuracy tables keyed by the model name used in figure titles and
# file names. Each table is copied so the frames loaded above stay untouched.
metrics = {
    model_name: loaded_table.copy()
    for model_name, loaded_table in (
        ("Flaubert-pretrained", flaubert_metrics),
        ("Davinci-002", davinci_metrics),
        ("GPT-3.5-turbo-1106", gpt_metrics),
        ("mDeBERTa", mDeBERTa_metrics),
        ("Flaubert-fine-tuned", flaubert_fine_tuned_metrics),
    )
}

# Give every table the same header (the unnamed first column holds the topic
# label) and show it.
for key, table in metrics.items():
    table.columns = ["topic", "accuracy@1", "accuracy@3", "accuracy@5"]
    display(table)

# The seaborn theme does not depend on the model, so apply it once instead of
# on every iteration.
# NOTE(review): sns.set_theme() also resets font-related rcParams, which may
# override the "font.family": "serif" / "font.size": 10 configured for the pgf
# export — confirm this is intended.
sns.set_theme(style="whitegrid")

for key in metrics.keys():
    df = metrics[key].copy()

    # Explicit figure/axes handles; the figure size is adjusted again before
    # saving (see below).
    fig, ax = plt.subplots(figsize=(10, 8))

    # Stacked horizontal bars: each segment starts (`left=`) where the previous
    # top-k accuracy ends, so the total bar length equals accuracy@5.
    acc_1 = ax.barh(
        df.topic,
        df["accuracy@1"],
        label="Top 1 Accuracy",
        height=0.6,
        color="#1f77b4",
    )
    acc_2 = ax.barh(
        df.topic,
        df["accuracy@3"] - df["accuracy@1"],
        label="Top 3 Accuracy",
        height=0.5,
        left=df["accuracy@1"],
        color="#ff7f0e",
    )
    acc_3 = ax.barh(
        df.topic,
        df["accuracy@5"] - df["accuracy@3"],
        label="Top 5 Accuracy",
        height=0.4,
        left=df["accuracy@3"],
        color="#2ca02c",
    )

    # Title and axis labels. The title reports accuracy@1 of the LAST row.
    # NOTE(review): presumably the last row is an overall/aggregate row —
    # verify against the metrics CSV.
    ax.set_title(
        f"Accuracy for model {key} : {round(df['accuracy@1'].iloc[-1] * 100, 2)}%"
    )
    ax.set_xlabel("Accuracy")
    ax.set_ylabel("Label")

    # Rotate the x tick labels for readability (acts on the current axes,
    # which is `ax` because the figure was just created).
    plt.xticks(rotation=45, ha="right")

    ax.legend()

    # Keep labels from being clipped in the final output.
    fig.tight_layout()

    # Remove grid
    ax.grid(False)

    # Emphasise the last row's bar (drawn at the top of the chart) in every
    # series with hatching and a larger height. Using the bar containers
    # instead of hard-coded `ax.get_children()[11/23/35]` indices keeps this
    # correct for any number of rows; the old indices only hit the last bar of
    # each series when the table had exactly 12 rows.
    for bars, emphasised_height in ((acc_1, 1), (acc_2, 0.9), (acc_3, 0.8)):
        bars.patches[-1].set(
            hatch="//", alpha=0.8, edgecolor="white", height=emphasised_height
        )

    # Final figure size in inches (width 8, height 8 / 1.618).
    fig.set_size_inches(8, h=8 / 1.618)

    # Save figure
    path = os.path.join(
        pwd,
        "figures",
        "topic_classification",
        f"accuracy_{key}.pgf",
    )
    os.makedirs(os.path.dirname(path), exist_ok=True)
    fig.savefig(path, bbox_inches="tight")

    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)

Unnamed: 0,topic,accuracy@1,accuracy@3,accuracy@5
0,Food,0.095238,0.261905,0.666667
1,Business,0.107143,0.285714,0.607143
2,Science,0.276596,0.716312,0.829787
3,Music,0.387755,0.92517,0.945578
4,Travel,0.477157,0.903553,0.974619
5,World,0.491935,0.814516,0.919355
6,Technology,0.544304,0.683544,0.797468
7,Health,0.606195,0.845133,0.986726
8,Culture,0.630662,0.864111,0.944251
9,Politics,0.672131,0.803279,0.868852


Unnamed: 0,topic,accuracy@1,accuracy@3,accuracy@5
0,World,0.0,0.056452,0.072581
1,Business,0.0,0.0,0.035714
2,Food,0.0,0.0,0.02381
3,Culture,0.003484,0.017422,0.832753
4,Science,0.007092,0.744681,0.751773
5,Health,0.039823,0.088496,0.778761
6,Technology,0.050633,0.050633,0.063291
7,Music,0.054422,0.884354,0.891156
8,Politics,0.131148,0.131148,0.131148
9,Sport,0.374552,0.413978,0.415771


Unnamed: 0,topic,accuracy@1,accuracy@3,accuracy@5
0,Business,0.142857,0.821429,1.0
1,Science,0.248227,0.468085,0.574468
2,World,0.274194,0.983871,1.0
3,Technology,0.56962,0.848101,0.936709
4,Food,0.571429,0.833333,0.880952
5,Culture,0.616725,0.926829,0.965157
6,Travel,0.639594,0.93401,0.979695
7,Health,0.654867,0.90708,0.960177
8,Music,0.673469,0.952381,0.972789
9,Sport,0.822581,0.9319,0.958781


Unnamed: 0,topic,accuracy@1,accuracy@3,accuracy@5
0,Science,0.099291,0.276596,0.468085
1,World,0.153226,0.725806,0.862903
2,Business,0.25,0.571429,0.928571
3,Culture,0.275261,0.745645,0.919861
4,Technology,0.367089,0.746835,0.898734
5,Health,0.424779,0.778761,0.880531
6,Sport,0.439068,0.795699,0.915771
7,Travel,0.492386,0.837563,0.954315
8,Food,0.547619,0.857143,0.97619
9,Music,0.70068,0.952381,0.979592


Unnamed: 0,topic,accuracy@1,accuracy@3,accuracy@5
0,Science,0.453901,0.964539,1.0
1,Business,0.535714,0.785714,0.785714
2,Food,0.714286,0.880952,0.880952
3,Politics,0.745902,0.909836,0.967213
4,Culture,0.749129,0.965157,0.996516
5,Travel,0.796954,0.974619,0.994924
6,Music,0.802721,0.945578,0.993197
7,Health,0.809735,0.986726,0.995575
8,World,0.814516,0.991935,1.0
9,Technology,0.835443,0.962025,0.974684


In [26]:
import warnings

# Silence every FutureWarning for the rest of the session so they do not
# clutter the cell output.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Overall classification metrics keyed by the model name shown in the table.
# Copies are used so the frames loaded above stay untouched.
metrics = {
    "Flaubert-pretrained": flaubert_classification_metrics.copy(),
    "Davinci-002": davinci_classification_metrics.copy(),
    "GPT-3.5-turbo-1106": gpt_classification_metrics.copy(),
    "mDeBERTa": mDeBERTa_classification_metrics.copy(),
    "Flaubert-fine-tuned": flaubert_fine_tuned_classification_metrics.copy(),
}

# Source column -> label used in the paper, listed in presentation order.
# Renaming by NAME matters: the CSV columns come in the order
# accuracy, precision, recall, f1, so the previous positional relabelling
# (accuracy, f1, precision, recall) attached the wrong label to three columns.
# In the tables loaded here all four values coincide, so the rendered numbers
# are unchanged.
metric_labels = {
    "accuracy": "accuracy",
    "f1": "f1 (micro)",
    "precision": "precision (micro)",
    "recall": "recall (micro)",
}

# Format all tables the same way
for key in metrics.keys():
    table = metrics[key]
    # The first column is the unnamed one holding the model identifier.
    table = table.rename(columns={table.columns[0]: "model", **metric_labels})
    metrics[key] = table[["model", *metric_labels.values()]]

# Concatenate all metrics (one row per model) and label rows with the dict keys.
df = pd.concat(metrics.values(), axis=0)
df["model"] = list(metrics.keys())

# Index by model name and sort by accuracy, best first.
df = df.set_index("model").sort_values("accuracy", ascending=False)


# Style dataframe
# Bold best results
def highlight_best(x):
    """Build the CSS frame that highlights the best value of each metric.

    Intended for ``Styler.apply(..., axis=None)``, which passes the whole
    DataFrame and expects a same-shaped frame of CSS strings back.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame containing the columns "accuracy", "f1 (micro)",
        "precision (micro)" and "recall (micro)".

    Returns
    -------
    pandas.DataFrame
        Frame with the same index and columns as ``x``; every cell is an empty
        string except the row holding each metric's maximum, which carries the
        bold / light-red CSS rule.

    Raises
    ------
    KeyError
        If one of the four metric columns is missing from ``x``.
    """
    best_style = "font-weight: bold; color: #FF9999;"

    # Start from "no styling" everywhere.
    styles = pd.DataFrame("", index=x.index, columns=x.columns)

    for metric in ["accuracy", "f1 (micro)", "precision (micro)", "recall (micro)"]:
        # Single .loc[row, column] assignment: the previous chained form
        # `df[metric].loc[...] += ...` writes to a temporary object under
        # pandas copy-on-write and would leave the frame unstyled.
        styles.loc[x[metric].idxmax(), metric] = best_style

    return styles


# Colour map for the background gradient.
green_cmap = sns.light_palette("green", as_cmap=True)

# Build the Styler step by step: gradient, best-value highlighting, bold
# headers on both axes, then number formatting (comma as decimal separator,
# two decimals).
styled_df = df.style.background_gradient(cmap=green_cmap)
styled_df = styled_df.apply(highlight_best, axis=None)
for header_axis in ("columns", "rows"):
    styled_df = styled_df.applymap_index(
        lambda v: "font-weight: bold;", axis=header_axis
    )
styled_df = styled_df.format(decimal=",", thousands=".", precision=2)

display(styled_df)
# Destination of the exported LaTeX table; create its directory if needed.
path = os.path.join(
    pwd, "figures", "topic_classification", "classification_metrics.tex"
)
os.makedirs(os.path.dirname(path), exist_ok=True)

# Convert the styled table to LaTeX; `convert_css=True` translates the CSS
# rules set on the Styler into LaTeX commands.
latex = styled_df.to_latex(
    caption="Metrics for topic classification models.",
    clines="skip-last;data",
    convert_css=True,
    position_float="centering",
    multicol_align="|c|",
    hrules=True,
)

# Wrap the tabular in \begin{adjustbox}{center} ... \end{adjustbox}
latex = latex.replace(
    "\\begin{tabular}", "\\begin{adjustbox}{center}\n\\begin{tabular}"
).replace("\\end{tabular}", "\\end{tabular}\n\\end{adjustbox}")

# Add a comment block (minipage with notes) under the table.
# NOTE(review): `\hline` is normally only valid inside tabular/array
# environments; inside this minipage `\hrule` may be what is intended —
# confirm when compiling the paper.
latex = latex.replace(
    "\\end{adjustbox}",
    "\\end{adjustbox}\n\\begin{minipage}{12cm}\n\\vspace{0.1cm}\n\\hline\n\\vspace{0.1cm}\n\\begin{itemize}\n\\item The best results have been highlighted in bold and light red.\n\\item The color gradient is more intense for the best results.\n\\item The models are sorted by accuracy\n\\end{itemize}\n\\end{minipage}",
)

# Force position of table
latex = latex.replace("\\begin{table}", "\\begin{table}[!h]")

print(latex)

# Save the table next to the figures. Previously `path` was computed and its
# directory created, but the LaTeX source was only printed.
with open(path, "w", encoding="utf-8") as tex_file:
    tex_file.write(latex)

Unnamed: 0_level_0,accuracy,f1 (micro),precision (micro),recall (micro)
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Flaubert-fine-tuned,79,79,79,79
GPT-3.5-turbo-1106,65,65,65,65
Flaubert-pretrained,62,62,62,62
mDeBERTa,42,42,42,42
Davinci-002,23,23,23,23


\begin{table}[!h]
\centering
\caption{Metrics for topic classification models.}
\begin{adjustbox}{center}
\begin{tabular}{lrrrr}
\toprule
 & \bfseries accuracy & \bfseries f1 (micro) & \bfseries precision (micro) & \bfseries recall (micro) \\
model &  &  &  &  \\
\midrule
\bfseries Flaubert-fine-tuned & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,79 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,79 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,79 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,79 \\
\bfseries GPT-3.5-turbo-1106 & {\cellcolor[HTML]{3C9D3C}} \color[HTML]{F1F1F1} 0,65 & {\cellcolor[HTML]{3C9D3C}} \color[HTML]{F1F1F1} 0,65 & {\cellcolor[HTML]{3C9D3C}} \color[HTML]{F1F1F1} 0,65 & {\cellcolor[HTML]{3C9D3C}} \color[HTML]{F1F1F1} 0,65 \\
\bfseries Flaubert-pretrained & {\cellcolor[HTML]{48A348}} \color[HTML]{F1F1F1} 0,62 & {\cellcolor