In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json

In [None]:
# Global figure typography: route all text through LaTeX and use a serif face.
# NOTE(review): `text.usetex` presumably needs a working LaTeX installation on
# the machine that renders the figures — confirm for the target environment.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "serif"

# Font sizes (points): general text, axis labels, axes titles.
plt.rcParams["font.size"] = 12
plt.rcParams["axes.labelsize"] = 12
plt.rcParams["axes.titlesize"] = 14

In [None]:
# Load the raw dataset into `data`.
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform default, which is not UTF-8 everywhere.
with open("../data/data.json", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Per-model accumulator: one entry per LLM, each holding six parallel lists
# (one list per paper attribute). Every model/field pair gets its own fresh
# list object, so appending to one never affects another.
data_dict = {
    model_name: {
        field: []
        for field in (
            'categories',
            'years',
            'references',
            'citations',
            'jifs',
            'journal_categories',
        )
    }
    for model_name in ('Chat-GPT 4o', 'Claude 3.5 Sonnet', 'Gemini 1.5 Flash')
}

In [None]:
# Flatten the nested JSON (data['models'][model][category]['papers']) into the
# parallel per-model lists of `data_dict`: one entry per paper in each list.

# Empty the accumulators first so that re-running this cell does not append
# every paper a second time (the lists live at module level and would
# otherwise keep growing across executions).
for model_columns in data_dict.values():
    for column_values in model_columns.values():
        column_values.clear()

for model in data['models']:
    # Raises KeyError if the JSON contains a model that has no entry in data_dict.
    model_columns = data_dict[model]
    for category in data['models'][model]:
        for paper in data['models'][model][category]['papers']:
            # The category is repeated per paper so all six lists stay aligned.
            model_columns['categories'].append(category)
            model_columns['references'].append(paper['reference'])
            model_columns['years'].append(paper['year'])
            model_columns['citations'].append(paper['citations'])
            model_columns['jifs'].append(paper['jif'])
            model_columns['journal_categories'].append(paper['journal_categories'])

In [None]:
# Long-format columns for plotting: the per-model lists of `data_dict` are
# concatenated in model order, one row per paper.
plot_data = {
    # Model label for each paper, shortened to the first space-separated token
    # of the model name (e.g. 'Claude 3.5 Sonnet' -> 'Claude').
    'models': [
        model_name.split(' ')[0]
        for model_name, fields in data_dict.items()
        for _ in fields['references']
    ],
    # Remaining columns: plain concatenation of each model's list.
    **{
        column: [value for fields in data_dict.values() for value in fields[column]]
        for column in (
            'categories',
            'references',
            'years',
            'citations',
            'jifs',
            'journal_categories',
        )
    },
}

In [None]:
# One row per paper; each key of `plot_data` becomes a column.
df = pd.DataFrame.from_dict(plot_data, orient='columns')

In [None]:
# Box colour for each shortened model label (the values found in df['models']).
custom_palette = dict(
    zip(
        ('Chat-GPT', 'Claude', 'Gemini'),
        ('#e9c46a', '#f4a261', '#e76f51'),
    )
)

# Publication year per LLM: box plot with the individual papers overlaid.
fig, ax = plt.subplots(figsize=(3, 6))

# NOTE(review): recent seaborn releases (0.13+, to my knowledge) emit a
# FutureWarning when `palette` is passed without `hue` — confirm against the
# installed version before changing the call.
sns.boxplot(data=df, x='models', y='years', palette=custom_palette, ax=ax)
sns.stripplot(
    data=df, x='models', y='years',
    color='#2a9d8f', size=5, jitter=0.05, alpha=0.75, ax=ax,
)

# Axis labels use LaTeX bold markup; no title is set on this figure.
ax.set_xlabel(r'\textbf{LLM}')
ax.set_ylabel(r'\textbf{Year of publication}')

# Write a 300 dpi PNG with a tight bounding box, then display the figure.
fig.savefig('years_box_plot.png', format='png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Citation count per LLM: box plot with the individual papers overlaid.
fig, ax = plt.subplots(figsize=(3, 6))

# NOTE(review): recent seaborn releases (0.13+, to my knowledge) emit a
# FutureWarning when `palette` is passed without `hue` — confirm against the
# installed version before changing the call.
sns.boxplot(data=df, x='models', y='citations', palette=custom_palette, ax=ax)
sns.stripplot(
    data=df, x='models', y='citations',
    color='#2a9d8f', size=5, jitter=0.05, alpha=0.75, ax=ax,
)

# Axis labels use LaTeX bold markup; no title is set on this figure.
ax.set_xlabel(r'\textbf{LLM}')
ax.set_ylabel(r'\textbf{Number of citations}')

# Write a 300 dpi PNG with a tight bounding box, then display the figure.
fig.savefig('citations_box_plot.png', format='png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# JIF per LLM (the 'jifs' column): box plot with the individual papers overlaid.
fig, ax = plt.subplots(figsize=(3, 6))

# NOTE(review): recent seaborn releases (0.13+, to my knowledge) emit a
# FutureWarning when `palette` is passed without `hue` — confirm against the
# installed version before changing the call.
sns.boxplot(data=df, x='models', y='jifs', palette=custom_palette, ax=ax)
sns.stripplot(
    data=df, x='models', y='jifs',
    color='#2a9d8f', size=5, jitter=0.05, alpha=0.75, ax=ax,
)

# Axis labels use LaTeX bold markup; no title is set on this figure.
ax.set_xlabel(r'\textbf{LLM}')
ax.set_ylabel(r'\textbf{JIF}')

# Write a 300 dpi PNG with a tight bounding box, then display the figure.
fig.savefig('jifs_box_plot.png', format='png', dpi=300, bbox_inches='tight')
plt.show()