In [1]:
import os
import numpy as np
import pandas as pd
# from ydata_profiling import ProfileReport
import seaborn as sns
from math import pi
import matplotlib.pyplot as plt

In [2]:
# df_result = pd.read_csv('', index_col = False)
# print(df_result.head())

In [3]:
# df_result = df_result.loc[:, ~df_result.columns.str.contains('^Unnamed')]
# df = df.drop(columns=[col for col in df.columns if 'Unnamed' in col])

In [4]:
# print(df_result)

In [5]:
# df_result.to_csv(f'automatic_eval_res.csv', index=False)

In [6]:
# profile = ProfileReport(df_result, title="Profiling Report")

In [7]:
# profile.to_notebook_iframe()

In [8]:
df_result = pd.read_csv('automatic_eval_res.csv')
print(df_result.head())

  Language Methodology   ACC    CS  BLEU    PPL   AVG
0       en    Parallel  79.5  81.5  46.5  102.3  69.2
1       en          AE   7.5  78.0  42.0  102.3  42.5
2       en          BT  27.0  65.5  11.5  118.0  34.7
3       en      MSF-AE  64.5  72.5  36.0  200.2  57.7
4       en      MSF-BT  67.0  56.5   8.0   65.7  43.8


In [9]:
# 1. Bar Chart: Average Scores Across Languages
plt.figure(figsize=(10, 6))
sns.barplot(x='Language', y='AVG', data=df_result, errorbar=None)
plt.title('Average Scores Across Languages')
# plt.show()

save_path = os.path.join('figs/', f'avg.png')
plt.savefig(save_path)
plt.close()

In [10]:
# 2. Heatmap: Sentiment Accuracy, Similarity, BLEU Score, and PPL
metrics = ['ACC', 'CS', 'BLEU', 'PPL']

for metric in metrics:
    pivot_table = df_result.pivot_table(index='Language', columns='Methodology', values=metric)
    plt.figure(figsize=(12, 8))
    sns.heatmap(pivot_table, annot=True, cmap='coolwarm', center=pivot_table.stack().mean())
    plt.title(f'Heatmap of {metric} by Language and Methodology')
    # plt.show()
    save_path = os.path.join('figs/', f'heatmap_{metric}.png')
    plt.savefig(save_path)
    plt.close()

In [11]:
# 3. Line Chart: Performance of Different Models
models = df_result['Methodology'].unique()
for metric in metrics:
    plt.figure(figsize=(14, 8))
    for model in models:
        model_data = df_result[df_result['Methodology'] == model]
        sns.lineplot(x='Language', y=metric, data=model_data, label=model)
    plt.title(f'Performance of Models Across Languages for {metric}')
    plt.legend()
    # plt.show()
    save_path = os.path.join('figs/', f'linechart_{metric}.png')
    plt.savefig(save_path)
    plt.close()

In [12]:
# 4. Grouped Bar Chart: English and Hindi vs. Low-Resource Languages
resource_rich = ['en', 'hi']
low_resource = df_result[~df_result['Language'].isin(resource_rich)]['Language'].unique()

for metric in metrics:
    plt.figure(figsize=(14, 8))
    sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Language'].isin(resource_rich + low_resource.tolist())], ci=None)
    plt.title(f'{metric} for Resource-Rich vs. Low-Resource Languages')
    # plt.show()
    save_path = os.path.join('figs/', f'groupedbar_{metric}.png')
    plt.savefig(save_path)
    plt.close()


The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Language'].isin(resource_rich + low_resource.tolist())], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Language'].isin(resource_rich + low_resource.tolist())], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Language'].isin(resource_rich + low_resource.tolist())], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Language'].isin(resource_rich + low_resource.tolist())], ci=None)


In [13]:
# 5. Clustered Bar Chart: Masking vs. Non-Masking Methodologies
masking_methods = ['MSF-AE', 'MSF-BT']
non_masking_methods = ['AE', 'BT']

for metric in metrics:
    plt.figure(figsize=(14, 8))
    sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Methodology'].isin(masking_methods + non_masking_methods)], ci=None)
    plt.title(f'Impact of Masking Techniques on {metric}')
    # plt.show()
    save_path = os.path.join('figs/', f'clusteredbar_{metric}.png')
    plt.savefig(save_path)
    plt.close()


The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Methodology'].isin(masking_methods + non_masking_methods)], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Methodology'].isin(masking_methods + non_masking_methods)], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Methodology'].isin(masking_methods + non_masking_methods)], ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='Language', y=metric, hue='Methodology', data=df_result[df_result['Methodology'].isin(masking_methods + non_masking_methods)], ci=None)


In [14]:
# 6. Stacked Bar Chart: Performance of Models for Each Language
languages = df_result['Language'].unique()
for language in languages:
    language_data = df_result[df_result['Language'] == language]
    language_data.set_index('Methodology')[metrics].T.plot(kind='bar', stacked=True, figsize=(14, 8))
    plt.title(f'Performance of Models for {language}')
    # plt.show()
    save_path = os.path.join('figs/', f'stackedbar_{language}.png')
    plt.savefig(save_path)
    plt.close()

In [15]:
# 7. Box Plot: Distribution of Specific Metrics Across Languages
for metric in metrics:
    plt.figure(figsize=(14, 8))
    sns.boxplot(x='Language', y=metric, data=df_result)
    plt.title(f'Distribution of {metric} Across Languages')
    # plt.show()
    save_path = os.path.join('figs/', f'boxplot_{metric}.png')
    plt.savefig(save_path)
    plt.close()

In [16]:
# 8. Line Chart: Indo-Aryan vs. Dravidian Languages
indo_aryan = ['en', 'hi', 'mag', 'mr', 'or', 'pa', 'ur']
dravidian = ['ml', 'te']

metrics = ['ACC', 'CS', 'BLEU', 'PPL']

for metric in metrics:
    plt.figure(figsize=(14, 8))
    
    indo_aryan_data = df_result[df_result['Language'].isin(indo_aryan)]
    sns.lineplot(x='Language', y=metric, hue='Methodology', data=indo_aryan_data, marker='o')
    
    dravidian_data = df_result[df_result['Language'].isin(dravidian)]
    sns.lineplot(x='Language', y=metric, hue='Methodology', data=dravidian_data, marker='s', linestyle='--')
    
    plt.title(f'Performance of Indo-Aryan vs. Dravidian Languages for {metric}')
    plt.legend(title='Methodology', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xlabel('Language')
    plt.ylabel(metric)
    plt.tight_layout()
    # plt.show()

    save_path = os.path.join('figs/', f'linechart_indovsdrav_{metric}.png')
    plt.savefig(save_path)
    plt.close()

In [17]:
languages = df_result['Language'].unique()
methodologies = df_result['Methodology'].unique()
categories = ['ACC', 'CS', 'BLEU', 'PPL']
num_vars = len(categories)

for language in languages:
    plt.figure(figsize=(8, 8))
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]

    ax = plt.subplot(111, polar=True)

    for methodology in methodologies:
        subset = df_result[(df_result['Language'] == language) & (df_result['Methodology'] == methodology)]
        if subset.empty:
            print(f"No data for language: {language}, methodology: {methodology}")
            continue
        values = subset[categories].values.flatten().tolist()
        if len(values) == 0:
            print(f"No metric values for language: {language}, methodology: {methodology}")
            continue
        values += values[:1]
        ax.plot(angles, values, label=methodology)
        ax.fill(angles, values, alpha=0.25)

    ax.set_yticklabels([])
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories)
    plt.title(f'Radar Chart of Methodologies for {language}')
    plt.legend()

    save_path = os.path.join('figs/', f'radarchart_{language}.png')
    plt.savefig(save_path)
    plt.close()

No data for language: en, methodology: En-IP-TR-Train
No data for language: en, methodology: En-Op-Tr


In [18]:
# 10. Scatter Plot: Sentiment Accuracy vs. BLEU Score
plt.figure(figsize=(14, 8))
sns.scatterplot(x='ACC', y='BLEU', hue='Language', style='Methodology', data=df_result)
plt.title('Sentiment Accuracy vs. BLEU Score')
# plt.show()
save_path = os.path.join('figs/', f'scatter_accvsbleu.png')
plt.savefig(save_path)
plt.close()

In [19]:
# 11. Scatter Plot: Sentiment Accuracy vs. Similarity
plt.figure(figsize=(14, 8))
sns.scatterplot(x='ACC', y='CS', hue='Language', style='Methodology', data=df_result)
plt.title('Sentiment Accuracy vs. CS Score')
# plt.show()
save_path = os.path.join('figs/', f'scatter_accvscs.png')
plt.savefig(save_path)
plt.close()

In [20]:
# 12. Scatter Plot: Bleu score vs. Similarity
plt.figure(figsize=(14, 8))
sns.scatterplot(x='BLEU', y='CS', hue='Language', style='Methodology', data=df_result)
plt.title('BLEU vs. CS Score')
# plt.show()
save_path = os.path.join('figs/', f'scatter_bleuvscs.png')
plt.savefig(save_path)
plt.close()

In [None]:
# 13. analysis between the categories of methodologies
methodology_categories = {
    'Parallel': ['Parallel'],
    'Non-parallel': ['AE', 'BT', 'MSF-AE', 'MSF-BT'],
    'Cross-Lingual': ['En-IP-TR-Train','En-OP-TR'],
    'Shared Learning Joint': ['Joint'],
    'Large Language Models': ['Llama2', 'Llama2_chat', 'GPT3.5']
}

category_data = []

for category, methodologies in methodology_categories.items():
    category_data.extend([(category, methodology, metric, value)
                          for methodology in methodologies
                          for metric, values in category_metrics[category].items()
                          for value in values])

df_category = pd.DataFrame(category_data, columns=['Category', 'Methodology', 'Metric', 'Value'])

for metric in metrics:
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df_category[df_category['Metric'] == metric], x='Category', y='Value', hue='Methodology')
    plt.title(f'Comparison of {metric} across Methodology Categories')
    plt.xlabel('Methodology Category')
    plt.ylabel(metric)
    plt.xticks(rotation=45)
    plt.legend(title='Methodology', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    # plt.show()

    save_path = os.path.join('figs/', f'category_{metric}.png')
    plt.savefig(save_path)
    plt.close()