In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Bar groups (one per model) and the bars inside each group (one per prompt style)
models = ['mistral-7b', 'llama-8b', 'Gemma2-9b', 'llama3-70b', 'gpt-4']
prompt_types = ['zero-shot', 'random sentence', 'random paragraph', 'ontology sentence', 'ontology paragraph']

# BLEU scores: one row per entry of `models`, one column per entry of `prompt_types`.
# NOTE(review): three values differ from the 'BLEU↑' column of the table in the
# next cell (llama3-70b, 3rd entry: 57.93 here vs 55.33 there; gpt-4, 2nd and
# 3rd entries: 64.23 / 61.00 here vs 65.15 / 63.90 there), and the model labels
# are spelled differently there as well — confirm which set is current.
bleu_scores = [
    [42.33, 41.88, 45.20, 46.24, 52.24],  # mistral-7b
    [52.91, 50.56, 48.19, 53.11, 55.65],  # llama-8b
    [64.41, 63.90, 63.32, 77.69, 66.76],  # Gemma2-9b
    [58.51, 56.48, 57.93, 67.83, 61.99],  # llama3-70b
    [65.26, 64.23, 61.00, 81.02, 66.65],  # gpt-4
]

# Left edge position of each group, and the width of a single bar
x = np.arange(len(models))
width = 0.15

fig, ax = plt.subplots()

# Draw one bar series per prompt style, each shifted right by one bar width
for i, prompt in enumerate(prompt_types):
    ax.bar(x + i * width, [row[i] for row in bleu_scores], width, label=prompt)

ax.set(
    xlabel='Models',
    ylabel='BLEU Score',
    title='BLEU Score Across Models and Prompt Types',
)

# With five bars per group, the middle bar sits two widths in from the first
ax.set_xticks(x + width * 2)
ax.set_xticklabels(models)
ax.legend()

plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Long-format score table: every model contributes five consecutive rows, one
# per prompt style, so each metric list below is laid out one model per line.
data = {
    'Model': [
        name
        for name in ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o')
        for _ in range(5)
    ],
    'Prompt': ['Zero-Shot', 'RS', 'RP', 'OS', 'OP'] * 5,
    'BLEU↑': [
        42.33, 41.88, 45.20, 46.24, 52.24,  # Mistral-7b
        52.91, 50.56, 48.19, 53.11, 55.65,  # Llama3-8b
        64.41, 63.90, 63.32, 77.69, 66.76,  # Gemma2-9b
        58.51, 56.48, 55.33, 67.83, 61.99,  # Llama3-70b
        65.26, 65.15, 63.90, 81.02, 66.65,  # GPT-4o
    ],
    'ROUGE↑': [
        0.34, 0.36, 0.38, 0.43, 0.40,  # Mistral-7b
        0.37, 0.34, 0.32, 0.38, 0.35,  # Llama3-8b
        0.39, 0.40, 0.39, 0.47, 0.42,  # Gemma2-9b
        0.44, 0.49, 0.49, 0.59, 0.47,  # Llama3-70b
        0.44, 0.46, 0.45, 0.58, 0.46,  # GPT-4o
    ],
    'METEOR↑': [
        0.39, 0.43, 0.43, 0.45, 0.41,  # Mistral-7b
        0.39, 0.36, 0.34, 0.38, 0.35,  # Llama3-8b
        0.36, 0.37, 0.36, 0.40, 0.38,  # Gemma2-9b
        0.44, 0.50, 0.49, 0.54, 0.46,  # Llama3-70b
        0.39, 0.42, 0.42, 0.47, 0.42,  # GPT-4o
    ],
    'BERTScore↑': [
        0.85, 0.85, 0.86, 0.87, 0.86,  # Mistral-7b
        0.86, 0.85, 0.85, 0.86, 0.86,  # Llama3-8b
        0.87, 0.87, 0.86, 0.88, 0.87,  # Gemma2-9b
        0.88, 0.88, 0.88, 0.90, 0.88,  # Llama3-70b
        0.88, 0.87, 0.87, 0.90, 0.88,  # GPT-4o
    ],
}

# Tabular view of the scores, one row per (model, prompt) pair
df = pd.DataFrame(data=data)

# Line colour assigned to each model
colors = dict(zip(
    ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o'),
    ('plum', 'lightgreen', 'gold', 'lightcoral', 'skyblue'),
))

# Function to plot a line chart for each metric on the specified axes
def plot_metric(ax, metric_name, data=None, palette=None):
    """Draw one line per model for a single metric on the given axes.

    Args:
        ax: Matplotlib axes to draw on.
        metric_name: Column of the score table to plot; also used verbatim as
            the panel title.
        data: Long-format table with 'Model', 'Prompt' and `metric_name`
            columns. Defaults to the notebook-level `df`.
        palette: Mapping from model name to line colour. Defaults to the
            notebook-level `colors`.

    Raises:
        ValueError: Raised by `DataFrame.pivot` when a (Prompt, Model) pair
            occurs more than once in the table.
        KeyError: If a model has no entry in the palette.
    """
    # `df` and `colors` are rebound by later cells of this notebook; accepting
    # them as arguments lets callers pin the table they actually mean.
    frame = df if data is None else data
    model_colors = colors if palette is None else palette

    # DataFrame.pivot hands back alphabetically sorted labels, which would put
    # the prompts and the legend entries in a different order than the table.
    # Restore the order in which prompts and models first appear.
    prompt_order = frame["Prompt"].unique()
    model_order = frame["Model"].unique()
    df_pivot = (
        frame.pivot(index="Prompt", columns="Model", values=metric_name)
        .reindex(index=prompt_order, columns=model_order)
    )

    for model in df_pivot.columns:
        ax.plot(df_pivot.index, df_pivot[model], marker='o', label=model, color=model_colors[model])

    # Title and tick label sizes
    ax.set_title(f'{metric_name}', fontsize=16)
    ax.tick_params(axis='x', labelsize=10)
    ax.tick_params(axis='y', labelsize=10)

# 2x2 grid of panels, filled row by row with one metric each
fig, axes = plt.subplots(2, 2, figsize=(12, 5))
for panel, metric in zip(axes.flat, ('BLEU↑', 'ROUGE↑', 'METEOR↑', 'BERTScore↑')):
    plot_metric(panel, metric)

# Single legend for the whole figure: its upper-left corner is pinned to the
# top-right corner of the bottom-right panel, so it sits outside the plot area.
axes[1, 1].legend(title="Models", loc='upper left', bbox_to_anchor=(1, 1), fontsize=12, title_fontsize=14)

# Tighten spacing so the panels do not overlap, then render
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Average ontology conformance (OC) per model; every list is ordered like
# `prompt_types`, ending with the zero-shot setting.
prompt_types = ['RP', 'RS', 'OP', 'OS', 'Zero_Shot']
mistral_7b = [64.52, 77.26, 83.25, 82.96, 88.92]
llama3_8b = [79.42, 74.87, 93.27, 91.01, 93.61]
gemma2_9b = [73.84, 86.90, 71.22, 96.62, 94.87]
llama3_70b = [81.41, 92.73, 89.54, 97.44, 99.47]
gpt_4o = [82.85, 94.83, 91.92, 96.20, 94.09]

# One colour per model, in the same order the models are plotted below
colors = ['plum', 'lightgreen', 'gold', 'lightcoral', 'skyblue']

fig, ax = plt.subplots(figsize=(10, 6))

# Pair each legend label with its scores and colour, then draw one line per model
series = zip(
    ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o'),
    (mistral_7b, llama3_8b, gemma2_9b, llama3_70b, gpt_4o),
    colors,
)
for label, scores, color in series:
    ax.plot(prompt_types, scores, marker='o', label=label, color=color, linestyle='-', linewidth=2)

# Title and axis labels
ax.set_title('Average Ontology Conformance (OC) Across Models and Prompt Types', fontsize=16)
ax.set_xlabel('Prompt Types', fontsize=12)
ax.set_ylabel('Average OC (%)', fontsize=12)

ax.legend(title="Models", loc='best', fontsize=12, title_fontsize=14)
ax.grid(True)

# The five categories sit at x = 0..4; leave a little padding on both sides
ax.set_xlim(-0.2, 4.2)

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Hallucination scores for subjects, relations and objects. Every model
# contributes five consecutive rows, one per prompt style, so each list below
# is laid out one model per line.
data = {
    'Model': [
        name
        for name in ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o')
        for _ in range(5)
    ],
    'Prompt': ['Zero-Shot', 'RS', 'RP', 'OS', 'OP'] * 5,
    'Subject': [
        33.30, 21.04, 23.42, 25.99, 15.10,  # Mistral-7b
        17.83, 26.22, 44.95, 34.09, 26.62,  # Llama3-8b
        8.55, 5.04, 12.84, 3.33, 8.62,      # Gemma2-9b
        27.47, 10.02, 7.43, 3.96, 5.38,     # Llama3-70b
        11.54, 7.02, 8.69, 2.12, 4.13,      # GPT-4o
    ],
    'Relation': [
        11.08, 22.74, 35.48, 17.04, 16.75,  # Mistral-7b
        6.39, 25.13, 20.58, 8.99, 6.73,     # Llama3-8b
        5.13, 13.10, 26.16, 3.38, 28.78,    # Gemma2-9b
        0.53, 7.27, 18.59, 2.56, 10.46,     # Llama3-70b
        5.91, 5.17, 17.15, 3.80, 8.08,      # GPT-4o
    ],
    'Object': [
        44.13, 31.75, 33.08, 37.64, 27.02,  # Mistral-7b
        40.68, 38.27, 44.19, 42.27, 35.34,  # Llama3-8b
        14.47, 14.29, 15.03, 10.00, 10.05,  # Gemma2-9b
        32.47, 17.03, 13.19, 9.10, 14.10,   # Llama3-70b
        16.43, 9.85, 10.13, 7.82, 8.25,     # GPT-4o
    ],
}

# Tabular view of the scores, one row per (model, prompt) pair
df = pd.DataFrame(data=data)

# Line colour assigned to each model
colors = dict(zip(
    ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o'),
    ('plum', 'lightgreen', 'gold', 'lightcoral', 'skyblue'),
))

# Function to plot a line chart for each Hallucination metric (Subject, Relation, Object)
def plot_hallucination_metric(ax, metric_name, data=None, palette=None):
    """Draw one line per model for a single hallucination category.

    Args:
        ax: Matplotlib axes to draw on.
        metric_name: Column of the score table to plot ('Subject', 'Relation'
            or 'Object' in this notebook); it is embedded in the panel title.
        data: Long-format table with 'Model', 'Prompt' and `metric_name`
            columns. Defaults to the notebook-level `df`.
        palette: Mapping from model name to line colour. Defaults to the
            notebook-level `colors`.

    Raises:
        ValueError: Raised by `DataFrame.pivot` when a (Prompt, Model) pair
            occurs more than once in the table.
        KeyError: If a model has no entry in the palette.
    """
    # `df` and `colors` are rebound by other cells of this notebook; accepting
    # them as arguments lets callers pin the table they actually mean.
    frame = df if data is None else data
    model_colors = colors if palette is None else palette

    # DataFrame.pivot hands back alphabetically sorted labels, which would put
    # the prompts and the legend entries in a different order than the table.
    # Restore the order in which prompts and models first appear.
    prompt_order = frame["Prompt"].unique()
    model_order = frame["Model"].unique()
    df_pivot = (
        frame.pivot(index="Prompt", columns="Model", values=metric_name)
        .reindex(index=prompt_order, columns=model_order)
    )

    # One line per model, drawn in that model's colour
    for model in df_pivot.columns:
        ax.plot(df_pivot.index, df_pivot[model], marker='o', label=model, color=model_colors[model])

    # Title and tick label sizes
    ax.set_title(f'Hallucination - {metric_name}↓', fontsize=16)
    ax.tick_params(axis='x', labelsize=10)
    ax.tick_params(axis='y', labelsize=10)

# A single row of three compact panels, one per hallucination category
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
for panel, category in zip(axes, ('Subject', 'Relation', 'Object')):
    plot_hallucination_metric(panel, category)

# Single legend for the whole figure: its upper-left corner is pinned to the
# top-right corner of the last panel, so it sits outside the plot area.
axes[2].legend(title="Models", loc='upper left', bbox_to_anchor=(1, 1), fontsize=12, title_fontsize=14)

# Tighten spacing so the panels do not overlap, then render
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Format conformance per (model, prompt) pair. Every model contributes five
# consecutive rows, one per prompt style, so the score list is laid out one
# model per line.
data = {
    'Model': [
        name
        for name in ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o')
        for _ in range(5)
    ],
    'Prompt': ['0-Shot', 'RS', 'RP', 'OS', 'OP'] * 5,
    'Format Conformance': [
        93.77, 96.48, 91.92, 95.65, 91.58,  # Mistral-7b
        83.45, 84.66, 86.20, 78.41, 90.96,  # Llama3-8b
        71.50, 80.77, 90.43, 92.36, 86.47,  # Gemma2-9b
        96.62, 97.71, 98.02, 97.03, 96.38,  # Llama3-70b
        95.90, 97.69, 96.27, 96.79, 99.18,  # GPT-4o
    ],
}

# Tabular view of the scores, one row per (model, prompt) pair
df = pd.DataFrame(data=data)

# Line colour assigned to each model
colors = dict(zip(
    ('Mistral-7b', 'Llama3-8b', 'Gemma2-9b', 'Llama3-70b', 'GPT-4o'),
    ('plum', 'lightgreen', 'gold', 'lightcoral', 'skyblue'),
))

# Single-panel line chart of format conformance
fig, ax = plt.subplots(figsize=(10, 6))

# Models in the order they first appear in the table
models = df['Model'].unique()

# One line per model: select that model's rows and plot score against prompt
for model in models:
    model_data = df.loc[df['Model'] == model]
    ax.plot(
        model_data['Prompt'],
        model_data['Format Conformance'],
        marker='o',
        label=model,
        color=colors[model],
        linewidth=2,
    )

# Title and axis labels
ax.set_title('Format Conformance Rate Across Models and Prompt Types', fontsize=16)
ax.set_xlabel('Prompt Types', fontsize=12)
ax.set_ylabel('Format Conformance (%)', fontsize=12)

ax.legend(title="Models", loc='best', fontsize=12, title_fontsize=14)

# Tighten the layout, switch the grid on for this axes, then render
plt.tight_layout()
ax.grid(True)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-ontology counts; the three count lists are ordered like `ontologies`
ontologies = ['Empathi', 'Assistance', 'Accident', 'Activity', 'Land', 'Location', 'Victim']
num_prompts = [15, 10, 15, 11, 20, 14, 15]
entity_types = [20, 14, 34, 29, 31, 5, 27]
relation_types = [22, 7, 6, 24, 10, 5, 12]

# One colour per bar series
colors = ['plum', 'lightgreen', 'gold']

# x positions of the three side-by-side bars within each ontology group
bar_width = 0.25
r1 = np.arange(len(ontologies))
r2 = r1 + bar_width
r3 = r2 + bar_width

# Explicit figure/axes, as in the other figures of this notebook
fig, ax = plt.subplots(figsize=(10, 6))

ax.bar(r1, num_prompts, color=colors[0], width=bar_width, edgecolor='grey', label='Number of Prompts')
ax.bar(r2, entity_types, color=colors[1], width=bar_width, edgecolor='grey', label='Entity Types')
ax.bar(r3, relation_types, color=colors[2], width=bar_width, edgecolor='grey', label='Relation Types')

# Group labels are centred under the middle bar of each group
ax.set_xlabel('Ontology', fontweight='bold')
ax.set_xticks(r2)
ax.set_xticklabels(ontologies)
ax.set_ylabel('Count', fontweight='bold')

# Font sizes follow the other figures (16pt title, 12pt legend): the previous
# 40pt title was far wider than the 10-inch canvas and got clipped, and the
# 24pt legend was oversized for the plot area.
ax.set_title('Overview of Prompts, Entity Types, and Relation Types for Each Ontology', fontsize=16)

# No legend title is set, so no title font size is needed
ax.legend(fontsize=12)

plt.tight_layout()
plt.show()
