Pythia410M Epochs Analysis

In [1]:
import os
import json
import numpy as np
from scipy.stats import hmean

# Benchmarks whose scores are collected for every model.
datasets = [
    'vqa-v2', 'textvqa-ocr', 'textvqa-pure', 'gqa', 'refcoco',
    'wsc273', 'winogrande', 'lambada_standard', 'arc_easy', 'arc_challenge',
]

# Raw evaluation results, looked up by model name below. Stays empty when
# the results file is missing (only an error message is printed).
result = {}
results_file = 'results_nlp.json'

if not os.path.isfile(results_file):
    print(f"Error: File {results_file} not found.")
else:
    with open(results_file, 'r') as f:
        result = json.load(f)

# Method key -> model name used as the key in the results file.
model_mappings = {
    'vicuna': 'reproduction-align-pythia+410m',
    'original': 'stage-final-llava-v15-pythia+410m',
    'original+2epochs': 'stage-final-llava-v15-pythia+410m-epochs-2',
    'original+3epochs': 'stage-final-llava-v15-pythia+410m-epochs-3',
    'soft': 'stage-final-llava-v15-pythia+410m-soft',
    'soft+2epochs': 'stage-final-llava-v15-pythia+410m-soft-epochs-2',
    'soft+3epochs': 'stage-final-llava-v15-pythia+410m-soft-epochs-3',
    'oolf': 'stage-final-llava-v15-pythia+410m-oolf',
    'sgm-oolf': 'stage-final-llava-v15-pythia+410m-sgm-oolf',
}

# Method key -> display label (used as the key of results_dict).
name_mapping = {
    'vicuna': 'Language Only LLM',
    'original': 'Naive FT (410M)',
    'original+2epochs': 'Naive FT (410M, +2 Epochs)',
    'original+3epochs': 'Naive FT (410M, +3 Epochs)',
    'soft': 'Soft Targets (410M)',
    'soft+2epochs': 'Soft Targets (410M, +2 Epochs)',
    'soft+3epochs': 'Soft Targets (410M, +3 Epochs)',
    'oolf': 'Corrected OLF LLaVA (410M)',
    'sgm-oolf': 'SGM + Corrected OLF (410M)',
}

# One candidate list per method; each entry is (model name, NaN-aware mean
# over every metric stored for that model).
model_results = {method: [] for method in model_mappings}

for method, model_name in model_mappings.items():
    if model_name not in result:
        print(f"Warning: Model '{model_name}' not found in results")
        continue
    accuracies = list(result[model_name].values())
    model_results[method].append((model_name, np.nanmean(accuracies)))

# Keep the best-scoring candidate of every method that has at least one.
highest_accuracy_models = {
    method: max(candidates, key=lambda entry: entry[1])
    for method, candidates in model_results.items()
    if candidates
}

# Persist the selection next to the notebook.
output_file = 'highest_accuracy_models.json'
with open(output_file, 'w') as f:
    json.dump(highest_accuracy_models, f, indent=2)

# Display labels of the methods that have results, in mapping order.
methods = [
    name_mapping[method]
    for method in model_mappings
    if method in highest_accuracy_models
]

# Display label -> {dataset: accuracy}; datasets without a score become NaN.
results_dict = {
    name_mapping[method]: {
        dataset: result[model_name].get(dataset, np.nan) for dataset in datasets
    }
    for method, (model_name, _score) in highest_accuracy_models.items()
}

# Output the results dictionary
print(json.dumps(results_dict, indent=2))

import json
import numpy as np

# Baseline score dictionaries: the naive fine-tune is required, the
# language-only model is optional (empty dict when absent).
original_llava_acc = results_dict["Naive FT (410M)"]
language_only_llm_acc = results_dict.get("Language Only LLM", {})

# Benchmark groups averaged (harmonic mean) for the table columns.
vl_benchmarks = ("vqa-v2", "textvqa-ocr", "textvqa-pure", "gqa")
nl_benchmarks = ("wsc273", "winogrande", "arc_easy", "arc_challenge", "lambada_standard")

# One row per non-baseline model:
# (label, per-dataset accuracies, VL mean, NL delta, NL mean),
# where NL delta = language-only NL mean minus this model's NL mean.
table_data = []
for model, accuracies in results_dict.items():
    if model == "Naive FT (410M)" or model == "Language Only LLM":
        continue
    avg_acc_vl = hmean([accuracies[name] for name in vl_benchmarks])
    avg_acc_nl = hmean([accuracies[name] for name in nl_benchmarks])
    baseline_nl = hmean([language_only_llm_acc.get(name, np.nan) for name in nl_benchmarks])
    delta_nl = baseline_nl - avg_acc_nl
    table_data.append((model, accuracies, avg_acc_vl, delta_nl, avg_acc_nl))

# Highest VL mean first; equal VL means are ordered by the smaller NL delta.
table_data.sort(key=lambda row: (row[2], -row[3]), reverse=True)

# Labels that belong in the LoRA table; every other row goes to the
# "other methods" table.
lora_variants = [
    'LoRA (1/4 Full Rank)', 'LoRA (1/4 Full Rank, Higher Alpha)',
    'LoRA (1/2 Full Rank)', 'LoRA (1/2 Full Rank, Higher Alpha)',
    'LoRA (1/2 Full Rank, RSLoRA)', 'LoRA (1/2 Full Rank, RSLoRA, KQV Target)',
]

lora_table_data = []
other_table_data = []
for row in table_data:
    if row[0] in lora_variants:
        lora_table_data.append(row)
    else:
        other_table_data.append(row)

def format_value(value):
    """Return *value* scaled by 100 with two decimals, or "-" when it is NaN."""
    if np.isnan(value):
        return "-"
    return f"{value * 100:.2f}"

# Build and print two LaTeX tables (LoRA variants / other methods). Both
# share the same header layout, the same two baseline rows and the same
# footer; only the caption, the label and the per-model rows differ.

# Benchmark groups, in the order they are averaged and shown.
_VL_BENCHMARKS = ("vqa-v2", "textvqa-ocr", "textvqa-pure", "gqa")
_NL_BENCHMARKS = ("wsc273", "winogrande", "arc_easy", "arc_challenge", "lambada_standard")

# Shared table header; @CAPTION@ and @LABEL@ are substituted per table.
# Plain markers are used because LaTeX braces clash with str.format fields.
_TABLE_HEADER = """
\\begin{table*}[h]
  \\caption{\\textbf{LLaVA Model Performance:} @CAPTION@}
  \\label{@LABEL@}
  \\centering
  \\resizebox{\\linewidth}{!}{
    \\begin{tabular}{l|cccc|c|cc}
     \\toprule
     \\textbf{Model} & \\multicolumn{4}{c|}{\\textbf{Vision-Language (VL)}} & \\textbf{VL Avg.} & \\multicolumn{2}{c}{\\textbf{NL Avg.}} \\\\
     & \\textbf{VQAv2} & \\textbf{TextVQA OCR} & \\textbf{TextVQA Pure} & \\textbf{GQA} & Acc $\\uparrow$ & $\\Delta \\downarrow$ & Acc $\\uparrow$ \\\\
     \\midrule
"""

_TABLE_FOOTER = """
     \\bottomrule
    \\end{tabular}
  }
\\end{table*}
"""


def _hmean_or_nan(scores):
    """Return the harmonic mean of *scores*, or NaN when any score is NaN.

    The NaN check is done here so a missing score always yields NaN (shown
    as "-" by format_value) instead of depending on how hmean treats NaN.
    """
    if any(np.isnan(score) for score in scores):
        return np.nan
    return hmean(scores)


def _group_mean(accuracies, benchmarks):
    """Harmonic mean of *accuracies* over *benchmarks*; missing keys count as NaN."""
    return _hmean_or_nan([accuracies.get(name, np.nan) for name in benchmarks])


def _latex_row(label, accuracies, avg_acc_vl, delta_cell, avg_acc_nl):
    """Format one table row; *delta_cell* is already-formatted text."""
    cells = [label]
    cells += [format_value(accuracies.get(name, np.nan)) for name in _VL_BENCHMARKS]
    cells += [format_value(avg_acc_vl), delta_cell, format_value(avg_acc_nl)]
    return " & ".join(cells) + " \\\\\n"


def _render_table(caption, label, baseline_rows, rows):
    """Assemble header, baseline rows, a rule, the model rows and the footer."""
    header = _TABLE_HEADER.replace("@CAPTION@", caption).replace("@LABEL@", label)
    body = "".join(
        _latex_row(model, accuracies, avg_acc_vl, format_value(delta_nl), avg_acc_nl)
        for model, accuracies, avg_acc_vl, delta_nl, avg_acc_nl in rows
    )
    return header + baseline_rows + "\\midrule\n" + body + _TABLE_FOOTER


# NL delta of the naive fine-tune relative to the language-only model.
# Missing language-only scores are NaN (not 0), matching the per-model
# deltas, so an absent baseline prints "-" rather than a misleading number.
original_llava_avg_delta_nl = (
    _group_mean(language_only_llm_acc, _NL_BENCHMARKS)
    - _group_mean(original_llava_acc, _NL_BENCHMARKS)
)

# The two baseline rows that open both tables; the language-only row has
# no delta of its own, hence the literal "-".
_baseline_rows = _latex_row(
    "Naive FT",
    original_llava_acc,
    _group_mean(original_llava_acc, _VL_BENCHMARKS),
    format_value(original_llava_avg_delta_nl),
    _group_mean(original_llava_acc, _NL_BENCHMARKS),
) + _latex_row(
    "Language Only LLM",
    language_only_llm_acc,
    _group_mean(language_only_llm_acc, _VL_BENCHMARKS),
    "-",
    _group_mean(language_only_llm_acc, _NL_BENCHMARKS),
)

# Generate LaTeX table for LoRA variants
latex_code_lora = _render_table(
    "LoRA Variants", "tab:lora_variants_acc", _baseline_rows, lora_table_data
)

# Generate LaTeX table for other methods
latex_code_other = _render_table(
    "Other Methods", "tab:other_methods_acc", _baseline_rows, other_table_data
)

print(latex_code_lora)
print(latex_code_other)


{
  "Language Only LLM": {
    "vqa-v2": 0.001,
    "textvqa-ocr": 0.001953125,
    "textvqa-pure": 0.0,
    "gqa": 0.002,
    "refcoco": 0.0,
    "wsc273": 0.6446886446886447,
    "winogrande": 0.5343330702446725,
    "lambada_standard": 0.39355715117407336,
    "arc_easy": 0.5147306397306397,
    "arc_challenge": 0.20648464163822525
  },
  "Naive FT (410M)": {
    "vqa-v2": 0.5488000000000001,
    "textvqa-ocr": 0.24384765625000018,
    "textvqa-pure": 0.20781250000000018,
    "gqa": 0.4102,
    "refcoco": 0.052734375,
    "wsc273": 0.6043956043956044,
    "winogrande": 0.5295974743488555,
    "lambada_standard": 0.3073937512128857,
    "arc_easy": 0.4978956228956229,
    "arc_challenge": 0.2295221843003413
  },
  "Naive FT (410M, +2 Epochs)": {
    "vqa-v2": 0.025699999999999997,
    "textvqa-ocr": 0.003125,
    "textvqa-pure": 0.0421875,
    "gqa": 0.0039000000000000003,
    "refcoco": 0.0341796875,
    "wsc273": 0.6190476190476191,
    "winogrande": 0.5445935280189423,
    "lambad

Pythia1B Epochs Analysis

In [2]:
import os
import json
import numpy as np
from scipy.stats import hmean

# Benchmarks whose scores are collected for every model.
datasets = [
    'vqa-v2', 'textvqa-ocr', 'textvqa-pure', 'gqa', 'refcoco',
    'wsc273', 'winogrande', 'lambada_standard', 'arc_easy', 'arc_challenge',
]

# Raw evaluation results, looked up by model name below. Stays empty when
# the results file is missing (only an error message is printed).
result = {}
results_file = 'results_nlp.json'

if not os.path.isfile(results_file):
    print(f"Error: File {results_file} not found.")
else:
    with open(results_file, 'r') as f:
        result = json.load(f)

# Method key -> model name used as the key in the results file.
model_mappings = {
    'vicuna': 'reproduction-align-pythia+1b',
    'original': 'stage-final-llava-v15-pythia+1b',
    'original+2epochs': 'stage-final-llava-v15-pythia+1b-epochs-2',
    'original+3epochs': 'stage-final-llava-v15-pythia+1b-epochs-3',
    'soft': 'stage-final-llava-v15-pythia+1b-soft',
    'soft+2epochs': 'stage-final-llava-v15-pythia+1b-soft-epochs-2',
    'soft+3epochs': 'stage-final-llava-v15-pythia+1b-soft-epochs-3',
    'sgm+oolf': 'stage-final-llava-v15-pythia+1b-sgm-oolf',
}

# Method key -> display label (used as the key of results_dict).
name_mapping = {
    'vicuna': 'Language Only LLM',
    'original': 'Naive FT (1B)',
    'original+2epochs': 'Naive FT (1B, +2 Epochs)',
    'original+3epochs': 'Naive FT (1B, +3 Epochs)',
    'soft': 'Soft Targets (1B)',
    'soft+2epochs': 'Soft Targets (1B, +2 Epochs)',
    'soft+3epochs': 'Soft Targets (1B, +3 Epochs)',
    'sgm+oolf': 'SGM + Corrected OLF (1B)',
}

# One candidate list per method; each entry is (model name, NaN-aware mean
# over every metric stored for that model).
model_results = {method: [] for method in model_mappings}

for method, model_name in model_mappings.items():
    if model_name not in result:
        print(f"Warning: Model '{model_name}' not found in results")
        continue
    accuracies = list(result[model_name].values())
    model_results[method].append((model_name, np.nanmean(accuracies)))

# Keep the best-scoring candidate of every method that has at least one.
highest_accuracy_models = {
    method: max(candidates, key=lambda entry: entry[1])
    for method, candidates in model_results.items()
    if candidates
}

# Persist the selection next to the notebook.
output_file = 'highest_accuracy_models.json'
with open(output_file, 'w') as f:
    json.dump(highest_accuracy_models, f, indent=2)

# Display labels of the methods that have results, in mapping order.
methods = [
    name_mapping[method]
    for method in model_mappings
    if method in highest_accuracy_models
]

# Display label -> {dataset: accuracy}; datasets without a score become NaN.
results_dict = {
    name_mapping[method]: {
        dataset: result[model_name].get(dataset, np.nan) for dataset in datasets
    }
    for method, (model_name, _score) in highest_accuracy_models.items()
}

# Output the results dictionary
print(json.dumps(results_dict, indent=2))

import json
import numpy as np

# Baseline score dictionaries: the naive fine-tune is required, the
# language-only model is optional (empty dict when absent).
original_llava_acc = results_dict["Naive FT (1B)"]
language_only_llm_acc = results_dict.get("Language Only LLM", {})

# Benchmark groups averaged (harmonic mean) for the table columns.
vl_benchmarks = ("vqa-v2", "textvqa-ocr", "textvqa-pure", "gqa")
nl_benchmarks = ("wsc273", "winogrande", "arc_easy", "arc_challenge", "lambada_standard")

# One row per non-baseline model:
# (label, per-dataset accuracies, VL mean, NL delta, NL mean),
# where NL delta = language-only NL mean minus this model's NL mean.
table_data = []
for model, accuracies in results_dict.items():
    if model == "Naive FT (1B)" or model == "Language Only LLM":
        continue
    avg_acc_vl = hmean([accuracies[name] for name in vl_benchmarks])
    avg_acc_nl = hmean([accuracies[name] for name in nl_benchmarks])
    baseline_nl = hmean([language_only_llm_acc.get(name, np.nan) for name in nl_benchmarks])
    delta_nl = baseline_nl - avg_acc_nl
    table_data.append((model, accuracies, avg_acc_vl, delta_nl, avg_acc_nl))

# Highest VL mean first; equal VL means are ordered by the smaller NL delta.
table_data.sort(key=lambda row: (row[2], -row[3]), reverse=True)

# Labels that belong in the LoRA table; every other row goes to the
# "other methods" table.
lora_variants = [
    'LoRA (1/4 Full Rank)', 'LoRA (1/4 Full Rank, Higher Alpha)',
    'LoRA (1/2 Full Rank)', 'LoRA (1/2 Full Rank, Higher Alpha)',
    'LoRA (1/2 Full Rank, RSLoRA)', 'LoRA (1/2 Full Rank, RSLoRA, KQV Target)',
]

lora_table_data = []
other_table_data = []
for row in table_data:
    if row[0] in lora_variants:
        lora_table_data.append(row)
    else:
        other_table_data.append(row)

def format_value(value):
    """Return *value* scaled by 100 with two decimals, or "-" when it is NaN."""
    if np.isnan(value):
        return "-"
    return f"{value * 100:.2f}"

# Build and print two LaTeX tables (LoRA variants / other methods). Both
# share the same header layout, the same two baseline rows and the same
# footer; only the caption, the label and the per-model rows differ.

# Benchmark groups, in the order they are averaged and shown.
_VL_BENCHMARKS = ("vqa-v2", "textvqa-ocr", "textvqa-pure", "gqa")
_NL_BENCHMARKS = ("wsc273", "winogrande", "arc_easy", "arc_challenge", "lambada_standard")

# Shared table header; @CAPTION@ and @LABEL@ are substituted per table.
# Plain markers are used because LaTeX braces clash with str.format fields.
_TABLE_HEADER = """
\\begin{table*}[h]
  \\caption{\\textbf{LLaVA Model Performance:} @CAPTION@}
  \\label{@LABEL@}
  \\centering
  \\resizebox{\\linewidth}{!}{
    \\begin{tabular}{l|cccc|c|cc}
     \\toprule
     \\textbf{Model} & \\multicolumn{4}{c|}{\\textbf{Vision-Language (VL)}} & \\textbf{VL Avg.} & \\multicolumn{2}{c}{\\textbf{NL Avg.}} \\\\
     & \\textbf{VQAv2} & \\textbf{TextVQA OCR} & \\textbf{TextVQA Pure} & \\textbf{GQA} & Acc $\\uparrow$ & $\\Delta \\downarrow$ & Acc $\\uparrow$ \\\\
     \\midrule
"""

_TABLE_FOOTER = """
     \\bottomrule
    \\end{tabular}
  }
\\end{table*}
"""


def _hmean_or_nan(scores):
    """Return the harmonic mean of *scores*, or NaN when any score is NaN.

    The NaN check is done here so a missing score always yields NaN (shown
    as "-" by format_value) instead of depending on how hmean treats NaN.
    """
    if any(np.isnan(score) for score in scores):
        return np.nan
    return hmean(scores)


def _group_mean(accuracies, benchmarks):
    """Harmonic mean of *accuracies* over *benchmarks*; missing keys count as NaN."""
    return _hmean_or_nan([accuracies.get(name, np.nan) for name in benchmarks])


def _latex_row(label, accuracies, avg_acc_vl, delta_cell, avg_acc_nl):
    """Format one table row; *delta_cell* is already-formatted text."""
    cells = [label]
    cells += [format_value(accuracies.get(name, np.nan)) for name in _VL_BENCHMARKS]
    cells += [format_value(avg_acc_vl), delta_cell, format_value(avg_acc_nl)]
    return " & ".join(cells) + " \\\\\n"


def _render_table(caption, label, baseline_rows, rows):
    """Assemble header, baseline rows, a rule, the model rows and the footer."""
    header = _TABLE_HEADER.replace("@CAPTION@", caption).replace("@LABEL@", label)
    body = "".join(
        _latex_row(model, accuracies, avg_acc_vl, format_value(delta_nl), avg_acc_nl)
        for model, accuracies, avg_acc_vl, delta_nl, avg_acc_nl in rows
    )
    return header + baseline_rows + "\\midrule\n" + body + _TABLE_FOOTER


# NL delta of the naive fine-tune relative to the language-only model.
# Missing language-only scores are NaN (not 0), matching the per-model
# deltas, so an absent baseline prints "-" rather than a misleading number.
original_llava_avg_delta_nl = (
    _group_mean(language_only_llm_acc, _NL_BENCHMARKS)
    - _group_mean(original_llava_acc, _NL_BENCHMARKS)
)

# The two baseline rows that open both tables; the language-only row has
# no delta of its own, hence the literal "-".
_baseline_rows = _latex_row(
    "Naive FT",
    original_llava_acc,
    _group_mean(original_llava_acc, _VL_BENCHMARKS),
    format_value(original_llava_avg_delta_nl),
    _group_mean(original_llava_acc, _NL_BENCHMARKS),
) + _latex_row(
    "Language Only LLM",
    language_only_llm_acc,
    _group_mean(language_only_llm_acc, _VL_BENCHMARKS),
    "-",
    _group_mean(language_only_llm_acc, _NL_BENCHMARKS),
)

# Generate LaTeX table for LoRA variants
latex_code_lora = _render_table(
    "LoRA Variants", "tab:lora_variants_acc", _baseline_rows, lora_table_data
)

# Generate LaTeX table for other methods
latex_code_other = _render_table(
    "Other Methods", "tab:other_methods_acc", _baseline_rows, other_table_data
)

print(latex_code_lora)
print(latex_code_other)


{
  "Language Only LLM": {
    "vqa-v2": 0.044000000000000004,
    "textvqa-ocr": 0.00615234375,
    "textvqa-pure": 0.04140625,
    "gqa": 0.0127,
    "refcoco": 0.0,
    "wsc273": 0.673992673992674,
    "winogrande": 0.5280189423835833,
    "lambada_standard": 0.4395497768290316,
    "arc_easy": 0.5845959595959596,
    "arc_challenge": 0.24488054607508533
  },
  "Naive FT (1B)": {
    "vqa-v2": 0.6443000000000001,
    "textvqa-ocr": 0.3836914062500002,
    "textvqa-pure": 0.3547851562499999,
    "gqa": 0.45409999999999995,
    "refcoco": 0.078125,
    "wsc273": 0.6556776556776557,
    "winogrande": 0.5351223362273086,
    "lambada_standard": 0.390064040364836,
    "arc_easy": 0.5715488215488216,
    "arc_challenge": 0.27047781569965873
  },
  "Naive FT (1B, +2 Epochs)": {
    "vqa-v2": 0.664,
    "textvqa-ocr": 0.38593750000000027,
    "textvqa-pure": 0.36591796875000004,
    "gqa": 0.4805,
    "refcoco": 0.1220703125,
    "wsc273": 0.663003663003663,
    "winogrande": 0.537490134175