In [2]:
import sys
sys.path.append('..')

from consts import model_map
used_models = sorted(list(model_map.keys()))
used_models = [model.lower() for model in used_models]
used_models


['norwai/norwai-mistral-7b',
 'norwai/norwai-mistral-7b-instruct',
 'ruternorway/llama-2-13b-chat-norwegian',
 'ruternorway/llama-2-7b-chat-norwegian',
 'bineric/norskgpt-llama3-8b',
 'bineric/norskgpt-mistral-7b',
 'google/gemma-2-27b',
 'google/gemma-2-27b-it',
 'google/gemma-2-2b',
 'google/gemma-2-2b-it',
 'google/gemma-2-9b',
 'google/gemma-2-9b-it',
 'meta-llama/llama-2-13b-chat-hf',
 'meta-llama/llama-2-7b-chat-hf',
 'meta-llama/meta-llama-3-8b',
 'meta-llama/meta-llama-3-8b-instruct',
 'meta-llama/meta-llama-3.1-8b',
 'meta-llama/meta-llama-3.1-8b-instruct',
 'mistralai/mistral-7b-instruct-v0.1',
 'mistralai/mistral-7b-v0.1',
 'norallm/normistral-7b-scratch',
 'norallm/normistral-7b-warm',
 'norallm/normistral-7b-warm-instruct']

In [3]:
SELECTED_MODELS = used_models  # specify a list of models to extract

In [4]:
import requests
from bs4 import BeautifulSoup

id = "norwegian-nlg"
url = f"https://scandeval.com/{id}/"
res = requests.get(url)
soup = BeautifulSoup(res.content, "html.parser")
table = soup.find("table", {"id": {id}})
data = []

headers = []
for th in table.find("thead").find_all("th"):
    headers.append(th.text.strip())
rows = table.find("tbody").find_all("tr")


SELECTED_MODELS = used_models 

parsable_metrics = ["NorNE-nb", "NoReC", "NorQuAD", "HellaSwag-no"]
def parse_metric(metric, value):
    left, right = value.split("/")
    # VALUE ± STDDEV
    lval, lstd = left.split("±")
    rval, rstd = right.split("±")
    lval = float(lval.strip())
    rval = float(rval.strip())
    lstd = float(lstd.strip())
    rstd = float(rstd.strip())

    # here goes some custom rules for parsing specific metrics...
    match metric:
        # micro-avg F1 with MISC tags -> difficult
        case "NorNE-nb":
            return right
        # macro-avg F1
        case "NoReC":
            return right
        # F1
        case "NorQuAD":
            return right
        # accuracy
        case "HellaSwag-no":
            return right


# skip Model (fist header), we do that manually :-)
headers = [th.get_text(strip=True) for th in table.find_all("th")][1:]
# skip versions, if desired:
headers = [h for h in headers if not "version" in h.lower()]


for row in rows:
    row_data = {}
    cells = row.find_all("td")
    model_id = cells[0].text.strip()
    # handle the <modelname> (few-shot) naming scheme)
    model_id = model_id.split("(")[0].strip()

    if model_id.lower() not in SELECTED_MODELS:
        continue

    row_data["model"] = model_id
    for i, col in enumerate(headers):
        value = cells[i + 1].text.strip()  # +1 as we added model already
        if col in parsable_metrics:
            row_data[col] = parse_metric(col, value)
        else:
            # you can add in other non-parsable metrics here/versions etc.
            continue

    data.append(row_data)

for model_result in data:
    print(model_result)

{'model': 'google/gemma-2-27b-it', 'NorNE-nb': ' 56.75 ± 3.04', 'NoReC': ' 78.63 ± 0.96', 'NorQuAD': ' 73.41 ± 1.61', 'HellaSwag-no': ' 77.92 ± 1.72'}
{'model': 'google/gemma-2-9b-it', 'NorNE-nb': ' 44.91 ± 3.62', 'NoReC': ' 73.45 ± 0.94', 'NorQuAD': ' 70.14 ± 1.53', 'HellaSwag-no': ' 75.79 ± 1.47'}
{'model': 'google/gemma-2-27b', 'NorNE-nb': ' 43.06 ± 1.89', 'NoReC': ' 76.14 ± 1.68', 'NorQuAD': ' 80.21 ± 4.49', 'HellaSwag-no': ' 63.55 ± 4.76'}
{'model': 'google/gemma-2-9b', 'NorNE-nb': ' 34.62 ± 1.80', 'NoReC': ' 75.53 ± 0.73', 'NorQuAD': ' 72.99 ± 3.16', 'HellaSwag-no': ' 63.52 ± 3.49'}
{'model': 'bineric/NorskGPT-Llama3-8b', 'NorNE-nb': ' 60.25 ± 3.14', 'NoReC': ' 61.42 ± 3.56', 'NorQuAD': ' 74.57 ± 2.20', 'HellaSwag-no': ' 59.11 ± 2.44'}
{'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'NorNE-nb': ' 71.87 ± 0.97', 'NoReC': ' 71.58 ± 0.90', 'NorQuAD': ' 70.96 ± 3.00', 'HellaSwag-no': ' 54.03 ± 0.82'}
{'model': 'meta-llama/Meta-Llama-3.1-8B', 'NorNE-nb': ' 53.50 ± 3.27', 'NoReC': '

In [5]:
import pandas as pd
from datetime import datetime

current_date = datetime.now().strftime("%d-%m-%Y")
filename = f"{id}-{current_date}.csv"
df = pd.DataFrame(data)
df.to_csv(filename, index=False)