In [1]:
from dotenv import load_dotenv

from pathlib import Path
import os 
import sys
import numpy as np
import json

# Load variables from a local .env file (if present) before reading the key.
load_dotenv()
GEMINI_KEY = os.getenv('GEMINI_KEY')

# Locate the project root in a way that survives re-running this cell.
# The original `Path().resolve().parent` climbed one directory higher on every
# re-run, because the cell also changes the working directory to `root`.
# The `src` package imported later in this notebook is used as the marker:
# if the current directory already contains it, we are at the root; otherwise
# the notebook is assumed to live one level below the root (as before).
cwd = Path.cwd().resolve()
root = cwd if (cwd / "src").is_dir() else cwd.parent

# Make project-local packages importable.
root_str = str(root)
if root_str not in sys.path:
    sys.path.insert(0, root_str)

# Relative paths used in later cells (e.g. "data/...", "results/...") are
# resolved from the project root.
if cwd != root:
    os.chdir(root)

from google import genai

# Client shared by the generation cells below.
# NOTE(review): GEMINI_KEY is None when the variable is not set — confirm how
# genai.Client behaves in that case.
genai_client = genai.Client(
    api_key=GEMINI_KEY,
)

### Generation

In [3]:

# Send one short prompt to the Gemma 12B instruction-tuned model.
response = genai_client.models.generate_content(
    contents="Roses are red...",
    model="gemma-3-12b-it",
)

# Display the text of the first part of the first returned candidate.
first_candidate = response.candidates[0]
first_candidate.content.parts[0].text

'Roses are red... Violets are blue... \n'

### Troubleshoot invalid JSON

In [2]:
import json

# A model answer wrapped in a Markdown ```json fence: the payload inside the
# fence is valid JSON, but the fence markers themselves are not.
string = "```json\n{\"api_name\":\"RegisterUser\",\"parameters\":{\"username\":\"johnsmith123\",\"password\":\"1234password\",\"email\":\"johnsmith123@example.com\"}}\n```"

# The parse failure is the point of this cell, so catch it and print it in the
# "Name: message" form a traceback ends with, instead of leaving an uncaught
# exception that would stop a Restart & Run All.
try:
    json.loads(string)
except json.JSONDecodeError as err:
    print(f"{type(err).__name__}: {err}")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [7]:
# Load the saved generations stored under data/generated/1-rotowire.
with open("data/generated/1-rotowire/generated.json", "r") as generated_file:
    data = json.load(generated_file)

# Remove the Markdown code-fence markers from the first generated output,
# then parse what remains as JSON and display it.
first_output = data[0]["generated_output"]
unfenced_output = first_output.replace("```json", "").replace("```", "")
json.loads(unfenced_output)

{'teams': [{'team': 'Atlanta Hawks',
   'total_points': 107,
   'points_in_2nd_quarter': 31,
   'percentage_of_field_goals': 49,
   'number_of_team_assists': 29,
   'wins': 9,
   'losses': 2},
  {'team': 'Milwaukee Bucks',
   'total_points': 100,
   'points_in_2nd_quarter': 9,
   'wins': 5,
   'losses': 5}],
 'players': [{'player': 'Giannis Antetokounmpo',
   'points': 26,
   'total_rebounds': 15,
   'assists': 7},
  {'player': 'Jabari Parker',
   'points': 23,
   'total_rebounds': 8,
   'assists': 3,
   'steals': 3},
  {'player': 'Tony Snell', 'points': 11, 'total_rebounds': 4, 'assists': 3},
  {'player': 'Mirza Teletovic',
   'points': 14,
   'field_goals_made': 5,
   'field_goals_attempted': 10},
  {'player': 'Paul Millsap', 'points': 21, 'total_rebounds': 8, 'assists': 3},
  {'player': 'Mike Muscala', 'points': 16, 'total_rebounds': 5, 'assists': 3},
  {'player': 'Kyle Korver', 'points': 11, 'total_rebounds': 6, 'assists': 5}]}

In [5]:
from src.evaluate import assess_json_valid

# Sample model output used to exercise the project's JSON validity check.
# It is wrapped in a Markdown ```json fence and its keys are single-quoted,
# i.e. it looks like a Python dict literal rather than strict JSON.
s = """```json
{
    'article_title': "Bibs Ekkel",
    'associated_acts': "tziganka; balalaika potpourri; russian tornado",
    'background': "mixed polish-english",
    'birth_date': "1946-01-11",
    'birth_name': "",
    'genre': "russian folk music",
    'instrument': "balalaika",
    'name': "Bibs Ekkel",
    'occupation': "balalaika player; lecturer; music adviser",
    'origin': "brighton",
    'years_active': "19751946"
}
```"""

# In the recorded run below, the helper prints "Invalid JSON string: ... - Fixing"
# and returns the tuple (0, <parsed dict>).
# NOTE(review): the leading 0 presumably flags the raw string as invalid even
# though it could be repaired — confirm against src/evaluate.
assess_json_valid(s)

Invalid JSON string: ```json
{
    'article_title': "Bibs Ekkel",
    'associated_acts': "tziganka; balalaika potpourri; russian tornado",
    'background': "mixed polish-english",
    'birth_date': "1946-01-11",
    'birth_name': "",
    'genre': "russian folk music",
    'instrument': "balalaika",
    'name': "Bibs Ekkel",
    'occupation': "balalaika player; lecturer; music adviser",
    'origin': "brighton",
    'years_active': "19751946"
}
``` - Fixing


(0,
 {'article_title': 'Bibs Ekkel',
  'associated_acts': 'tziganka; balalaika potpourri; russian tornado',
  'background': 'mixed polish-english',
  'birth_date': '1946-01-11',
  'birth_name': '',
  'genre': 'russian folk music',
  'instrument': 'balalaika',
  'name': 'Bibs Ekkel',
  'occupation': 'balalaika player; lecturer; music adviser',
  'origin': 'brighton',
  'years_active': '19751946'})

In [None]:
import json

# Model output wrapped in a Markdown ```json fence whose keys are
# single-quoted (Python-dict style rather than strict JSON).
s = """```json
{
    'article_title': "Bibs Ekkel",
    'associated_acts': "tziganka; balalaika potpourri; russian tornado",
    'background': "mixed polish-english",
    'birth_date': "1946-01-11",
    'birth_name': "",
    'genre': "russian folk music",
    'instrument': "balalaika",
    'name': "Bibs Ekkel",
    'occupation': "balalaika player; lecturer; music adviser",
    'origin': "brighton",
    'years_active': "19751946"
}
```"""

# Strip the fence markers, leaving only the payload between them.
a = s.replace("```json", "").replace("```", "")

# Removing the fence is not enough: the single-quoted keys still make strict
# JSON parsing fail. That failure is what this cell demonstrates, so catch it
# and print it in "Name: message" form instead of leaving an uncaught
# exception that would stop a Restart & Run All.
try:
    json.loads(a)
except json.JSONDecodeError as err:
    print(f"{type(err).__name__}: {err}")

JSONDecodeError: Expecting property name enclosed in double quotes: line 3 column 5 (char 7)

In [10]:
# Show exactly what is being handed to the parser.
print(a)

# Parse it, reporting a decode failure in "Name: message" form rather than
# leaving an uncaught exception that would stop a Restart & Run All.
try:
    parsed = json.loads(a)
except json.JSONDecodeError as err:
    print(f"{type(err).__name__}: {err}")
else:
    print(parsed)


{
    'article_title': "Bibs Ekkel",
    'associated_acts': "tziganka; balalaika potpourri; russian tornado",
    'background': "mixed polish-english",
    'birth_date': "1946-01-11",
    'birth_name': "",
    'genre': "russian folk music",
    'instrument': "balalaika",
    'name': "Bibs Ekkel",
    'occupation': "balalaika player; lecturer; music adviser",
    'origin': "brighton",
    'years_active': "19751946"
}



JSONDecodeError: Expecting property name enclosed in double quotes: line 3 column 5 (char 7)

# Results

In [43]:
import pandas as pd
import json
from pprint import pprint

# Read the benchmark results; the context manager closes the file handle,
# which the original `json.load(open(...))` left open.
with open("results/bench_results.json", "r") as results_file:
    results = json.load(results_file)

# Re-key the results from results[model][task][metric] to
# reshuffle[metric][model][task], so that each metric maps directly onto a
# (task x model) table.
reshuffle = {}
for metric in ["is_valid", "correctness", "compliance"]:
    reshuffle[metric] = {
        model: {task: scores[metric] for task, scores in tasks.items()}
        for model, tasks in results.items()
    }

def format_table(metric="is_valid", models=None, vmin=0.2, vmax=1):
    """Build a colour-graded table of one metric, tasks as rows and models as columns.

    Reads the module-level ``reshuffle`` mapping (metric -> model -> task -> score).

    Args:
        metric: Key of ``reshuffle`` to tabulate.
        models: Model names to show, in column order. Defaults to the five
            models compared in this notebook. Every name must be present in
            ``reshuffle[metric]``; a missing one raises ``KeyError``.
        vmin: Score mapped to the low end of the colour gradient.
        vmax: Score mapped to the high end of the colour gradient.

    Returns:
        A pandas ``Styler`` with a red-yellow-green background gradient and
        values formatted to two decimals.
    """
    if models is None:
        models = [
            "gemini-2.5-flash-preview-05-20",
            "gemma-3-27b-it",
            "gemma-3-12b-it",
            "gemma-3-4b-it",
            "gemma-3-1b-it",
        ]
    df = pd.DataFrame(reshuffle[metric])
    # Select the requested models and fix the column order.
    df = df.loc[:, list(models)]
    return df.style.background_gradient(cmap="RdYlGn", vmin=vmin, vmax=vmax).format("{:.2f}")


# Render the styled per-task, per-model table for the "is_valid" metric.
format_table(metric="is_valid")

Unnamed: 0,gemini-2.5-flash-preview-05-20,gemma-3-27b-it,gemma-3-12b-it,gemma-3-4b-it,gemma-3-1b-it
1-rotowire,0.8,0.8,0.8,0.8,0.76
2-wiki_bio,0.99,0.59,0.55,0.73,0.65
3-few_nerd,1.0,0.8,0.8,0.98,0.8
4-TOPv1,0.95,0.8,0.8,0.8,0.95
5-api_bank,0.82,0.64,0.98,0.41,0.07
6-reasoning/GSM8K,0.79,0.89,0.83,0.7,0.62
6-reasoning/last_letter,0.8,0.8,0.8,0.8,0.97


In [44]:
# Render the styled per-task, per-model table for the "compliance" metric.
format_table(metric="compliance")

Unnamed: 0,gemini-2.5-flash-preview-05-20,gemma-3-27b-it,gemma-3-12b-it,gemma-3-4b-it,gemma-3-1b-it
1-rotowire,1.0,0.99,0.97,0.95,0.91
2-wiki_bio,0.96,0.97,0.97,0.97,0.81
3-few_nerd,1.0,1.0,0.99,1.0,0.72
4-TOPv1,0.93,0.9,0.76,0.77,0.69
5-api_bank,0.99,0.99,1.0,0.96,0.34
6-reasoning/GSM8K,1.0,1.0,1.0,0.97,0.86
6-reasoning/last_letter,1.0,1.0,1.0,1.0,1.0


In [45]:
# Render the styled per-task, per-model table for the "correctness" metric.
format_table(metric="correctness")

Unnamed: 0,gemini-2.5-flash-preview-05-20,gemma-3-27b-it,gemma-3-12b-it,gemma-3-4b-it,gemma-3-1b-it
1-rotowire,0.56,0.5,0.54,0.51,0.43
2-wiki_bio,0.33,0.33,0.32,0.35,0.25
3-few_nerd,0.6,0.38,0.39,0.42,0.22
4-TOPv1,0.64,0.6,0.51,0.49,0.26
5-api_bank,0.91,0.8,0.83,0.8,0.39
6-reasoning/GSM8K,0.93,0.91,0.89,0.84,0.32
6-reasoning/last_letter,0.98,0.84,0.81,0.17,0.0
