In [None]:
import os
from typing import Dict, List
from groq import Groq

# Get a free API key from https://console.groq.com/keys
# The key is a secret and must not be stored in the notebook: it is taken from
# the GROQ_API_KEY environment variable, and only if that is missing or empty
# is the user prompted for it (the typed value is not echoed).
# NOTE(review): any key that was ever hardcoded in this cell should be treated
# as leaked and revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

# Model identifiers passed as `model=` to the chat completion endpoint.
LLAMA3_70B_INSTRUCT = "llama3-70b-8192"
LLAMA3_8B_INSTRUCT = "llama3-8b-8192"

# Model used by default for every request in this notebook.
DEFAULT_MODEL = LLAMA3_70B_INSTRUCT

# Built without arguments, so the credentials come from the environment set up above.
client = Groq()


In [None]:
from typing import List, Optional
import json
from enum import Enum
from pydantic import BaseModel
from groq import Groq

# Groq client built with no arguments; this is the instance get_spell() calls.
# NOTE(review): looks redundant with the `client` object created in the first
# cell, and the variable shares its name with the `groq` package — confirm and
# consider consolidating on a single client.
groq = Groq()

# Level of the team a managerial spell refers to. The member *values* are the
# strings exchanged with the model and stored in the "Level" output column.
# NOTE(review): documented with comments rather than a docstring on purpose —
# this enum is part of the JSON schema embedded in the prompt, and a docstring
# would presumably be emitted as a schema description; verify before adding one.
class TeamLevel(Enum):
    Level1 = "Senior"
    Level2 = "Reserve"
    Level3 = "Youth"    

# Record describing one managerial spell. get_spell() embeds this model's JSON
# schema in the system prompt and validates the model's reply against it, so
# field names, order and types are effectively part of the prompt.
# NOTE(review): kept docstring-free on purpose — a class docstring would
# presumably surface as a schema description and alter the prompt; verify.
class Spells(BaseModel):
    club_name: str
    team_level: TeamLevel  # one of the TeamLevel values ("Senior", "Reserve", "Youth")
    manager: str
    end_of_spell: str  # plain string; no date format is enforced here
    start_of_spell: str  # plain string; no date format is enforced here
    league: str
    tier: str
    # The two fields below accept null but declare no default.
    # NOTE(review): under pydantic v2 that means the keys must still be present
    # in the reply — confirm this is intended, or give them `= None`.
    sacked: Optional[bool]
    probability_of_sacking_true: Optional[float]  # presumably in [0, 1]; not validated


def get_spell(club_name: str, end_of_spell: str) -> Spells:
    """Ask the LLM for the managerial spell at a club that ended on a given date.

    Sends one JSON-mode chat completion whose system prompt embeds the JSON
    schema of ``Spells``, then validates the reply against that model.

    Args:
        club_name: Club or team name, interpolated verbatim into the user prompt.
        end_of_spell: End date of the spell, interpolated verbatim into the
            user prompt.

    Returns:
        The model's reply parsed into a ``Spells`` instance.

    Raises:
        pydantic.ValidationError: If the reply does not conform to ``Spells``.
        Exceptions raised by the Groq client call are not caught here and
        propagate to the caller.
    """
    chat_completion = groq.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a football statistician that outputs football facts about managerial spells in JSON.\n"
                # Pass the json schema to the model. Pretty printing improves results.
                f" The JSON object must use the schema: {json.dumps(Spells.model_json_schema(), indent=2)}",
            },
            {
                "role": "user",
                "content": f"Fetch football facts about the managerial spells at {club_name} which ended in {end_of_spell}.",
            },
        ],
        model=DEFAULT_MODEL,
        # Greedy decoding so repeated runs are as stable as the API allows.
        temperature=0,
        top_p=0.9,
        # Streaming is not supported in JSON mode
        stream=False,
        # Enable JSON mode by setting the response format
        response_format={"type": "json_object"},
    )
    # Only the first choice is used; its content is the raw JSON text.
    content = chat_completion.choices[0].message.content

    return Spells.model_validate_json(content)

# Single-row smoke test of get_spell: Italy U21, spell ending 2008-09-10.
spell = get_spell("Italy U21", "2008-09-10")
# One value per line: club, team level label, manager.
print(spell.club_name, spell.team_level.value, spell.manager, sep="\n")

# Extracting information from managerial spells

In [None]:
# Close a progress bar that an interrupted run of the extraction loop may have
# left open. Guarded because on a fresh kernel this cell runs before
# `progress_bar` has been created, and an unguarded call raises NameError.
if "progress_bar" in globals():
    progress_bar.close()

In [None]:
import pandas as pd
from tqdm import tqdm
from groq import BadRequestError
from pydantic import ValidationError
inputs = pd.read_csv("LLM_input.csv")

# Query the LLM once per input row and write the answers back into `inputs`.
# The `with` block closes the bar even when an uncaught exception (for example
# an API error other than BadRequestError) aborts the loop; the name
# `progress_bar` stays bound afterwards.
with tqdm(total=len(inputs)) as progress_bar:
    # Iterate over the rows of the csv file
    for i, row in inputs.iterrows():
        club = row["club_name"]
        date = row["end_of_spell"]
        try:
            spell = get_spell(club, date)
            print(spell.club_name)
            print(spell.manager)
            print(spell.end_of_spell)
            print(spell.start_of_spell)
            print(spell.league)
            print(spell.tier)
            print(spell.sacked)
            print(spell.probability_of_sacking_true)
            print("\n")
            # Store the answer in new columns of the input frame.
            # NOTE(review): start_of_spell is printed but never stored — confirm
            # whether it should be written to the output as well.
            inputs.loc[i, "manager"] = spell.manager
            inputs.loc[i, "Level"] = spell.team_level.value
            inputs.loc[i, "league"] = spell.league
            inputs.loc[i, "tier"] = spell.tier
            inputs.loc[i, "sacked"] = spell.sacked
            inputs.loc[i, "probability_of_sacking_true"] = spell.probability_of_sacking_true
        except (BadRequestError, ValidationError) as e:
            # Rejected request or reply that does not match the schema:
            # record the reason on the row and carry on with the next one.
            inputs.loc[i, 'error'] = str(e)

        # Count every processed row, failed ones included, so the bar can
        # actually reach its total.
        progress_bar.update(1)
    

In [6]:
# Persist the augmented frame (original columns plus the LLM answers and any
# error messages); the row index is left out of the file.
inputs.to_csv(
    path_or_buf="LLM_output_augmented.csv",
    index=False,
)