In [1]:
import os
from typing import Dict, List
from groq import Groq

# Get a free API key from https://console.groq.com/keys and export it as the
# GROQ_API_KEY environment variable before starting the kernel.
# Never store a key in the notebook itself: the key that used to be hardcoded
# on this line must be treated as leaked and revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    # Local import: only needed for the interactive fallback prompt.
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

# Model identifiers that can be passed as `model=` to the chat completion API.
LLAMA3_70B_INSTRUCT = "llama3-70b-8192"
LLAMA3_8B_INSTRUCT = "llama3-8b-8192"

DEFAULT_MODEL = LLAMA3_70B_INSTRUCT

# Constructed without arguments; the client is expected to pick the key up
# from the GROQ_API_KEY environment variable ensured above.
client = Groq()


In [2]:
from typing import List, Optional
import json
from enum import Enum
from pydantic import BaseModel
from groq import Groq

# Client instance used by get_spell() below (separate from `client` created in
# the first cell). Note that this variable shares its name with the `groq`
# package: in this namespace `groq` is the client object, not the module.
groq = Groq()

# Level of the side a manager was in charge of. The string values are what the
# batch loop stores in the "Level" column of the results table.
# Documented with `#` comments on purpose: this enum is part of the JSON schema
# that get_spell() embeds in the prompt, and a class docstring may be emitted
# into that schema as a description.
class TeamLevel(Enum):
    Level1 = "Senior"
    Level2 = "Reserve"
    Level3 = "Youth"    

# One managerial spell as described by the LLM. The JSON schema generated from
# this model is embedded in the system prompt of get_spell(), and the model's
# reply is validated against it. Documented with `#` comments on purpose: a
# class docstring may be emitted into that schema as a description.
class Spells(BaseModel):
    club_name: str
    team_level: TeamLevel
    manager: str
    # Both dates are plain strings; no date parsing or validation happens here.
    end_of_spell: str
    start_of_spell: str
    league: str
    # Kept as a string even though the sample outputs look numeric (e.g. "1").
    tier: str
    # Optional[...] allows null, but with no default these two fields presumably
    # still have to be present in the reply (pydantic v2 semantics) - TODO confirm.
    sacked: Optional[bool]
    probability_of_sacking_true: Optional[float]


def get_spell(club_name: str, end_of_spell: str, model: Optional[str] = None) -> Spells:
    """Ask the LLM about one managerial spell and parse the reply into ``Spells``.

    Args:
        club_name: Club or national team name, inserted verbatim into the prompt.
        end_of_spell: Date the spell ended (e.g. "2008-09-10"), inserted verbatim
            into the prompt.
        model: Groq model identifier. When omitted, ``DEFAULT_MODEL`` is looked up
            at call time, which matches the previous hard-wired behaviour.

    Returns:
        The model's JSON reply validated against the ``Spells`` schema.

    Raises:
        pydantic.ValidationError: if the reply does not conform to ``Spells``.
        Errors raised by the Groq client are propagated unchanged.
    """
    # Pass the json schema to the model. Pretty printing improves results.
    schema_json = json.dumps(Spells.model_json_schema(), indent=2)
    system_prompt = (
        "You are a football statistician that outputs football facts about managerial spells in JSON.\n"
        f" The JSON object must use the schema: {schema_json}"
    )
    user_prompt = f"Fetch football facts about the managerial spells at {club_name} which ended in {end_of_spell}."

    chat_completion = groq.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model=model if model is not None else DEFAULT_MODEL,
        temperature=0,
        top_p=0.9,
        # Streaming is not supported in JSON mode
        stream=False,
        # Enable JSON mode by setting the response format
        response_format={"type": "json_object"},
    )

    # Only the first choice is requested/used; validate its text as JSON.
    content = chat_completion.choices[0].message.content
    return Spells.model_validate_json(content)

# Smoke test on a single known spell: Italy U21, ended 2008-09-10.
spell = get_spell("Italy U21", "2008-09-10")
# One value per line: club name, team level label, manager name.
print(spell.club_name, spell.team_level.value, spell.manager, sep="\n")

Italy U21
Youth
Pierluigi Casiraghi


# Extracting information from managerial spells

In [7]:
import pandas as pd
from tqdm import tqdm
from groq import BadRequestError, InternalServerError
from pydantic import ValidationError
import logging  # Import logging module
import time  # used for the retry back-off below; it was missing from this cell's imports

inputs = pd.read_csv("LLM_input.csv")


def main():
    """Query the LLM for every row of ``inputs`` and store the answers in place.

    For each row, ``get_spell`` is called with the row's ``club_name`` and
    ``end_of_spell``. On success the returned fields are written to the
    ``manager``, ``Level``, ``league``, ``tier``, ``sacked`` and
    ``probability_of_sacking_true`` columns. A row gets up to ``max_retries``
    attempts; if all of them fail, the last error seen is written to the
    ``error`` column (the exception text for bad-request/validation failures,
    the literal "InternalServerError" for server failures).

    The progress bar advances exactly once per processed row, whether the row
    succeeded or failed.
    """
    max_retries = 3  # Maximum number of attempts per row
    initial_retry_delay = 1  # Back-off in seconds before the first retry of a row

    # The context manager closes the bar even if the loop is interrupted.
    with tqdm(total=len(inputs)) as progress_bar:
        # Iterate over the rows of the csv file
        for i, row in inputs.iterrows():
            club = row["club_name"]
            date = row["end_of_spell"]
            # Reset per row so one flaky row does not inflate every later delay.
            retry_delay = initial_retry_delay
            last_error = None

            for attempt in range(max_retries):
                try:
                    spell = get_spell(club, date)
                except (BadRequestError, ValidationError) as e:
                    # Remember the real cause so it is what ends up in 'error'.
                    last_error = str(e)
                    continue
                except InternalServerError:
                    last_error = "InternalServerError"
                    # Only wait when another attempt will actually follow.
                    if attempt + 1 < max_retries:
                        logging.warning(f"InternalServerError encountered. Retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries})")
                        time.sleep(retry_delay)
                        retry_delay *= 2  # Double the delay for the next retry
                    continue

                # Success: copy the parsed fields into the results frame.
                inputs.loc[i, "manager"] = spell.manager
                inputs.loc[i, "Level"] = spell.team_level.value
                inputs.loc[i, "league"] = spell.league
                inputs.loc[i, "tier"] = spell.tier
                inputs.loc[i, "sacked"] = spell.sacked
                inputs.loc[i, "probability_of_sacking_true"] = spell.probability_of_sacking_true
                break  # Exit the retry loop if successful
            else:
                # Reached only when no attempt hit `break`, i.e. every attempt failed.
                logging.error(f"Failed to fetch data for club '{club}' after {max_retries} attempts.")
                inputs.loc[i, 'error'] = last_error

            # One tick per row, success or failure, so the bar can reach 100%.
            progress_bar.update(1)


if __name__ == "__main__":
    try:
        main()
    finally:
        # Save only after processing (previously the file was written before
        # main() ran). `finally` keeps partial results if the run is interrupted.
        inputs.to_csv("LLM_output_augmented.csv", index=False)


  0%|          | 0/70600 [00:00<?, ?it/s]
ERROR:root:Failed to fetch data for club 'Israel' after 3 attempts.
ERROR:root:Failed to fetch data for club 'England' after 3 attempts.


In [6]:
# Show the results frame; the rich repr elides the middle rows of a long frame.
inputs

Unnamed: 0,club_name,end_of_spell,manager,Level,league,tier,sacked,probability_of_sacking_true,error
0,West Ham,2018-05-16,David Moyes,Senior,Premier League,1,False,,
1,Sunderland,2017-05-22,David Moyes,Senior,Premier League,1,True,0.85,
2,Real Sociedad,2015-11-09,David Moyes,Senior,La Liga,1,True,0.85,
3,Man Utd,2014-04-22,David Moyes,Senior,Premier League,1,True,0.85,
4,Everton,2013-06-30,David Moyes,Senior,Premier League,1,False,,
...,...,...,...,...,...,...,...,...,...
70595,VW Hamme,2008-12-04,,,,,,,
70596,KSV Temse,2008-06-30,,,,,,,
70597,KRC Gent,2005-06-30,,,,,,,
70598,RS Haasdonk,2002-12-31,,,,,,,
