In [None]:
# Add the Groq SDK as a project dependency using uv.
! uv add groq
# pip-based alternative, left disabled:
#!pip install Groq

In [None]:
import groq
from groq import Groq
import os
from getpass import getpass

In [None]:
# Record which Groq SDK version this notebook was run with.
print(groq.__version__)

In [None]:
from pprint import pprint as pp
# Thin wrapper around the standard-library pretty-printer (imported above as `pp`).
def pprint(data, width=80):
    """Pretty-print ``data`` to stdout, wrapping lines at ``width`` columns.

    Args:
        data: Any object to render.
        width: Maximum line width in characters; defaults to 80.
    """
    layout = {"width": width}
    pp(data, **layout)


### Setting Up API Keys

In [None]:
# Resolve the Groq API key without hardcoding it.
# Prefer the GROQ_API_KEY environment variable so the notebook can run
# unattended (Restart & Run All, nbconvert, CI); when it is unset or empty,
# fall back to an interactive prompt that does not echo the key.
api_key = os.environ.get("GROQ_API_KEY") or getpass('Enter your Groq API key: ')

In [None]:
# Shared client; these settings apply to every request made through it.
client = Groq(
    api_key=api_key,
    timeout=50.0,  # seconds (SDK default is 1 minute)
    max_retries=2,  # same as the SDK default
)

In [None]:
# Ask the model for an example codemeta.json and print the text of its first choice.
# NOTE(review): hosted model ids get retired over time - confirm this one is still served.
chat_completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": "generate a sample codemeta.json file"}],
)
codemeta_reply = chat_completion.choices[0].message
print(codemeta_reply.content)

In [None]:
# Validate the schema generated above at https://jsonlint.com/ and correct any
# errors there before pasting it into codemeta.json.

### Exploratory Data Analysis 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Read the 04_medals.csv file and do some analysis on the medals data.

In [None]:
# Read the medals CSV from its relative path, report the frame's dimensions,
# and preview the first three rows.
df = pd.read_csv('../01_data/04_medals.csv')
print("Shape: {}".format(df.shape))
df.iloc[:3]

In [None]:
# Drop rows missing any column the analysis relies on; .copy() makes the
# result an independent frame rather than a view-like slice of `df`.
required_columns = ['discipline', 'gender', 'medal_type']
df_clean = df.dropna(subset=required_columns).copy()

In [None]:

# Long-form counts: one row per (discipline, gender) pair.
medals_by_sport_gender = (
    df_clean
    .groupby(['discipline', 'gender'])
    .size()
    .reset_index(name='Medal_Count')
)

# Wide form: disciplines as rows, genders as columns; pairs with no rows become 0.
medals_pivot = (
    medals_by_sport_gender
    .pivot(index='discipline', columns='gender', values='Medal_Count')
    .fillna(0)
)

print("\nMedals per Sport per Gender:")
print(medals_pivot)

In [None]:

# Bar chart of the number of medal rows per gender value.
gender_totals = df_clean.groupby('gender').size()

fig, ax = plt.subplots(figsize=(8, 6))
gender_totals.plot(kind='bar', ax=ax, color=['skyblue', 'yellow', 'pink', 'green'])
ax.set_title('Total Medals by Gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Number of Medals')
plt.xticks(rotation=45)  # acts on the current axes, i.e. the one created above
plt.tight_layout()
plt.show()

In [None]:
# Reload the raw medals file (this rebinds `df`) and report its dimensions.
df = pd.read_csv('../01_data/04_medals.csv')
print(df.shape)

# Case-insensitive masks for women's events and gold medals; rows with a
# missing value in either column compare False and are excluded.
is_women = df['gender'].str.lower() == 'w'
is_gold = df['medal_type'].str.lower() == 'gold medal'
women_gold_medals = df[is_women & is_gold]
print(women_gold_medals.shape)

# Gold-medal rows per discipline, largest count first.
gold_by_sport = (
    women_gold_medals
    .groupby('discipline')
    .size()
    .sort_values(ascending=False)
)
print(gold_by_sport.shape)

# Plain-text rendering of the ten largest counts, for use in the API prompt.
data_summary = gold_by_sport.head(10).to_string()


In [None]:
#| label: celllabel1
# Rich display of the ten disciplines with the most women's gold medals.
gold_by_sport.head(10)

In [None]:
# Show the plain-text top-10 table exactly as it was prepared for the API prompt.
print(data_summary)

In [None]:
# Prompt sent to Groq. This has to be an f-string so that the top-10 table in
# `data_summary` is embedded in the text; as a plain string the model would be
# sent the literal placeholder "{data_summary}" and no data to analyze.
instructions = f'''
Analyze the following Olympic medals data showing women's gold medal counts by sport:

{data_summary}

Based on this data:
1. Which sport has the most gold medals won by women?
2. What is the exact count of gold medals for that sport?
3. List the top 3 sports where women won the most gold medals with their counts.
4. Provide any interesting insights about women's performance across these sports.

Please provide a clear, concise analysis focusing on the sport with the highest women's gold medal count per sport.

'''

In [None]:
# Send the analysis prompt and keep the reply text in `groq_response`.
# The variable stays an empty string when the request fails with one of the
# handled Groq errors, so later cells can still reference it.
# NOTE(review): hosted model ids get retired over time - confirm this one is still served.
groq_response = ''
analysis_messages = [{"role": "user", "content": instructions}]
try:
    chat_completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=analysis_messages,
        max_tokens=500,
        temperature=0.1,  # Low temperature for factual analysis
    )
except groq.APIConnectionError as err:
    print("The server could not be reached")
    print(err.__cause__)  # an underlying Exception, likely raised within httpx.
except groq.RateLimitError:
    # Handled before APIStatusError so the 429 case gets its own message.
    print("A 429 status code was received; we should back off a bit.")
except groq.APIStatusError as err:
    print("Another non-200-range status code was received")
    print(err.status_code)
    print(err.response)
else:
    print("=== GROQ API RESPONSE ===")
    groq_response = chat_completion.choices[0].message.content
    print(groq_response)

In [None]:
#| label: celllabel2
# Re-print the stored reply; it is an empty string if the request above failed.
print(groq_response)

### Generate text for a CITATION.cff file for GitHub

In [None]:
# Ask the model to draft a citation.cff and print the text of its first choice.
# NOTE(review): the prompt carries no repository details, so the draft will
# presumably contain placeholder metadata - review it before committing.
citation_messages = [
    {"role": "user", "content": "Generate a citation.cff file for this repository"},
]
chat_completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=citation_messages,
)
print(chat_completion.choices[0].message.content)

In [None]:
# Alternatively, generate the file with this online tool: https://citation-file-format.github.io/cff-initializer-javascript/#/