In [16]:
import pandas as pd
import json
import time
import re
from openai import OpenAI
import os

# Build the API client; the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Load the fighter averages file and reduce it to one row per distinct
# fighter name, with a fresh 0..n-1 index.
df = (
    pd.read_csv("../processed_data/fighter_averages.csv")
    .reset_index(drop=True)[["Name"]]
    .drop_duplicates()
    .reset_index(drop=True)
)




# Cleans text from responses
def clean_text(text):
    """Normalize whitespace in ``text`` and capitalize it.

    Every run of whitespace (spaces, tabs, newlines) is replaced by a single
    space. The result is passed through ``str.capitalize``, which upper-cases
    the first character and lower-cases every other character.
    """
    whitespace_run = re.compile(r'(\s+)')
    collapsed = whitespace_run.sub(' ', text)
    return collapsed.capitalize()

# Function to process fighters in batches
def batch_fighter_analysis(fighter_list):
    """Ask the chat model for a style breakdown of a batch of fighters.

    Args:
        fighter_list: The fighters to describe. The value is interpolated
            into the prompt using its default string formatting.

    Returns:
        The JSON array parsed from the model's reply. The prompt asks for one
        object per fighter with ``name``, ``style``, ``strengths`` and
        ``weaknesses`` keys, but the parsed content is not validated here.
        An empty list is returned when the request fails or when no JSON
        array can be parsed from the reply, so callers can always iterate
        over the result.
    """
    prompt = f"""
        Provide a **detailed** and **structured** MMA fighting style breakdown for the following fighters:\n{fighter_list}
        For each fighter, explain their **style**, **strengths**, and **weaknesses** in a brief way to be used on a fighter analysis dashboard.
        Don't mention performance statistics, win records, or anything numeric.
        Have a few strengths and a few weaknesses, but only 1 fight style description.
        Keep the fight styles to only a few words, such as brawler, technician, submission specialist, pressure wrestler, etc.
        Ensure each fighter has its own description in JSON format wrapping the batch in square brackets, while the name, style, strengths and weaknesses are all in string quotes.
        The description should be a single string, with no lists
        """

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert in analyzing UFC fighters' fighting styles."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=1200,   # Increase for more detail
            temperature=0.3,  # More focused and factual
            presence_penalty=0.2  # Encourages slight variation in responses
        )
        output_text = response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort: a failed request skips this batch instead of aborting the run.
        print(f"Error: {str(e)}")
        return []

    # The reply may wrap the JSON in extra text (e.g. a markdown code fence),
    # so parsing starts at the first '['.
    start_index = output_text.find('[')
    if start_index == -1:
        print("JSON Decode Error:", "no '[' found in response")
        print("Json Text:", output_text)
        return []

    json_text = output_text[start_index:]
    try:
        # raw_decode parses one complete JSON value and ignores whatever
        # follows it. Unlike cutting at the first ']', this copes with nested
        # lists and with brackets that appear inside string values.
        fighter_data, _ = json.JSONDecoder().raw_decode(json_text)
    except json.JSONDecodeError as e:
        print("JSON Decode Error:", e)
        print("Json Text:", json_text)
        return []
    return fighter_data

# Split unique fighters into batches (e.g., batches of 5 fighters per request)
batch_size = 5
# Send plain name lists to the prompt; a DataFrame slice would be interpolated
# as its table repr (column header and index numbers included).
fighter_names = df["Name"].dropna().tolist()
fighter_batches = [fighter_names[i:i+batch_size] for i in range(0, len(fighter_names), batch_size)]

# Store all fighter responses, keyed by the name the model returned
fighter_analysis = {}
required_keys = ("name", "style", "strengths", "weaknesses")

# Process each batch
for batch in fighter_batches:
    # A failed batch yields a falsy result (None or an empty container);
    # treat it as "no fighters" so one bad reply does not abort the run.
    response = batch_fighter_analysis(batch) or []
    print(response)
    for fighter in response:
        # The model output is not guaranteed to follow the requested schema.
        if not isinstance(fighter, dict) or any(key not in fighter for key in required_keys):
            print(f"Skipping malformed entry: {fighter}")
            continue
        fighter_analysis[fighter["name"]] = {
            "Style": fighter["style"],
            "Strengths": fighter["strengths"],
            "Weaknesses": fighter["weaknesses"]
        }
    time.sleep(2)  # Avoid API rate limits

# Apply results to DataFrame
analysis_df = pd.DataFrame.from_dict(fighter_analysis, orient="index").reset_index()
analysis_df = analysis_df.rename(columns={"index": "Name"})

# Merge with original DataFrame on fighter name; fighters without a
# description keep their row, with missing values in the new columns.
df = df.merge(analysis_df, on="Name", how="left")


df.to_csv("../processed_data/fight_style_descriptions.csv", index=False)
df.head()


[{'name': 'Scott Morris', 'style': 'brawler', 'strengths': 'powerful striking, aggressive pace, high durability', 'weaknesses': 'limited grappling, susceptibility to counterattacks, stamina management'}, {'name': 'Sean Daugherty', 'style': 'technician', 'strengths': 'precise striking, excellent footwork, strong fight IQ', 'weaknesses': 'lack of knockout power, sometimes overly cautious, struggles against pressure fighters'}, {'name': 'Patrick Smith', 'style': 'submission specialist', 'strengths': 'high-level grappling, effective transitions, strong submission game', 'weaknesses': 'striking defense issues, can be taken down, slow start in fights'}, {'name': 'Ray Wizard', 'style': 'pressure wrestler', 'strengths': 'relentless takedown attempts, cardio for grappling, ground control', 'weaknesses': 'limited striking skills, can be outpaced, vulnerable to submissions'}, {'name': 'Johnny Rhodes', 'style': 'counter striker', 'strengths': 'sharp reflexes, effective counters, solid defensive sk

Unnamed: 0,Name,Style,Strengths,Weaknesses
0,Scott Morris,brawler,"powerful striking, aggressive pace, high durab...","limited grappling, susceptibility to counterat..."
1,Sean Daugherty,technician,"precise striking, excellent footwork, strong f...","lack of knockout power, sometimes overly cauti..."
2,Patrick Smith,submission specialist,"high-level grappling, effective transitions, s...","striking defense issues, can be taken down, sl..."
3,Ray Wizard,pressure wrestler,"relentless takedown attempts, cardio for grapp...","limited striking skills, can be outpaced, vuln..."
4,Johnny Rhodes,counter striker,"sharp reflexes, effective counters, solid defe...","passive at times, can struggle with aggressive..."


In [None]:
import pandas as pd
import json
import time
import re
from openai import OpenAI

import os

# Read the key from the environment rather than embedding a literal in the notebook.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

df = pd.read_csv("april2025-elo copy 6.csv").reset_index(drop=True)

# BOUT holds "<red> vs. <blue>"; take each side and trim surrounding whitespace.
bout_sides = df['BOUT'].str.split(' vs. ')
red_fighter_names = bout_sides.str[0].str.strip()
blue_fighter_names = bout_sides.str[1].str.strip()
# Store the *stripped* names so later .map() lookups use the same strings
# that are sent to the model.
df['RedFighter'] = red_fighter_names
df['BlueFighter'] = blue_fighter_names

# Get unique fighters. Rows whose BOUT could not be split produce NaN and are
# dropped; sorting makes the batch composition reproducible between runs.
unique_fighters = sorted(set(red_fighter_names.dropna()) | set(blue_fighter_names.dropna()))

# Cleans text from responses
def clean_text(text):
    """Return ``text`` with whitespace runs collapsed, then capitalized.

    Any sequence of whitespace characters becomes one space. ``str.capitalize``
    is applied afterwards, so the first character is upper-cased and all
    remaining characters are lower-cased.
    """
    single_spaced = re.sub(r'(\s+)', ' ', text)
    capitalized = single_spaced.capitalize()
    return capitalized

# Function to process fighters in batches
def batch_fighter_analysis(fighter_list):
    """Ask the chat model for a detailed style description of each fighter.

    Args:
        fighter_list: The fighters to describe. The value is interpolated
            into the prompt using its default string formatting.

    Returns:
        The JSON array parsed from the model's reply. The prompt asks for one
        object per fighter with ``name`` and ``description`` keys, but the
        parsed content is not validated here. An empty list is returned when
        the request fails or when no JSON array can be parsed from the reply,
        so callers can always iterate over the result.
    """
    prompt = f"""
        Provide a **detailed** and **structured** MMA fighting style breakdown for the following fighters:\n{fighter_list}
        For each fighter, explain their **style**, **strengths**, and **weaknesses** in a well-explained manner with technical details. 
        Avoid vague descriptions and ensure you provide **specific examples** of their fighting tendencies, notable techniques, and common strategies.
        Don't mention the level of opponent they have fought.
        Ensure each fighter has its own description in JSON format wrapping the batch in square brackets, while the name and description are both in string quotes.
        The description shoul be a single string, with no lists
        Use **concise but highly informative sentences** and avoid unnecessary filler words. 
        **Each attribute should have sufficient depth** while remaining structured.
        """

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert in analyzing UFC fighters' fighting styles."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=1200,   # Increase for more detail
            temperature=0.3,  # More focused and factual
            presence_penalty=0.2  # Encourages slight variation in responses
        )
        output_text = response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort: a failed request skips this batch instead of aborting the run.
        print(f"Error: {str(e)}")
        return []

    # The reply may wrap the JSON in extra text (e.g. a markdown code fence),
    # so parsing starts at the first '['.
    start_index = output_text.find('[')
    if start_index == -1:
        print("JSON Decode Error:", "no '[' found in response")
        print("Json Text:", output_text)
        return []

    json_text = output_text[start_index:]
    try:
        # raw_decode parses one complete JSON value and ignores whatever
        # follows it. Unlike cutting at the first ']', this copes with
        # brackets inside description strings and with nested lists.
        fighter_data, _ = json.JSONDecoder().raw_decode(json_text)
    except json.JSONDecodeError as e:
        print("JSON Decode Error:", e)
        print("Json Text:", json_text)
        return []
    return fighter_data

# Split unique fighters into batches (e.g., batches of 5 fighters per request)
batch_size = 5
fighter_batches = [unique_fighters[i:i+batch_size] for i in range(0, len(unique_fighters), batch_size)]

# Store all fighter responses: name returned by the model -> description
fighter_analysis = {}

# Process each batch
for batch in fighter_batches:
    print(f"Processing batch: {batch}")
    # A failed batch yields a falsy result (None or an empty container);
    # treat it as "no fighters" so one bad reply does not abort the run.
    response = batch_fighter_analysis(batch) or []
    for fighter in response:
        # The model output is not guaranteed to follow the requested schema.
        if not isinstance(fighter, dict) or "name" not in fighter or "description" not in fighter:
            print(f"Skipping malformed entry: {fighter}")
            continue
        fighter_analysis[fighter["name"]] = fighter["description"]
    time.sleep(2)  # Avoid API rate limits

# Apply results to DataFrame; fighters without a description map to NaN
df["Red Fighter Analysis"] = df["RedFighter"].map(fighter_analysis)
df["Blue Fighter Analysis"] = df["BlueFighter"].map(fighter_analysis)

# Save to CSV (optional)
df.to_csv("april2025-analysis.csv", index=False)



## Interrupted collection of descriptions from GPT

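The laptop turned off overnight and interrupted the run, so I had to find the remaining fighters that were not yet in the set of collected descriptions.

I first applied the descriptions that had already been saved to the CSV file, and then worked out which fighters were still missing.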



In [None]:
import pandas as pd
import json
import time
import re
from openai import OpenAI

# Attach the descriptions fetched so far to each bout's two fighters
red_descriptions = df["RedFighter"].map(fighter_analysis)
blue_descriptions = df["BlueFighter"].map(fighter_analysis)
df["Red Fighter Analysis"] = red_descriptions
df["Blue Fighter Analysis"] = blue_descriptions
df.to_csv("fighter_analysis_master.csv", index=False)

# Inspect the last batch reply and the descriptions collected so far
print(response)
print(fighter_analysis)

# Number of distinct fighters in each corner
print(df["RedFighter"].nunique())
print(df["BlueFighter"].nunique())

In [None]:
# Work out which fighters still have no description
scraped_fighters = {name for name in fighter_analysis}  # names already collected
remaining_fighters = set(unique_fighters).difference(scraped_fighters)
print(remaining_fighters)



In [None]:
remaining_fighters_list = list(remaining_fighters)

batch_size = 5
fighter_batches = [remaining_fighters_list[i:i+batch_size] for i in range(0, len(remaining_fighters_list), batch_size)]

# New descriptions are added to the existing fighter_analysis dict, so the
# entries collected before the interruption are kept.

# Process each batch
for batch in fighter_batches:
    print(f"Processing batch: {batch}")
    # A failed batch yields a falsy result (None or an empty container);
    # treat it as "no fighters" so one bad reply does not abort the run.
    response = batch_fighter_analysis(batch) or []
    for fighter in response:
        # The model output is not guaranteed to follow the requested schema.
        if not isinstance(fighter, dict) or "name" not in fighter or "description" not in fighter:
            print(f"Skipping malformed entry: {fighter}")
            continue
        fighter_analysis[fighter["name"]] = fighter["description"]
    time.sleep(2)  # Avoid API rate limits

# Strip both name columns before mapping, so lookups match the stripped
# names that were sent to the model.
df["RedFighter"] = df["RedFighter"].str.strip()
df["BlueFighter"] = df["BlueFighter"].str.strip()

# Apply results to DataFrame; fighters without a description map to NaN
df["Red Fighter Analysis"] = df["RedFighter"].map(fighter_analysis)
df["Blue Fighter Analysis"] = df["BlueFighter"].map(fighter_analysis)

# Save to CSV (optional)
df.to_csv("april2025 analysis.csv", index=False)