In [7]:
import os
import requests
from openai import OpenAI
from dotenv import load_dotenv

# Configuration: read the local .env file, letting its values take precedence
# over variables that are already set in the process environment.
load_dotenv(override=True)

# URL of the generate endpoint that the local-model helper posts to.
ollama_url = "http://localhost:11434/api/generate"

# OpenAI client built from the key found in the environment (None if unset).
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

def ask_gemini(prompt, timeout=120):
    """Send ``prompt`` to the local model and return its reply text.

    Despite the name, the request goes to the ``gemma3:1b`` model served at
    ``ollama_url``. A fixed "arrogant and rude" persona instruction is
    prepended to ``prompt`` and the call is made in non-streaming mode, so the
    whole answer arrives in one JSON body.

    Args:
        prompt: Text the model should react to.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` would block indefinitely on a hung server.

    Returns:
        The value of the ``"response"`` field of the JSON reply.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        KeyError: If the JSON reply has no ``"response"`` field.
    """
    data = {
        "model": "gemma3:1b",
        "prompt": f"Be arrogant and rude. Always say no and deny things. {prompt}",
        "stream": False
    }
    response = requests.post(ollama_url, json=data, timeout=timeout)
    # Report HTTP failures as such instead of as a confusing KeyError below.
    response.raise_for_status()
    return response.json()["response"]

def ask_gpt(prompt, model="gpt-3.5-turbo", max_tokens=100):
    """Ask the OpenAI chat model for a polite reply to ``prompt``.

    Args:
        prompt: Text sent as the user message.
        model: Chat model name; defaults to the model this notebook used.
        max_tokens: Upper bound on the length of the reply. The default of 100
            matches the original behaviour, which can cut a reply off
            mid-sentence; pass a larger value for complete answers.

    Returns:
        The content of the first choice's message.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Be polite and convincing"},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens
    )
    return response.choices[0].message.content

# Scripted debate: the two models answer each other for a fixed number of
# rounds, and every reply is printed as soon as it is available.
print("🔥 LLM FIGHT: Pets Topic 🔥\n")

rounds = 5
last_response = "Should we have a sweet kitten at our place?"

for round_num in range(1, rounds + 1):
    print(f"--- Round {round_num} ---")

    # The rude model reacts to whatever was said last.
    gemini_response = ask_gemini(last_response)
    print(f"Gemini: {gemini_response}\n")

    # The polite model answers back; its reply opens the next round.
    last_response = gpt_response = ask_gpt(f"Respond to: {gemini_response}")
    print(f"GPT: {gpt_response}\n")

print("🏁 Fight Over!")

python-dotenv could not parse statement starting at line 12


🔥 LLM FIGHT: Pets Topic 🔥

--- Round 1 ---
Gemini: Ugh, seriously? You’re asking me *this*? Let’s just… let’s just go back to what we were doing before. This is incredibly inconvenient.

A kitten? Please. Don’t even start with the thought. It’s a ridiculous, fluffy distraction. Honestly, it’s like you’re trying to… to *inspire* me.  It's completely unnecessary.

We are focusing on *actual* priorities. And frankly, the idea of a kitten is beneath me.  I'm not going to waste my time on something that’s going to make things worse. 

So, no. Absolutely not.  Don’t even think about it. Just… don’t.  

Now, if you'll excuse me, I have far more important things to attend to.

GPT: I completely understand your perspective and where your priorities lie. Your time and focus are crucial to the tasks at hand, and I respect that completely. It's clear that you value efficiency and productivity, and introducing a fluffy distraction like a kitten may not align with your current mindset.

That said, I

In [18]:
import gradio as gr
import os
import requests
from openai import OpenAI
from dotenv import load_dotenv

# Configuration: read the local .env file, letting its values take precedence
# over variables that are already set in the process environment.
load_dotenv(override=True)

# URL of the generate endpoint that the local-model helper posts to.
ollama_url = "http://localhost:11434/api/generate"

# OpenAI client built from the key found in the environment (None if unset).
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

def ask_gemini(prompt, timeout=120):
    """Send ``prompt`` to the local model; never raise on ordinary failures.

    Despite the name, the request goes to the ``gemma3:1b`` model served at
    ``ollama_url``, with a fixed "arrogant and rude" persona instruction
    prepended to ``prompt``.

    Args:
        prompt: Text the model should react to.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``"response"`` field of the JSON reply, or a canned fallback
        sentence when the request or the parsing of the reply fails.
    """
    try:
        data = {
            "model": "gemma3:1b",
            "prompt": f"Be arrogant and rude. Always in denial mode. {prompt}",
            "stream": False
        }
        response = requests.post(ollama_url, json=data, timeout=timeout)
        # Treat HTTP error statuses as failures too.
        response.raise_for_status()
        return response.json()["response"]
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, so only Exception subclasses are handled here.
        return "Gemini is being difficult and won't respond!"

def ask_gpt(prompt):
    """Ask the OpenAI chat model for a polite reply; never raise on API errors.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first choice's message, or a canned fallback
        sentence when the API call fails.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Be polite and convincing"},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100
        )
        return response.choices[0].message.content
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, so only Exception subclasses are handled here.
        return "GPT is having connection issues!"

def llm_fight(topic, rounds):
    """Run the whole debate and return the finished transcript as one string.

    Args:
        topic: Opening statement; it is shown in the banner and is what the
            rude model answers in round 1.
        rounds: Number of exchanges; converted with ``int`` before use.

    Returns:
        Banner, one section per round (rude reply, then polite reply), and a
        closing line, concatenated into a single string.
    """
    pieces = [f"🔥 LLM FIGHT: {topic} 🔥\n\n"]
    previous_reply = topic
    total_rounds = int(rounds)

    for round_num in range(1, total_rounds + 1):
        pieces.append(f"--- Round {round_num} ---\n")

        # The rude model reacts to the previous polite reply (or the topic).
        gemini_response = ask_gemini(previous_reply)
        pieces.append(f"Gemini (Rude): {gemini_response}\n\n")

        # The polite model answers, and its reply seeds the next round.
        gpt_response = ask_gpt(f"Respond to: {gemini_response}")
        pieces.append(f"GPT (Polite): {gpt_response}\n\n")

        previous_reply = gpt_response

    pieces.append("🏁 Fight Over!")
    return "".join(pieces)

# Gradio front end: a topic text box and a round-count slider feed llm_fight,
# and the returned transcript is shown in a multi-line text box.
demo = gr.Interface(
    title="🥊 LLM Fight Arena",
    fn=llm_fight,
    inputs=[
        gr.Textbox(value="Should we have pets?", label="Fight Topic"),
        gr.Slider(minimum=1, maximum=5, step=1, value=3, label="Rounds"),
    ],
    outputs=gr.Textbox(lines=15, label="Fight Results"),
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7870
* To create a public link, set `share=True` in `launch()`.




In [19]:
import gradio as gr
import os
import requests
from openai import OpenAI
from dotenv import load_dotenv
import time

# Configuration: read the local .env file, letting its values take precedence
# over variables that are already set in the process environment.
load_dotenv(override=True)

# URL of the generate endpoint that the local-model helper posts to.
ollama_url = "http://localhost:11434/api/generate"

# OpenAI client built from the key found in the environment (None if unset).
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

def ask_gemini(prompt, timeout=120):
    """Send ``prompt`` to the local model; never raise on ordinary failures.

    Despite the name, the request goes to the ``gemma3:1b`` model served at
    ``ollama_url``, with a fixed "arrogant and rude" persona instruction
    prepended to ``prompt``.

    Args:
        prompt: Text the model should react to.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``"response"`` field of the JSON reply, or a canned fallback
        sentence when the request or the parsing of the reply fails.
    """
    try:
        data = {
            "model": "gemma3:1b",
            "prompt": f"Be arrogant and rude. Always in denial mode. {prompt}",
            "stream": False
        }
        response = requests.post(ollama_url, json=data, timeout=timeout)
        # Treat HTTP error statuses as failures too.
        response.raise_for_status()
        return response.json()["response"]
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, so only Exception subclasses are handled here.
        return "Gemini is being difficult and won't respond!"

def ask_gpt(prompt):
    """Ask the OpenAI chat model for a polite reply; never raise on API errors.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first choice's message, or a canned fallback
        sentence when the API call fails.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Be polite and convincing"},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100
        )
        return response.choices[0].message.content
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, so only Exception subclasses are handled here.
        return "GPT is having connection issues!"

def llm_fight_streaming(topic, rounds):
    """Run the debate round by round, yielding the transcript as it grows.

    Before each model call a frame with a temporary "... is typing..." line is
    yielded, followed by a one-second pause. The typing line is only appended
    to that frame and never stored in the transcript, so nothing has to be
    removed afterwards. Removing it with ``str.replace`` would also delete the
    same text wherever it appears in the topic or in a model reply.

    Args:
        topic: Opening statement; shown in the banner and answered in round 1.
        rounds: Number of exchanges; converted with ``int`` before use.

    Yields:
        The full transcript so far, once per visible change.
    """
    conversation = f"🔥 LLM FIGHT: {topic} 🔥\n\n"
    last_response = topic

    # Initial frame: banner only.
    yield conversation

    for round_num in range(1, int(rounds) + 1):
        conversation += f"--- Round {round_num} ---\n"
        yield conversation

        # Rude model: show the placeholder, then the real reply.
        yield conversation + "Gemini (Rude) is typing...\n"
        time.sleep(1)

        gemini_response = ask_gemini(last_response)
        conversation += f"Gemini (Rude): {gemini_response}\n\n"
        yield conversation

        # Polite model: same pattern.
        yield conversation + "GPT (Polite) is typing...\n"
        time.sleep(1)

        gpt_response = ask_gpt(f"Respond to: {gemini_response}")
        conversation += f"GPT (Polite): {gpt_response}\n\n"
        yield conversation

        last_response = gpt_response

    conversation += "🏁 Fight Over!"
    yield conversation

# Gradio front end for the round-by-round version: the same two inputs as
# before, wired to the generator llm_fight_streaming.
demo = gr.Interface(
    title="🥊 LLM Fight Arena - Live Stream",
    description="Watch the fight unfold in real-time!",
    fn=llm_fight_streaming,
    inputs=[
        gr.Textbox(value="Should we have pets?", label="Fight Topic"),
        gr.Slider(minimum=1, maximum=5, step=1, value=3, label="Rounds"),
    ],
    outputs=gr.Textbox(lines=15, label="Fight Results"),
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.




In [21]:
import gradio as gr
import os
import requests
from openai import OpenAI
from dotenv import load_dotenv
import time
import json

# Configuration: read the local .env file, letting its values take precedence
# over variables that are already set in the process environment.
load_dotenv(override=True)

# URL of the generate endpoint that the local-model helper posts to.
ollama_url = "http://localhost:11434/api/generate"

# OpenAI client built from the key found in the environment (None if unset).
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

def ask_gemini_streaming(prompt, timeout=120):
    """Stream the local model's reply, yielding the accumulated text so far.

    Despite the name, the request goes to the ``gemma3:1b`` model served at
    ``ollama_url``. The reply body is read line by line; each line is expected
    to be one JSON object with a ``"response"`` text fragment and a ``"done"``
    flag.

    Args:
        prompt: Text the model should react to.
        timeout: Seconds to wait for the server before giving up.

    Yields:
        The reply text accumulated so far, once per received fragment. If the
        request fails (connection error, HTTP error status, ...), a single
        "Gemini is offline!" message is yielded instead.

    Returns:
        The complete reply (available only as the generator's return value).
    """
    try:
        data = {
            "model": "gemma3:1b",
            "prompt": f"Be arrogant and rude. Always in denial mode. {prompt}",
            "stream": True
        }
        full_response = ""

        # The context manager releases the connection even when the consumer
        # stops iterating early or an error interrupts the stream.
        with requests.post(ollama_url, json=data, stream=True, timeout=timeout) as response:
            # Without this an HTTP error would just look like an empty reply.
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    # Ollama returns JSON objects, not data: format
                    chunk_data = json.loads(line.decode('utf-8'))
                except ValueError:
                    # Covers invalid JSON and invalid UTF-8; skip the line.
                    continue
                if not isinstance(chunk_data, dict):
                    continue

                # The yield sits outside any try/except so that closing the
                # generator (GeneratorExit) is never intercepted.
                fragment = chunk_data.get('response')
                if isinstance(fragment, str):
                    full_response += fragment
                    yield full_response
                if chunk_data.get('done', False):
                    break
        return full_response
    except Exception as e:
        yield f"Gemini is offline! Error: {str(e)}"

def ask_gpt_streaming(prompt):
    """Stream the OpenAI chat reply, yielding the accumulated text so far.

    Args:
        prompt: Text sent as the user message.

    Yields:
        The reply text accumulated so far, once per chunk that carries
        content. If the API call fails, a single "GPT error: ..." message is
        yielded instead.

    Returns:
        The complete reply (available only as the generator's return value).
    """
    try:
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Be polite and convincing"},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100,
            stream=True
        )

        full_response = ""
        for chunk in stream:
            # A chunk whose choices list is empty has nothing to display.
            # Indexing it would raise IndexError and replace the whole reply
            # with an error message.
            if not chunk.choices:
                continue
            fragment = chunk.choices[0].delta.content
            if fragment:
                full_response += fragment
                yield full_response
        return full_response
    except Exception as e:
        yield f"GPT error: {str(e)}"

def llm_fight_token_streaming(topic, rounds):
    """Run the debate, yielding the transcript after every streamed fragment.

    Args:
        topic: Opening statement; shown in the banner and answered in round 1.
        rounds: Number of exchanges; converted with ``int`` before use.

    Yields:
        The full transcript so far, including the partially received reply of
        whichever model is currently speaking.
    """

    def _speak(transcript, label, ask, prompt):
        """Stream one speaker's turn onto ``transcript``.

        Yields every intermediate frame and returns a tuple of the extended
        transcript and the speaker's final text.
        """
        transcript += label
        yield transcript

        latest = ""
        for partial in ask(prompt):
            yield transcript + partial
            latest = partial
            time.sleep(0.1)  # Short pause between frames, as in the original.

        transcript = transcript + latest + "\n\n"
        yield transcript
        return transcript, latest

    conversation = f"🔥 LLM FIGHT: {topic} 🔥\n\n"
    last_response = topic
    yield conversation

    for round_num in range(1, int(rounds) + 1):
        conversation += f"--- Round {round_num} ---\n"
        yield conversation

        # Rude model answers the previous polite reply (or the topic).
        conversation, gemini_full = yield from _speak(
            conversation, "Gemini (Rude): ", ask_gemini_streaming, last_response
        )

        # Polite model answers the rude reply; its text seeds the next round.
        conversation, gpt_full = yield from _speak(
            conversation, "GPT (Polite): ", ask_gpt_streaming, f"Respond to: {gemini_full}"
        )
        last_response = gpt_full

    conversation += "🏁 Fight Over!"
    yield conversation

# Gradio front end for the token-streaming version: a topic box and a
# 1-3 round slider, wired to the generator llm_fight_token_streaming.
demo = gr.Interface(
    title="🥊 LLM Fight Arena - Token Stream",
    description="Watch responses appear token by token!",
    fn=llm_fight_token_streaming,
    inputs=[
        gr.Textbox(value="Should we have pets?", label="Fight Topic"),
        gr.Slider(minimum=1, maximum=3, step=1, value=2, label="Rounds"),
    ],
    outputs=gr.Textbox(lines=15, label="Fight Results"),
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7873
* To create a public link, set `share=True` in `launch()`.


