In [2]:
# --- OpenAI Imports ---
import openai
from openai import RateLimitError as OpenAIRateLimitError
from openai import APIError as OpenAIAPIError
from openai import OpenAIError

# --- Google Gemini Imports ---
from google import genai
from google.genai import types

In [5]:
import os
from dotenv import load_dotenv
# Pull settings from a local .env file before any client reads os.environ
# (python-dotenv; presumably this is where the API keys come from -- confirm).
load_dotenv()

import logging
# Emit INFO-level records into the cell output; `logger` is the module-level
# logger that openai_call and gemini_call use for retry/failure messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


In [5]:
# models/gemini-2.5-flash-preview-04-17
# models_list

In [32]:
# Client is built from the GOOGLE_API_KEY environment variable.
gemini_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

# Raw record to extract from; the task itself is stated in the system instruction.
prompt = """
John Doe, 30, john.doe@example.com
"""

# Low temperature plus an explicit thinking budget for this request.
generation_config = types.GenerateContentConfig(
    system_instruction="Extract the user, age and email in json format from the following text:",
    temperature=0.1,
    max_output_tokens=5000,
    thinking_config=types.ThinkingConfig(thinking_budget=5000),
)

# Single request; the reply is inspected in the next cell.
response = gemini_client.models.generate_content(
    model="models/gemini-2.5-flash-preview-04-17",
    config=generation_config,
    contents=[prompt],
)
        

In [33]:
# Text of the first part of the first candidate in the Gemini response.
response.candidates[0].content.parts[0].text

'```json\n{\n  "user": "John Doe",\n  "age": 30,\n  "email": "john.doe@example.com"\n}\n```'

In [17]:
from openai import OpenAI
import time
from typing import List, Optional
import json

In [18]:
def openai_call(
    user_prompt: str,
    system_prompt: str,
    model: str = 'gpt-4.1',
    temperature: float = 0.4,
    max_tokens: int = 10000,
    max_retries: int = 3,
    expected_keys: Optional[List[str]] = None,
    ):
    """
    Call the OpenAI chat completions endpoint and return the reply decoded as JSON.

    The request is sent with ``response_format={"type": "json_object"}`` and the
    content of the first choice is decoded with ``json.loads``.

    Args:
        user_prompt (str): Content of the "user" message.
        system_prompt (str): Content of the "system" message.
        model (str): Model name passed to the API.
        temperature (float): Sampling temperature passed to the API.
        max_tokens (int): ``max_tokens`` value passed to the API.
        max_retries (int): Total number of attempts. Any exception (API error,
            invalid JSON, missing keys) triggers a 2 second pause and another
            attempt until the attempts are used up.
        expected_keys (list[str] | None): Keys that must all be present in the
            decoded reply. When None or empty, no key validation is done.

    Returns:
        The decoded JSON reply. None is returned only when ``max_retries`` is
        less than 1, in which case no request is made.

    Raises:
        ValueError: If the last attempt's reply lacks one of ``expected_keys``
            (this also covers ``json.JSONDecodeError`` for invalid JSON).
        Exception: Whatever the last attempt raised is re-raised unchanged.
    """
    client = OpenAI()

    for i in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                response_format={"type": "json_object"},
                temperature=temperature,
                max_tokens=max_tokens
            )
            # Always decode and return: previously a call without expected_keys
            # fell through, repeated the request max_retries times and returned None.
            response_json = json.loads(response.choices[0].message.content)
            if expected_keys and not all(key in response_json for key in expected_keys):
                raise ValueError(f"Expected keys {expected_keys} not found in response")
            return response_json
        except Exception:
            if i < max_retries - 1:
                logger.info(f"Retrying... ({i + 1}/{max_retries})")
                time.sleep(2)
                continue
            logger.error(f"Failed to call OpenAI after {max_retries} retries")
            # Bare raise keeps the original traceback intact.
            raise

In [21]:
# Instructions for the model: describe the task and show the JSON shape wanted.
system_prompt = """
You are a helpful assistant that can answer questions and help with tasks.
You extract information from text and return it in a structured format.
Example of expected output format:
```json
{
    "name": "John Doe",
    "age": 30,
    "email": "john.doe@example.com"
}
```

Make sure only to return the keys in the example output format.
"""
# Free-text passage the fields are extracted from.
user_prompt = """
John Johnson was a 30 year old man who lived in the city of New York. He was a software engineer and he loved to code. His email was john.johnson@example.com.
"""

# expected_keys mirrors the keys of the example in system_prompt; openai_call
# raises ValueError once its attempts are used up if any key is missing.
response = openai_call(
    user_prompt=user_prompt,
    system_prompt=system_prompt,
    expected_keys=["name", "age", "email"]
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [22]:
# Display the value returned by openai_call in the previous cell.
response

{'name': 'John Johnson', 'age': 30, 'email': 'john.johnson@example.com'}

In [31]:
import re

def _first_embedded_json(text):
    """Return the first JSON object/array that starts at a '{' or '[' in text, else None."""
    decoder = json.JSONDecoder()
    for opener in re.finditer(r'[{\[]', text):
        try:
            # raw_decode parses exactly one JSON value starting at the given
            # index and ignores anything after it, so nested structures and
            # trailing prose are both handled.
            value, _end = decoder.raw_decode(text, opener.start())
        except ValueError:
            continue
        return value
    return None


def parse_json(text):
    """
    Robustly extracts JSON from text that might contain non-JSON content.

    Strategies are tried in order and the first success wins:
      1. Parse the whole text as JSON.
      2. Parse the content of each markdown code fence (``` or ```json).
      3. Scan for the first '{' or '[' from which a complete JSON value can be
         decoded, ignoring surrounding prose.
      4. Replace backticks, curly quotes and single quotes with double quotes,
         then repeat steps 1 and 3 on the cleaned text. This is lossy
         (apostrophes inside values are rewritten too) and is only a last resort.

    Args:
        text (str): Text that might contain JSON data

    Returns:
        dict or list: Parsed JSON data (a bare JSON scalar is returned as-is
            when the whole text is one, e.g. "30" -> 30)
        None: If no valid JSON found, or if text is not a string
    """
    # Approach 1: Try parsing the entire text directly
    try:
        return json.loads(text)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers None etc.
        pass

    # The remaining approaches are string operations; anything else has no JSON to find.
    if not isinstance(text, str):
        return None

    # Approach 2: Look for markdown code blocks with json
    for code_match in re.findall(r'```(?:json)?\s*([\s\S]*?)\s*```', text):
        try:
            return json.loads(code_match)
        except ValueError:
            pass

    # Approach 3: First decodable JSON object/array embedded in the text
    found = _first_embedded_json(text)
    if found is not None:
        return found

    # Approach 4: Clean up the text and try again.
    # Written as escapes so the curly quotes survive copy/paste and editors
    # that normalise them to ASCII.
    text_cleaned = text
    for quote_like in ('`', '\u201c', '\u201d', '\u2018', '\u2019', "'"):
        text_cleaned = text_cleaned.replace(quote_like, '"')

    try:
        return json.loads(text_cleaned)
    except ValueError:
        pass

    # If this also fails the result is None
    return _first_embedded_json(text_cleaned)

In [32]:
def gemini_call(
    user_prompt: str,
    system_prompt: str,
    model: str = 'models/gemini-2.5-flash-preview-04-17',
    temperature: float = 0.2,
    max_tokens: int = 10000,
    thinking_budget: int = 10000,
    max_retries: int = 3,
    expected_keys: Optional[List[str]] = None,
    ):
    """
    Call Gemini ``generate_content`` and return the JSON found in the reply.

    The text of the first part of the first candidate is passed to
    ``parse_json``, which extracts JSON even when it is wrapped in a code
    fence or surrounded by prose.

    Args:
        user_prompt (str): Sent as the only item of ``contents``.
        system_prompt (str): Passed as ``system_instruction``.
        model (str): Model name passed to the API.
        temperature (float): Sampling temperature passed in the config.
        max_tokens (int): Passed as ``max_output_tokens``.
        thinking_budget (int): Passed to ``types.ThinkingConfig``.
        max_retries (int): Total number of attempts. Any exception (API error,
            unparseable reply, missing keys) triggers a 2 second pause and
            another attempt until the attempts are used up.
        expected_keys (list[str] | None): Keys that must all be present in the
            parsed reply. When None or empty, no key validation is done.

    Returns:
        The value produced by ``parse_json``. None is returned only when
        ``max_retries`` is less than 1, in which case no request is made.

    Raises:
        ValueError: If the last attempt's reply contains no parseable JSON or
            lacks one of ``expected_keys``.
        Exception: Whatever the last attempt raised is re-raised unchanged.
    """
    client = genai.Client()

    for i in range(max_retries):
        try:
            config = types.GenerateContentConfig(
                temperature=temperature,
                max_output_tokens=max_tokens,
                system_instruction=system_prompt,
                thinking_config=types.ThinkingConfig(thinking_budget=thinking_budget)
            )
            response = client.models.generate_content(
                model=model,
                contents=[user_prompt],
                config=config
            )
            response_text = response.candidates[0].content.parts[0].text
            try:
                response_json = parse_json(response_text)
            except Exception as e:
                print("Failed to parse JSON")
                raise ValueError(f"Failed to parse JSON. {e}") from e
            if response_json is None:
                # parse_json reports "nothing found" by returning None, not by
                # raising; treat it as a failed attempt so it is retried
                # instead of being handed back as a result.
                print("Failed to parse JSON")
                raise ValueError("Failed to parse JSON. No valid JSON found in response")
            print(response_json)
            if expected_keys and not all(key in response_json for key in expected_keys):
                raise ValueError(f"Expected keys {expected_keys} not found in response")
            return response_json

        except Exception:
            if i < max_retries - 1:
                logger.info(f"Retrying... ({i + 1}/{max_retries})")
                time.sleep(2)
                continue
            logger.error(f"Failed to call Gemini after {max_retries} retries")
            # Bare raise keeps the original traceback intact.
            raise

In [33]:
# Same system prompt text as the OpenAI example cell; rebinds system_prompt.
system_prompt = """
You are a helpful assistant that can answer questions and help with tasks.
You extract information from text and return it in a structured format.
Example of expected output format:
```json
{
    "name": "John Doe",
    "age": 30,
    "email": "john.doe@example.com"
}
```

Make sure only to return the keys in the example output format.
"""
# Same passage as the OpenAI example cell; rebinds user_prompt.
user_prompt = """
John Johnson was a 30 year old man who lived in the city of New York. He was a software engineer and he loved to code. His email was john.johnson@example.com.
"""

# expected_keys mirrors the keys of the example in system_prompt; gemini_call
# raises ValueError once its attempts are used up if any key is missing.
# Rebinds `response`, replacing the earlier OpenAI result.
response = gemini_call(
    user_prompt=user_prompt,
    system_prompt=system_prompt,
    expected_keys=["name", "age", "email"]
)

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-04-17:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.


{'name': 'John Johnson', 'age': 30, 'email': 'john.johnson@example.com'}
