In [None]:
import os
import random
import string
import threading
import json
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from typing import List, Optional
from groq import Groq

app = FastAPI()

# Upstream Groq client shared by every request in this cell. The key is read
# from the GROQ_API_KEY environment variable; os.environ.get yields None when
# the variable is unset.
master_client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Lock taken by load_api_keys() and save_api_keys() around each individual
# read or write of api_keys.json. It is released between a load and the
# following save, so a load-modify-save sequence is not atomic.
api_keys_lock = threading.Lock()

# Define the request and response models
class Message(BaseModel):
    # One chat turn. Each instance is converted with .dict() and forwarded to
    # the upstream client by chat_completions.
    role: str  # speaker label; any string is accepted here
    content: str  # text of the turn

class ChatCompletionRequest(BaseModel):
    # JSON body accepted by POST /chat/completions.
    messages: List[Message]  # conversation turns, forwarded in the given order
    model: str  # model identifier passed through to master_client unchanged

class ChatCompletionResponse(BaseModel):
    # JSON body returned by POST /chat/completions.
    content: str  # message content of the first upstream choice

def generate_api_key(length=30):
    """Generate a random alphanumeric API key.

    The key is what authenticates a client, so it is drawn from the operating
    system's CSPRNG via ``secrets`` instead of the predictable, seedable
    generator behind ``random``.

    Args:
        length: Number of characters in the key. Defaults to 30.

    Returns:
        A string of ``length`` characters taken from ASCII letters and digits.
    """
    # Imported locally so the function is self-contained and does not rely on
    # the cell's import block being edited.
    import secrets

    characters = string.ascii_letters + string.digits
    return ''.join(secrets.choice(characters) for _ in range(length))

def load_api_keys():
    """Read the key registry from api_keys.json.

    Returns:
        The decoded JSON content of the file, or an empty dict when the file
        does not exist. The read happens while holding ``api_keys_lock``.
    """
    with api_keys_lock:
        if os.path.exists('api_keys.json'):
            with open('api_keys.json', 'r') as registry_file:
                return json.load(registry_file)
        return {}

def save_api_keys(api_keys):
    """Overwrite api_keys.json with ``api_keys`` serialized as indented JSON.

    The write happens while holding ``api_keys_lock``.
    """
    with api_keys_lock, open('api_keys.json', 'w') as registry_file:
        json.dump(api_keys, registry_file, indent=4)

def add_api_key(client_name):
    """Issue a fresh key for ``client_name`` and persist it.

    Loads the current registry, maps a newly generated key to the client
    name, writes the registry back, and returns the new key.
    """
    registry = load_api_keys()
    issued_key = generate_api_key()
    registry[issued_key] = client_name
    save_api_keys(registry)
    return issued_key

@app.post("/generate_api_key")
async def generate_api_key_endpoint(client_name: str):
    """POST /generate_api_key: issue a new API key for ``client_name``.

    Returns:
        A dict of the form ``{"api_key": <new key>}``.
    """
    return {"api_key": add_api_key(client_name)}

@app.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    api_key: Optional[str] = Header(None),
):
    """Authenticate the caller and proxy the chat request to the Groq client.

    Args:
        request: Messages and model identifier to forward.
        api_key: Wrapper-issued key taken from the request headers; ``None``
            when the header is absent.

    Returns:
        ChatCompletionResponse carrying the first choice's message content.

    Raises:
        HTTPException: 401 when ``api_key`` is not in the key registry; 500
            with the exception text when forwarding or response handling fails.
    """
    # Only keys present in api_keys.json may use the proxy.
    known_keys = load_api_keys()
    if api_key not in known_keys:
        raise HTTPException(status_code=401, detail="Invalid API Key")

    try:
        # The upstream client takes plain dicts, not pydantic models.
        upstream_messages = []
        for message in request.messages:
            upstream_messages.append(message.dict())

        completion = master_client.chat.completions.create(
            messages=upstream_messages,
            model=request.model,
        )

        first_choice = completion.choices[0]
        return ChatCompletionResponse(content=first_choice.message.content)

    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


In [None]:
import os
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from typing import List, Optional
from cerebras.cloud.sdk import Cerebras
from groq import Groq

# Fresh application object for this cell; the @app.post route defined in this
# cell registers on it.
app = FastAPI()

# Define the request and response models
class Message(BaseModel):
    # One chat turn; converted with .dict() before being sent to a provider.
    role: str  # speaker label; any string is accepted here
    content: str  # text of the turn

class ChatCompletionRequest(BaseModel):
    # JSON body accepted by POST /chat/completions.
    messages: List[Message]  # conversation turns, forwarded in the given order
    model: str  # model identifier passed through to the provider unchanged
    service_name: str  # Service to use: 'cerebras' or 'groq' (compared case-insensitively)

class ChatCompletionResponse(BaseModel):
    # JSON body returned by POST /chat/completions.
    content: str  # reply text produced by the selected provider

# API keys for the services
# Provider credentials, read once from the environment when the cell runs.
SERVICE_API_KEYS = dict(
    cerebras=os.environ.get("CEREBRAS_API_KEY"),
    groq=os.environ.get("GROQ_API_KEY"),
)

# Fail fast at startup: stop on the first provider whose key is missing or empty.
for service, api_key in SERVICE_API_KEYS.items():
    if api_key:
        continue
    raise Exception(f"API key for {service} not set in environment variables.")

def get_client(service_name: str):
    """Build the SDK client for ``service_name`` (matched case-insensitively).

    Returns:
        A new ``Cerebras`` or ``Groq`` client configured with the matching
        entry of ``SERVICE_API_KEYS``.

    Raises:
        ValueError: If the service is neither 'cerebras' nor 'groq'.
    """
    provider = service_name.lower()
    if provider == "cerebras":
        return Cerebras(api_key=SERVICE_API_KEYS["cerebras"])
    if provider == "groq":
        return Groq(api_key=SERVICE_API_KEYS["groq"])
    raise ValueError(f"Unsupported service_name: {provider}")

@app.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    api_key: Optional[str] = Header(None),
):
    """Forward a chat request to the selected provider and return its reply.

    Args:
        request: Messages, model identifier and ``service_name``
            ('cerebras' or 'groq', case-insensitive).
        api_key: Value of the ``api-key`` request header. It is accepted but
            not checked by this version of the endpoint.

    Returns:
        ChatCompletionResponse carrying the first choice's message content.

    Raises:
        HTTPException: 400 when a ValueError is raised (e.g. an unsupported
            ``service_name``); 500 with the exception text for any other
            failure.
    """
    service_name = request.service_name.lower()

    try:
        # get_client raises ValueError for anything other than cerebras/groq,
        # so no separate "unsupported service" branch is needed below.
        client = get_client(service_name)
        messages = [message.dict() for message in request.messages]

        chat_completion = client.chat.completions.create(
            messages=messages,
            model=request.model,
        )

        # Both SDKs return a completion object whose reply text lives at
        # choices[0].message.content. The Cerebras branch used to pass the
        # whole completion object as `content`, which the `content: str`
        # response field cannot hold.
        content = chat_completion.choices[0].message.content

        return ChatCompletionResponse(content=content)

    except ValueError as ve:
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

In [None]:
!uvicorn wrapper_service:app --reload


In [3]:
# Request a new wrapper API key (name + email) from the locally running server.

import requests

url = "http://localhost:8000/generate_api_key"
data = {
    "name": "John Doe",
    "email": "john.doe@example.com"
}

# Bound the wait so the cell fails promptly instead of hanging forever when
# the server is not running; requests applies no timeout unless one is given.
response = requests.post(url, json=data, timeout=10)

if response.status_code == 200:
    api_key = response.json()["api_key"]
    # NOTE: the printed key is a credential; clear this cell's output before
    # sharing or committing the notebook.
    print(f"Your API Key: {api_key}")
else:
    print(f"Error: {response.status_code} - {response.text}")

Your API Key: ljoWc7l91mdURBRNTNPlZQlGpV5OMo


In [13]:
# LLM inference through the local wrapper service.

import os

import requests

# The wrapper key is a credential, so it is read from the environment rather
# than pasted into the notebook. Export the key returned by
# /generate_api_key as WRAPPER_API_KEY before running this cell.
api_key = os.environ["WRAPPER_API_KEY"]

url = "http://localhost:8000/chat/completions"
headers = {
    "Content-Type": "application/json",
    "api-key": api_key,
}

data = {
    "messages": [
        {"role": "user", "content": "Explain the importance of fast language models"}
    ],
    # Cerebras model id, as used by the direct Cerebras SDK cell in this
    # notebook; "llama3.1-8b-8192" mixed it up with Groq's "llama3-8b-8192".
    "model": "llama3.1-8b",
    "service_name": "cerebras",
}

# Generous but finite timeout: the call waits on an upstream LLM, yet should
# not hang forever if the server is unreachable.
response = requests.post(url, json=data, headers=headers, timeout=60)

if response.status_code == 200:
    result = response.json()
    print(result["content"])
else:
    print(f"Error {response.status_code}: {response.text}")

Error 400: {"detail":"Unsupported service_name: cerebra"}


In [None]:
import os
import threading
import json
from datetime import datetime
from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel, EmailStr
from typing import List
from cerebras.cloud.sdk import Cerebras
from groq import Groq
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, func
from sqlalchemy.orm import declarative_base, sessionmaker

app = FastAPI()

# Lock taken by load_api_keys() and save_api_keys() around each individual
# read or write of api_keys.json.
api_keys_lock = threading.Lock()

# Database setup
# The connection string carries credentials, so it is taken from the
# DATABASE_URL environment variable when set. The fallback is the original
# template value, which is only a placeholder and must be replaced before use.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://username:password@host:port/database_name",
)

engine = create_engine(DATABASE_URL)
# Session factory used by log_to_database; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

# Define the ChatLog model
class ChatLog(Base):
    """ORM row recording one proxied chat completion (table ``chat_logs``)."""
    __tablename__ = 'chat_logs'

    id = Column(Integer, primary_key=True, index=True)
    # Defaulted on the database side via func.now() when no value is supplied.
    timestamp = Column(DateTime(timezone=True), server_default=func.now())
    # Name and email stored with the caller's API key in api_keys.json.
    client_name = Column(String(100))
    client_email = Column(String(100))
    # Lower-cased provider name and the model identifier from the request.
    service_name = Column(String(50))
    model_name = Column(String(100))
    # Contents of all 'user'-role messages of the request, joined by spaces.
    client_message = Column(Text)
    # Reply text returned to the client, stripped of surrounding whitespace.
    content_in_response = Column(Text)

# Create the tables declared on Base (currently chat_logs) in the database.
# This runs when the cell/module is executed, so it needs a working engine.
Base.metadata.create_all(bind=engine)

# Master API keys
# Upstream ("master") provider credentials, read once from the environment.
MASTER_SERVICE_API_KEYS = dict(
    cerebras=os.environ.get("CEREBRAS_API_KEY"),
    groq=os.environ.get("GROQ_API_KEY"),
    # Add more services as needed
)

# Fail fast at startup: stop on the first provider whose key is missing or empty.
for service, api_key in MASTER_SERVICE_API_KEYS.items():
    if api_key:
        continue
    raise Exception(f"Master API key for {service} is not set in environment variables.")

# Models
class Message(BaseModel):
    # One chat turn; converted with .dict() before being sent to a provider.
    role: str  # speaker label; 'user' turns are the ones copied into the chat log
    content: str  # text of the turn

class ChatCompletionRequest(BaseModel):
    # JSON body accepted by POST /chat/completions.
    messages: List[Message]  # conversation turns, forwarded in the given order
    model: str  # model identifier passed through to the provider unchanged
    service_name: str  # 'cerebras' or 'groq'; lower-cased before dispatch

class ChatCompletionResponse(BaseModel):
    # JSON body returned by POST /chat/completions.
    content: str  # reply text produced by the selected provider

class GenerateApiKeyRequest(BaseModel):
    # JSON body accepted by POST /generate_api_key.
    name: str  # client display name, stored alongside the issued key
    email: EmailStr  # validated address; used to refuse a second key per email

def generate_api_key(length=30):
    """Generate a random alphanumeric API key.

    The key is what authenticates a client, so it is drawn from the operating
    system's CSPRNG via ``secrets`` instead of the predictable, seedable
    generator behind ``random``.

    Args:
        length: Number of characters in the key. Defaults to 30.

    Returns:
        A string of ``length`` characters taken from ASCII letters and digits.
    """
    import secrets
    import string

    characters = string.ascii_letters + string.digits
    return ''.join(secrets.choice(characters) for _ in range(length))

def load_api_keys():
    """Read the key registry from api_keys.json.

    Returns:
        The decoded JSON content of the file, or an empty dict when the file
        does not exist. The read happens while holding ``api_keys_lock``.
    """
    with api_keys_lock:
        if os.path.exists('api_keys.json'):
            with open('api_keys.json', 'r') as registry_file:
                return json.load(registry_file)
        return {}

def save_api_keys(api_keys):
    """Overwrite api_keys.json with ``api_keys`` serialized as indented JSON.

    The write happens while holding ``api_keys_lock``.
    """
    with api_keys_lock, open('api_keys.json', 'w') as registry_file:
        json.dump(api_keys, registry_file, indent=4)

def add_api_key(client_name, client_email):
    """Issue a fresh key for a client and persist it.

    Loads the current registry, maps a newly generated key to a
    ``{"name", "email"}`` record, writes the registry back, and returns the
    new key.
    """
    registry = load_api_keys()
    issued_key = generate_api_key()
    owner = {"name": client_name, "email": client_email}
    registry[issued_key] = owner
    save_api_keys(registry)
    return issued_key

def authenticate_client(api_key: str = Header(...)):
    """FastAPI dependency that validates the required API-key header.

    Returns:
        The supplied key when it is present in the key registry.

    Raises:
        HTTPException: 401 when the key is not in the registry.
    """
    if api_key in load_api_keys():
        return api_key
    raise HTTPException(status_code=401, detail="Invalid API Key")

def get_master_client(service_name: str):
    """Build the upstream SDK client for ``service_name`` (case-insensitive).

    Returns:
        A new ``Cerebras`` or ``Groq`` client configured with the matching
        entry of ``MASTER_SERVICE_API_KEYS``.

    Raises:
        ValueError: If the service is neither 'cerebras' nor 'groq'.
    """
    provider = service_name.lower()
    if provider == "cerebras":
        return Cerebras(api_key=MASTER_SERVICE_API_KEYS["cerebras"])
    if provider == "groq":
        return Groq(api_key=MASTER_SERVICE_API_KEYS["groq"])
    raise ValueError(f"Unsupported service_name: {provider}")

def log_to_database(log_entry):
    """Persist ``log_entry`` in its own short-lived session.

    Logging is best-effort: a failure is rolled back and reported on stdout
    rather than raised, and the session is closed in every case.
    """
    db = SessionLocal()
    try:
        db.add(log_entry)
        db.commit()
    except Exception as exc:
        # Undo the partial transaction and keep serving the request.
        db.rollback()
        print(f"Error logging to database: {exc}")
    finally:
        db.close()

@app.post("/generate_api_key")
async def generate_api_key_endpoint(request: GenerateApiKeyRequest):
    """POST /generate_api_key: issue one API key per email address.

    Returns:
        A dict of the form ``{"api_key": <new key>}``.

    Raises:
        HTTPException: 400 when a key already exists for the same email
            (compared case-insensitively).
    """
    registry = load_api_keys()
    already_issued = any(
        entry['email'].lower() == request.email.lower()
        for entry in registry.values()
    )
    if already_issued:
        raise HTTPException(status_code=400, detail="An API key has already been generated for this email.")

    return {"api_key": add_api_key(request.name, request.email)}

@app.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    api_key: str = Depends(authenticate_client),
):
    """Proxy a chat request to the selected provider and log the exchange.

    Args:
        request: Messages, model identifier and ``service_name``
            ('cerebras' or 'groq', case-insensitive).
        api_key: Wrapper key already validated by ``authenticate_client``.

    Returns:
        ChatCompletionResponse carrying the first choice's message content
        (an empty string when the provider returns no content).

    Raises:
        HTTPException: 400 when a ValueError is raised (e.g. an unsupported
            ``service_name``); 500 with the exception text for any other
            failure.
    """
    service_name = request.service_name.lower()
    model_name = request.model

    try:
        # get_master_client raises ValueError for anything other than
        # cerebras/groq, so no separate "unsupported service" branch is needed.
        client = get_master_client(service_name)
        messages = [message.dict() for message in request.messages]

        # Look up who owns the key so the log row can be attributed.
        client_info = load_api_keys()[api_key]
        client_name = client_info['name']
        client_email = client_info['email']

        # Only the user-authored turns are recorded in the log.
        client_message = ' '.join(
            msg.content for msg in request.messages if msg.role == 'user'
        )

        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model_name,
        )

        # Both SDKs return a completion object (not a dict) whose reply text
        # lives at choices[0].message.content; the former `.get('content')`
        # call on the Cerebras result does not match that shape. `or ''`
        # keeps the `.strip()` below and the `content: str` response field
        # safe if the provider returns no content.
        content_in_response = chat_completion.choices[0].message.content or ''

        # Best-effort audit log; log_to_database swallows its own failures.
        log_entry = ChatLog(
            client_name=client_name,
            client_email=client_email,
            service_name=service_name,
            model_name=model_name,
            client_message=client_message.strip(),
            content_in_response=content_in_response.strip()
        )
        log_to_database(log_entry)

        return ChatCompletionResponse(content=content_in_response)

    except ValueError as ve:
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


In [14]:
from typing import List, Optional
import json

from pydantic import BaseModel
from groq import Groq

# Read the Groq key from the environment instead of embedding it in the
# notebook. Any key that has ever been saved inside a notebook should be
# treated as compromised and rotated.
import os

groq = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)


# Data model for LLM to generate
class Ingredient(BaseModel):
    # One ingredient line of a Recipe. Documented with comments only: this
    # model's JSON schema is part of the prompt built in get_recipe.
    name: str
    quantity: str  # free-form amount text as produced by the model
    quantity_unit: Optional[str]  # may be None; print_recipe then prints nothing for it


class Recipe(BaseModel):
    # Structured recipe the LLM is asked to produce. get_recipe embeds this
    # model's JSON schema in the system prompt and validates the reply with it.
    recipe_name: str
    ingredients: List[Ingredient]
    directions: List[str]  # ordered steps; print_recipe numbers them from 1


def get_recipe(recipe_name: str) -> Recipe:
    """Ask the Groq model for a recipe and parse the JSON reply.

    Args:
        recipe_name: Dish to request, interpolated into the user prompt.

    Returns:
        A ``Recipe`` validated from the first choice's message content.
    """
    # Pass the json schema to the model. Pretty printing improves results.
    schema_text = json.dumps(Recipe.model_json_schema(), indent=2)
    system_prompt = (
        "You are a recipe database that outputs recipes in JSON.\n"
        f" The JSON object must use the schema: {schema_text}"
    )
    user_prompt = f"Fetch a recipe for {recipe_name}"

    completion = groq.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="llama3-8b-8192",
        temperature=0,
        # Streaming is not supported in JSON mode
        stream=False,
        # Enable JSON mode by setting the response format
        response_format={"type": "json_object"},
    )

    raw_json = completion.choices[0].message.content
    return Recipe.model_validate_json(raw_json)


def print_recipe(recipe: Recipe):
    """Print a recipe as a title, a bulleted ingredient list and numbered steps.

    A missing ``quantity_unit`` is rendered as an empty string, so the line
    then ends with the quantity followed by a single space.
    """
    print("Recipe:", recipe.recipe_name)

    print("\nIngredients:")
    for item in recipe.ingredients:
        line = "- {}: {} {}".format(item.name, item.quantity, item.quantity_unit or '')
        print(line)

    print("\nDirections:")
    for number, text in enumerate(recipe.directions, start=1):
        print("{}. {}".format(number, text))


recipe = get_recipe("apple pie")
# `recipe` is a validated Recipe instance; render it as plain text.
print_recipe(recipe)

Recipe: Apple Pie

Ingredients:
- Flour: 2 1/4 cups cups
- Cold Butter: 1 cup cups
- Granulated Sugar: 1/2 cup cups
- Salt: 1/4 teaspoon teaspoons
- Ground Cinnamon: 1/2 teaspoon teaspoons
- Ground Nutmeg: 1/4 teaspoon teaspoons
- Eggs: 1 whole
- Apple Filling: 6-8 cups cups

Directions:
1. Preheat oven to 375°F (190°C).
2. Make the crust: In a large bowl, combine flour, salt, and cold butter. Use a pastry blender or your fingers to work the butter into the flour until the mixture resembles coarse crumbs.
3. Add the sugar, cinnamon, and nutmeg to the flour mixture and stir until combined.
4. Gradually add ice-cold water, stirring with a fork until the dough comes together in a ball.
5. Turn the dough out onto a lightly floured surface and knead a few times until it becomes smooth and pliable.
6. Divide the dough in half and shape each half into a disk. Wrap each disk in plastic wrap and refrigerate for at least 30 minutes.
7. Make the filling: Peel, core, and slice the apples. In a lar

In [20]:
import os

from groq import Groq

# Read the Groq key from the environment instead of embedding it in the
# notebook. Any key that has ever been saved inside a notebook should be
# treated as compromised and rotated.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Explain the importance of fast language models",
        }
    ],
    model="llama3-8b-8192",
)

# Prints the raw completion object to show the response structure
# (choices[0].message.content holds the reply text).
print(chat_completion)

ChatCompletion(id='chatcmpl-cc4ce3aa-7b44-4e95-8736-929294a39990', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Fast language models, also known as efficient language models, have gained significant attention in recent years due to their importance in various applications. Here are some reasons why fast language models are crucial:\n\n1. **Real-time processing**: Fast language models enable real-time processing of natural language data, which is essential for many applications such as chatbots, virtual assistants, and language translation systems.\n2. **Scalability**: Fast language models can process large volumes of data efficiently, making them suitable for big data applications, such as text analysis, sentiment analysis, and topic modeling.\n3. **Improved accuracy**: Fast language models can be trained on large datasets, leading to improved accuracy and better performance in downstream tasks such as language translation, text s

In [21]:
import os
from cerebras.cloud.sdk import Cerebras

# Read the Cerebras key from the environment instead of embedding it in the
# notebook. Any key that has ever been saved inside a notebook should be
# treated as compromised and rotated.
client = Cerebras(
    api_key=os.environ.get("CEREBRAS_API_KEY"),
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Why is fast inference important?",
        }
    ],
    model="llama3.1-8b",
)

# Prints the raw completion object to show the response structure
# (choices[0].message.content holds the reply text).
print(chat_completion)

ChatCompletion(id='chatcmpl-b0f17433-53ab-442b-93bb-be045b6385ba', choices=[Choice(finish_reason='stop', index=0, message=ChoiceMessage(role='assistant', content="Fast inference is crucial in various applications, particularly in deep learning and artificial intelligence (AI), due to several factors that drive the need for efficient and rapid processing of model predictions, classifications, or outputs. These applications often rely on complex models that consume significant computational resources and require fast response times to maintain or improve their functionality and user experience. Here are some key reasons why fast inference is important:\n\n1. **User Experience:** For applications that require real-time processing, such as chatbots, voice assistants, self-driving cars, or live video analytics, fast inference ensures a seamless and responsive experience. Users expect instant or near-instant feedback from these systems, making speed of inference critical.\n\n2. **Scalability