In [1]:
pip install groq

Note: you may need to restart the kernel to use updated packages.


In [2]:
from groq import Groq

In [6]:
from dotenv import load_dotenv
load_dotenv()

groq_api_key = os.environ.get('GROQ_API_KEY')

In [27]:
groq_client = Groq(api_key=groq_api_key)

In [63]:
completion = groq_client.chat.completions.create(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    messages=[
      {
        "role": "user",
        "content": "Do you have daily limit for free tier api users?"
      }
    ],
    temperature=1,
    max_completion_tokens=1024,
    top_p=1,
    stream=True,
    stop=None,
)

In [64]:
completion

<groq.Stream at 0x7b6e079e38c0>

In [65]:
answer = ""

for chunk in completion:
    content = chunk.choices[0].delta.content or ""
    answer += content

In [66]:
print(answer)

I'm assuming you're referring to the API usage limits for free tier users. 

For the free tier, **yes**, there are daily limits on API usage to prevent abuse and ensure fair usage. The specific limits can vary depending on the API or service provider.

Here are a few examples:

*   **OpenAI API (which powers me)**: The free tier has a limit of 3 requests per minute and 200 requests per day for the text-based models like myself.
*   **Google Cloud APIs**: Most Google Cloud APIs have a free tier limit of 2 million requests per day, but some APIs have lower or higher limits.
*   **Twitter API**: The free tier has a limit of 150 requests per 15-minute window, which translates to around 600 requests per hour or 14,400 requests per day.

Keep in mind that these limits can change over time, and some APIs might have different limits for different types of requests (e.g., read vs. write requests).

If you're planning to use an API extensively, I recommend checking the API documentation or conta

In [1]:
import re

In [2]:
def extract_problem_and_resolution(text):
    pattern = r"""
        [#\*\s]*CORE\s+PROBLEM\s+STATEMENT[:\s]*[#\*\s]*
        (.*?)                                  
        [#\*\s]*RESOLUTION[:\s]*[#\*\s]*        
        (.*)                          
    """

    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE | re.VERBOSE)
    if match:
        problem = match.group(1).strip()
        resolution = match.group(2).strip()
        return problem, resolution
    else:
        return None, None

In [3]:
text = "**CORE PROBLEM STATEMENT****\nThis is the problem shi shi\n**RESOLUTION:**\nThis is the resolution.\nNote:note"

In [4]:
print(text)

**CORE PROBLEM STATEMENT****
This is the problem shi shi
**RESOLUTION:**
This is the resolution.
Note:note


In [5]:
extract_problem_and_resolution(text)

('This is the problem shi shi', 'This is the resolution.\nNote:note')

In [10]:
def split_query_and_issue_type(text: str):
    pattern = r"""
        [#\*\s]*QUERY\s+TYPE[:\s]*
        (.*?)                                  
        [#\*\s]*SPECIFIC\s+ISSUE\s+TYPE[:\s]*        
        (.*)              
    """

    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE | re.VERBOSE)
    if match:
        query_type = match.group(1).strip()
        specific_issue_type = match.group(2).strip()
        return query_type, specific_issue_type
    else:
        return None, None

In [11]:
text = "QUERY TYPE:\nProduct & Delivery Issues\nSPECIFIC ISSUE TYPE:\nFollow-up on delayed delivery"

In [12]:
print(text)

QUERY TYPE:
Product & Delivery Issues
SPECIFIC ISSUE TYPE:
Follow-up on delayed delivery


In [13]:
split_query_and_issue_type(text)

('Product & Delivery Issues', 'Follow-up on delayed delivery')

In [2]:
def split_answer_and_priority(text: str):
    pattern = r"""
        [#\*\s]*ANSWER[:\s\*\#]*
        (.*?)                                  
        [#\*\s]*PRIORITY[:\s\*\#]*        
        (.*)              
    """

    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE | re.VERBOSE)
    if match:
        answer = match.group(1).strip()
        priority = match.group(2).strip()
        return answer, priority
    else:
        return None, None

In [35]:
text = "##**answer: **###This is the answerPRIORITY:\nThis is the Priority"

In [36]:
print(text)

##**answer: **###This is the answerPRIORITY:
This is the Priority


In [37]:
split_answer_and_priority(text)

('This is the answer', 'This is the Priority')

In [28]:
from fastembed import TextEmbedding

In [31]:
dimension = 1024

In [33]:
import json

for model in TextEmbedding.list_supported_models():
    if model['dim'] == dimension:
        print(json.dumps(model, indent=2))

{
  "model": "BAAI/bge-large-en-v1.5",
  "sources": {
    "hf": "qdrant/bge-large-en-v1.5-onnx",
    "url": null,
    "_deprecated_tar_struct": false
  },
  "model_file": "model.onnx",
  "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year.",
  "license": "mit",
  "size_in_GB": 1.2,
  "additional_files": [],
  "dim": 1024,
  "tasks": {}
}
{
  "model": "mixedbread-ai/mxbai-embed-large-v1",
  "sources": {
    "hf": "mixedbread-ai/mxbai-embed-large-v1",
    "url": null,
    "_deprecated_tar_struct": false
  },
  "model_file": "onnx/model.onnx",
  "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year.",
  "license": "apache-2.0",
  "size_in_GB": 0.64,
  "additional_files": [],
  "dim": 1024,
  "tasks": {}
}
{
  "model": "snowflake/snowflake-arctic-embed-l",
  "sources": {
    "hf": "snowflake/snowflake-arctic-e

In [34]:
model_handle = "BAAI/bge-large-en-v1.5"

In [1]:
import ollama

In [4]:
print(ollama.list()) 

models=[Model(model='mistral:latest', modified_at=datetime.datetime(2025, 6, 30, 10, 44, 56, 57782, tzinfo=TzInfo(+08:00)), digest='3944fe81ec14610e0852c3d915768ee8d507ea541387fdfcbbf9edaa0c757734', size=4113301822, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='7.2B', quantization_level='Q4_0'))]


In [6]:
ollama.pull("mistral:latest")

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [7]:
# Simple generation
resp = ollama.generate(model="mistral", prompt="Explain quantum computing in simple terms")
print(resp.response.strip())
print(resp["choices"][0]["message"]["content"])

# Stateful chat
messages = [{"role": "user", "content": "What’s the capital of France?"}]
chat_resp = ollama.chat(model="mistral", messages=messages)
print(chat_resp["message"]["content"])


KeyError: 'choices'

In [13]:
print(resp.response.strip())

Quantum computing is a type of computation that uses the principles of quantum mechanics to process information. Unlike classical computers, which use bits (0s and 1s) to store and manipulate data, quantum computers use quantum bits, or qubits.

Qubits can exist in multiple states at once thanks to a property called superposition. This means that a single qubit can represent a 0, a 1, or both simultaneously until it is measured. Additionally, qubits have another unique property called entanglement, which allows them to be instantly connected with each other regardless of distance.

These properties allow quantum computers to perform certain calculations much faster than classical computers. For example, solving complex mathematical problems, simulating quantum systems, and breaking encryption codes are tasks that could potentially be done significantly quicker on a quantum computer compared to a classical one.

However, building and maintaining quantum computers is extremely challengin

In [14]:
resp

GenerateResponse(model='mistral', created_at='2025-06-30T02:46:33.085859726Z', done=True, done_reason='stop', total_duration=15498462010, load_duration=6272093, prompt_eval_count=11, prompt_eval_duration=259245674, eval_count=233, eval_duration=15232462941, response=' Quantum computing is a type of computation that uses the principles of quantum mechanics to process information. Unlike classical computers, which use bits (0s and 1s) to store and manipulate data, quantum computers use quantum bits, or qubits.\n\nQubits can exist in multiple states at once thanks to a property called superposition. This means that a single qubit can represent a 0, a 1, or both simultaneously until it is measured. Additionally, qubits have another unique property called entanglement, which allows them to be instantly connected with each other regardless of distance.\n\nThese properties allow quantum computers to perform certain calculations much faster than classical computers. For example, solving comple

In [16]:
import requests

API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1"
headers = {
    "Authorization": "Bearer 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"
}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)

    print(f"Status Code: {response.status_code}")
    print(f"Raw Response: {response.text}")

    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        return {"error": "Failed to decode JSON", "content": response.text}


output = query({
    "inputs": "Explain quantum computing in simple terms.",
    "parameters": {
        "temperature": 0.7,
        "max_new_tokens": 100
    }
})

print(output)


Status Code: 404
Raw Response: Not Found
{'error': 'Failed to decode JSON', 'content': 'Not Found'}


In [28]:

if not api_key:
    raise ValueError("API key not set in environment variable.")

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json",
}

data = {
    "model": "mistralai/mistral-7b-instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain black holes in simple terms."}
    ]
}

response = requests.post("https://openrouter.ai/api/v1/chat/completions",
                         headers=headers,
                         data=data)

print("Status Code:", response.status_code)
print("Response Text:", response.text)

try:
    output = response.json()
    print("Assistant:", output)
except json.JSONDecodeError:
    print("❌ Failed to decode JSON. Response may be an error.")


Status Code: 400
Response Text: {"error":{"message":"Invalid JSON","code":400},"user_id":"user_2zD6DTXc71ApEwuGk1OSfcDBQt0"}
Assistant: {'error': {'message': 'Invalid JSON', 'code': 400}, 'user_id': 'user_2zD6DTXc71ApEwuGk1OSfcDBQt0'}


In [5]:
import requests
import json
import os

api_key = os.environ.get("OPENROUTER_API_KEY")

In [6]:
from openai import OpenAI

In [7]:
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_key,
    # Optional: Add headers for OpenRouter's analytics and ranking
    # default_headers={
    #     "HTTP-Referer": "YOUR_APP_URL",  # Replace with your app's URL
    #     "X-Title": "YOUR_APP_NAME",     # Replace with your app's name
    # },
)

In [8]:
model = "google/gemini-2.0-flash-001"

completion = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "user", "content": "Who are you?"},
    ],
)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [6]:
import os

In [7]:
from google import genai

In [None]:
gemini_api_key = os.environ.get('GEMINI_API_KEY')

In [9]:
gemini_api_key = os.environ.get('GEMINI_API_KEY')
gemini_client = genai.Client(api_key=gemini_api_key)

ValueError: Missing key inputs argument! To use the Google AI API, provide (`api_key`) arguments. To use the Google Cloud API, provide (`vertexai`, `project` & `location`) arguments.

In [None]:
model = genai.GenerativeModel('gemini-2.0-flash')


In [10]:
from dotenv import load_dotenv
load_dotenv()

True

In [21]:
from google import genai
# from google.genai import configure, GenerativeModel, GenerationConfig

In [None]:
from google import generativeai as genai

In [30]:
from google import genai

In [31]:
gemini_api_key = os.environ.get('GEMINI_API_KEY')
gemini_client = genai.Client(api_key=gemini_api_key)

In [33]:
response = gemini_client.models.generate_content(
    model='gemini-2.0-flash',  # or 'gemini-1.0-pro' depending on your version
    contents="Who are you?",
    parameters={
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 40,
        "max_output_tokens": 1024,
    }
)

print(response.text)

TypeError: Models.generate_content() got an unexpected keyword argument 'parameters'

In [34]:
genai.models.Models.generate_content(
    model='gemini-2.0-flash',
    contents="Who are you?",
)

TypeError: Models.generate_content() missing 1 required positional argument: 'self'

In [35]:
model = genai.models.Models('gemini-2.0-flash')

In [36]:
model.generate_content(
    model='gemini-2.0-flash',
    contents="Who are you?",
)

AttributeError: 'str' object has no attribute 'vertexai'

In [40]:
from openai import OpenAI

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key="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",
)

In [41]:
completion = client.chat.completions.create(
  model="google/gemini-2.5-flash-preview-05-20",
  messages=[
    {
      "role": "user",
      "content": "What is the meaning of life?"
    }
  ]
)

print(completion.choices[0].message.content)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)