Assistants API (examples below use its successor, the Responses API)

https://openai.com/blog/new-models-and-developer-products-announced-at-devday

https://platform.openai.com/docs/assistants/overview

https://platform.openai.com/playground

In [None]:
# Install/upgrade the OpenAI SDK and the Hugging Face libraries imported by later cells.
!pip install -q -U openai sentence-transformers diffusers["torch"] transformers

IMPORT AND SETUP

In [None]:
import os
from openai import OpenAI

# Colab-only helper that exposes the notebook's stored secrets.
from google.colab import userdata
# Export the key as an environment variable for the rest of the session.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [None]:
import time
from openai import OpenAI
# Initialize the client
# The organization was previously hard-coded, which only works for members of
# that organization. OPENAI_ORG_ID now takes precedence when it is set; the
# original value remains the default so existing runs behave the same.
client = OpenAI(
  organization=os.environ.get("OPENAI_ORG_ID", "towards-ai"),
)

## Download Dataset

In [None]:
# Fetch the two sample PDFs into the working directory; later cells open them by these file names.
!wget -O botsdata.pdf https://raw.githubusercontent.com/jaiganesan-n/dataset_building_ai_for_production/main/botsdata.pdf
!wget -O tech_manual.pdf https://raw.githubusercontent.com/jaiganesan-n/dataset_building_ai_for_production/main/tech_manual.pdf

## RESPONSES API WITH TECH MANUAL

In [None]:
# Upload Files for Knowledge Base
# Upload the tech manual so it can be attached to a vector store for file_search.
# NOTE(review): 'assistants' is the purpose shown in the OpenAI file search guide
# for vector-store files; 'user_data' is meant for files passed directly as
# model input, so it is not used here.
# The context manager closes the PDF handle as soon as the upload finishes.
with open("tech_manual.pdf", "rb") as manual_file:
    file_resp = client.files.create(
        file=manual_file,
        purpose='assistants'
    )
file_id = file_resp.id

print(f"File uploaded: {file_id}")

In [None]:
# Create a vector store and upload your file
vector_store = client.vector_stores.create(name="TechManualStore")
vs_id = vector_store.id
# Attaching a file starts asynchronous indexing. create_and_poll blocks until
# the file reaches a terminal status, so the file_search query in the next
# cell does not run against a store that is still being indexed.
vs_file = client.vector_stores.files.create_and_poll(
    vector_store_id=vs_id,
    file_id=file_resp.id,
)
print(f"Vector store file status: {vs_file.status}")

In [None]:
# Ask a question using file_search
# Tool configuration: search the tech-manual vector store, keep the top 3 hits.
file_search_tool = {
    "type": "file_search",
    "vector_store_ids": [vs_id],
    "max_num_results": 3
}

response = client.responses.create(
    model="gpt-4.1-mini",
    instructions="You are a robotics expert. Use the manual to answer accurately.",
    input="Explain Auditory and Visual (A/V) Warning System",
    tools=[file_search_tool],
    # Ask the API to return the retrieved chunks alongside the answer.
    include=["output[*].file_search_call.search_results"]
)

# Review the assistant’s answer and search metadata
answer_text = response.output_text
print(answer_text)

In [None]:
# Locate the file_search call by type instead of assuming it is the first
# output item; the order of output items is not guaranteed.
file_search_call = next(
    (item for item in response.output if item.type == "file_search_call"),
    None,
)
# `results` can be None when the search results were not included in the
# response, so fall back to an empty list rather than failing in the loop.
search_results = (file_search_call.results or []) if file_search_call else []

if not search_results:
    print("No file search results were returned with this response.")

for res in search_results:
    print(f"\n— Source: {res.filename} (score {res.score:.2f})")
    print("Excerpt:", res.text[:200].replace("\n", " "), "…")

## BOTSDATA RESEARCH ASSISTANT

In [None]:
# Upload botsdata file for the research assistant
# The context manager closes the PDF handle as soon as the upload finishes.
with open("botsdata.pdf", "rb") as botsdata_pdf:
    botsdata_file = client.files.create(
        file=botsdata_pdf,
        purpose='assistants'
    )
print(f"Botsdata file uploaded: {botsdata_file.id}")

# Create vector store for botsdata
botsdata_vector_store = client.vector_stores.create(name="BotsdataStore")

# Indexing is asynchronous; wait for a terminal status so the research
# queries below do not search a store that is still being indexed.
botsdata_vs_file = client.vector_stores.files.create_and_poll(
    vector_store_id=botsdata_vector_store.id,
    file_id=botsdata_file.id
)
print(f"Botsdata vector store file status: {botsdata_vs_file.status}")

# Tool set shared by all research queries: retrieval over the PDF, a code
# interpreter container that also receives the PDF, and web search.
tools = [
    {"type": "file_search", "vector_store_ids": [botsdata_vector_store.id], "max_num_results": 3},
    {"type": "code_interpreter", "container": {"type": "auto","file_ids": [botsdata_file.id]}},
    {"type": "web_search"}
]

In [None]:
# Research Query 1: Robotics Technology Analysis

# System-level guidance for the model.
robotics_instructions = """You are an advanced research and robotics assistant with expertise in:
    - Advanced robotics and automation technologies
    - Botnet analysis and cybersecurity (based on your database)
    - Technology research and analysis
    - Machine learning conference compilation
    Use your knowledge base to provide detailed, technical responses."""

# The user question for this query.
robotics_query = "Can you provide a detailed analysis of the latest advancements in robotics technology based on the uploaded research data?"

research_response_1 = client.responses.create(
    model="gpt-4.1-mini",
    instructions=robotics_instructions,
    input=robotics_query,
    tools=tools,
    include=["output[*].file_search_call.search_results"],
)

print("Robotics Analysis Response:")
print(research_response_1.output_text)

In [None]:
# Research Query 2: Conference List Compilation

# System-level guidance for the model.
conference_instructions = """You are an expert research assistant. Compile comprehensive lists of academic conferences,
    especially in machine learning, robotics, and cybersecurity. Use both your knowledge base and web search."""

# The user question for this query.
conference_query = "Compile a list of upcoming machine learning conferences including dates, locations, and focus areas for 2025"

research_response_2 = client.responses.create(
    model="gpt-4.1-mini",
    instructions=conference_instructions,
    input=conference_query,
    tools=tools,
)

print("\nML Conferences List:")
print(research_response_2.output_text)

In [None]:
# Research Query 3: Botnet Analysis (based on database content)

# System-level guidance for the model.
botnet_instructions = """Based on your database about botnets, provide expert analysis on botnet detection and mitigation.
    Your database includes information on command-and-control structures, bot families, and detection methods."""

# The user question for this query.
botnet_query = "Analyze the behavior patterns of different botnet families and recommend detection strategies"

botnet_response = client.responses.create(
    model="gpt-4.1-mini",
    instructions=botnet_instructions,
    input=botnet_query,
    tools=tools,
)

print("\nBotnet Analysis:")
print(botnet_response.output_text)

## HUGGINGFACE API & InferenceClient INTEGRATIONS

In [None]:
import requests
import json

# Fetch the secret once and reuse it for both the environment variable and
# the request header.
hf_token = userdata.get('HF_TOKEN')
os.environ["HF_TOKEN"] = hf_token

# Sentiment Analysis Workflow
headers = {"Authorization": f"Bearer {hf_token}"}

# Sentiment analysis with updated model
# NOTE(review): Hugging Face has been moving serverless inference behind
# router.huggingface.co — confirm this endpoint still resolves.
sentiment_url = "https://api-inference.huggingface.co/models/siebert/sentiment-roberta-large-english"
sentiment_inputs = {
    "inputs": ["I love this product!", "I'm frustrated by the frequent errors in the software's latest update"]
}

# A timeout keeps the cell from hanging forever if the endpoint stalls.
sentiment_response = requests.post(
    sentiment_url,
    headers=headers,
    json=sentiment_inputs,
    timeout=30,
)
print("Sentiment Analysis Results:")
if sentiment_response.ok:
    print(sentiment_response.json())
else:
    # Error bodies are not guaranteed to be JSON, so show the raw text
    # together with the status code instead of failing inside .json().
    print(f"Request failed with HTTP {sentiment_response.status_code}: {sentiment_response.text}")

In [None]:
from huggingface_hub import InferenceClient
client = InferenceClient(provider="hf-inference")

In [None]:
# Text Classification
response = client.text_classification(
    text="I love how this app simplifies complex tasks effortlessly. I'm frustrated by the frequent errors in the software's latest update",
    model="cardiffnlp/twitter-roberta-base-sentiment"
)
print(response)

In [None]:
# from huggingface_hub import InferenceClient

# client = InferenceClient(provider="hf-inference")

# response = client.text_generation(
#     prompt="The new president of America will be",
#     model="gpt2",
# )

# print(response.generated_text)


## Image Generation Workflow

In [None]:
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch
from PIL import Image
from IPython.display import display

# Load Stable Diffusion v1.5.
# NOTE(review): the original "runwayml/stable-diffusion-v1-5" repository was
# deprecated on the Hub; this id is the community mirror referenced by the
# diffusers documentation — confirm it resolves in your environment.
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"

# Pick the device first, because the dtype depends on it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# float16 halves GPU memory use, but many CPU kernels do not support half
# precision, so fall back to float32 when no GPU is available.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    safety_checker=None,
    requires_safety_checker=False
)

# Use faster scheduler (it was previously assigned twice; once is enough).
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Place the whole pipeline on the chosen device. enable_model_cpu_offload is
# not used on the CPU path: it shuttles sub-models between CPU and an
# accelerator, so it has no accelerator to work with on a CPU-only runtime.
pipe = pipe.to(device)

# Attention slicing trades a little speed for lower peak memory.
pipe.enable_attention_slicing()

# Generate futuristic cityscape
prompt = "Create an image of a futuristic cityscape on an alien planet, featuring towering skyscrapers with glowing neon lights, a sky filled with multiple moons, and inhabitants of various alien species walking through vibrant market streets"

image = pipe(
    prompt,
    num_inference_steps=25,
    guidance_scale=7.5,
    height=512,
    width=512
).images[0]

image.save("futuristic_cityscape.png")

display(image)

## SENTENCE TRANSFORMERS EMBEDDINGS

In [None]:
from sentence_transformers import SentenceTransformer

# Two example sentences to embed.
sentences = [
    "GAIA's questions are rooted in practical use cases, requiring AI systems to interact with a diverse and uncertain world, reflecting real-world applications.",
    "GAIA questions require accurate execution of complex sequences of actions, akin to the Proof of Work concept, where the solution is simple to verify but challenging to generate."
]

# Use current high-performance model
embedding_model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_name)

# One vector per input sentence.
embeddings = model.encode(sentences)

first_embedding_head = embeddings[0][:10]
print(f"Embeddings shape: {embeddings.shape}")
print(f"Sample embedding dimensions: {first_embedding_head}")

## IMAGE PROCESSING AND ANALYSIS

In [None]:
from transformers import pipeline, ViTImageProcessor, ViTForImageClassification
from PIL import Image
import requests
import torch

model_name = "nlpconnect/vit-gpt2-image-captioning"

# Image preprocessor that matches the captioning checkpoint.
processor = ViTImageProcessor.from_pretrained(model_name, use_fast=True)

# Pipeline device index: first GPU when CUDA is available, otherwise CPU (-1).
if torch.cuda.is_available():
    captioner_device = 0
else:
    captioner_device = -1

# Image Captioning
image_captioner = pipeline(
    "image-to-text",
    model=model_name,
    feature_extractor=processor,
    device=captioner_device,
)

# Caption sample image
sample_caption_url = "https://ankur3107.github.io/assets/images/image-captioning-example.png"
caption_result = image_captioner(sample_caption_url)
print("Image Captioning Result:", caption_result)

In [None]:
# Image Classification with ViT
image_url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
# Fail fast on network problems: a timeout avoids hanging the cell, and
# raise_for_status stops an HTTP error page from being handed to PIL.
image_response = requests.get(image_url, stream=True, timeout=30)
image_response.raise_for_status()
image = Image.open(image_response.raw)

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = processor(images=image, return_tensors="pt")
# Inference only: disable autograd so no gradient state is tracked.
with torch.no_grad():
    outputs = vit_model(**inputs)
logits = outputs.logits

# The highest-scoring logit index maps to a label through the model config.
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", vit_model.config.id2label[predicted_class_idx])

## WEB CONTENT EXTRACTION AND PROCESSING

In [None]:
# Web content extraction
def extract_web_content(url, max_chars=5000, timeout=10):
    """Fetch a URL and return the beginning of its response body.

    Args:
        url: Address to fetch with an HTTP GET.
        max_chars: Maximum number of characters of the body to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The first ``max_chars`` characters of the response text, or the
        string "Could not extract content from URL" when the request fails
        or the server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx as failures so an error page is not returned as content.
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are handled here; a bare `except` would
        # also swallow KeyboardInterrupt and genuine programming errors.
        return "Could not extract content from URL"
    return response.text[:max_chars]

# Example content extraction
# Article used to demonstrate extraction; the result is consumed by the summarization cell.
sample_url = "https://techcrunch.com/2023/11/25/neuralink-elon-musks-brain-implant-startup-quietly-raises-an-additional-43m/"
extracted_content = extract_web_content(sample_url)
# Report how much text was returned.
print(f"Extracted content length: {len(extracted_content)} characters")

In [None]:
def summarize_content(text, timeout=30):
    """Summarize text using the HuggingFace inference API.

    Args:
        text: Text to summarize. Only the first 2000 characters are sent.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON body returned by the API, or a dict with an
        ``"error"`` key when there is no text to summarize or the request
        could not be completed.
    """
    # Guard against None as well as empty or whitespace-only input.
    if not text or not text.strip():
        return {"error": "No text to summarize"}

    summarizer_url = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"

    # Truncate text if too long (slicing is a no-op for shorter input).
    max_length = 2000
    text = text[:max_length]

    payload = {
        "inputs": text,
        "parameters": {
            "max_length": 250,
            "min_length": 50,
            "do_sample": False
        }
    }

    try:
        response = requests.post(
            summarizer_url,
            headers={"Authorization": f"Bearer {userdata.get('HF_TOKEN')}"},
            json=payload,
            timeout=timeout,
        )
        # API-level errors normally arrive as JSON bodies and are returned
        # unchanged, so the caller sees the same payload as before.
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # Transport failures and non-JSON bodies are reported in the same
        # error-dict shape as the empty-input case.
        return {"error": f"Summarization request failed: {exc}"}

# Summarize extracted content
# Send the text fetched by extract_web_content to the summarizer and show the raw API result.
summary_result = summarize_content(extracted_content)
print("Summarization Result:")
print(summary_result)