# SETUP

In [None]:
import os
import time
import json
import boto3
import logging
from pathlib import Path

In [None]:
# Configure logging once for the notebook: each record shows the timestamp,
# process id, source file and line, level and message.
LOG_FORMAT = '[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

In [None]:
# Load the role to assume from BedrockCrossAccount.txt in the user's home
# directory. Path.home() resolves the home directory even when the HOME
# environment variable is not set, where os.environ["HOME"] raises KeyError.
ROLE_TO_ASSUME = (Path.home() / "BedrockCrossAccount.txt").read_text(encoding="utf-8").strip()
logger.info("ROLE_TO_ASSUME=%s", ROLE_TO_ASSUME)

In [None]:
import boto3
import datetime
from botocore.session import get_session
from botocore.credentials import RefreshableCredentials

# ARN of Role A to assume.
# Reuse ROLE_TO_ASSUME (read from ~/BedrockCrossAccount.txt and logged earlier
# in this notebook) so the role that is logged is the role that is actually
# assumed, and no account-specific ARN is hard-coded in the notebook.
role_to_assume = ROLE_TO_ASSUME

def get_credentials():
    """Assume ``role_to_assume`` through STS and return its temporary credentials.

    Returns:
        A dict with the keys ``access_key``, ``secret_key``, ``token`` and
        ``expiry_time`` (the expiration timestamp as an ISO-8601 string).
    """
    # Don't set DurationSeconds when role chaining
    response = boto3.client('sts').assume_role(
        RoleArn=role_to_assume,
        RoleSessionName='cross-account-session',
    )
    creds = response['Credentials']
    return dict(
        access_key=creds['AccessKeyId'],
        secret_key=creds['SecretAccessKey'],
        token=creds['SessionToken'],
        expiry_time=creds['Expiration'].isoformat(),
    )

# Seed the refreshable credentials with an initial assume-role call and
# register get_credentials as the callback used to refresh them.
refresh_creds = RefreshableCredentials.create_from_metadata(
    metadata=get_credentials(),
    refresh_using=get_credentials,
    method='sts-assume-role',
)

# Attach the refreshable credentials to a botocore session and wrap that
# session in a boto3 Session for creating clients.
session = get_session()
session._credentials = refresh_creds
boto3_session = boto3.Session(botocore_session=session)

In [None]:
# AWS region passed as region_name when the Bedrock runtime client is created
region: str = "us-west-2"

`ChatBedrockConverse` is a LangChain chat-model interface that lets users interact with LLMs on Amazon Bedrock, similar to how OpenAI’s API works for GPT models.

In [None]:
from langchain_aws import ChatBedrockConverse
import boto3

# ---- ⚠️ Update region for your AWS setup ⚠️ ----
# Bedrock runtime client created from the cross-account session
bedrock_client = boto3_session.client(
    service_name="bedrock-runtime",
    region_name=region,
)

In [None]:
# Chat model that sends its requests through the Bedrock runtime client
llm = ChatBedrockConverse(model_id="us.amazon.nova-micro-v1:0", client=bedrock_client)

# Steps to Implement RAG for an Image Dataset:
## 1. Preprocess Images

In [None]:
import os
import torch
import faiss
import numpy as np
from PIL import Image
from torchvision import transforms
from transformers import CLIPProcessor, CLIPModel

# Load the CLIP model and its processor from the same checkpoint;
# run the model on the GPU when one is available, otherwise on the CPU.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to(device)

# Folder containing images
IMAGE_FOLDER = "data/"
# Match extensions case-insensitively so files such as "photo.JPG" are not
# skipped, and sort the names because os.listdir returns entries in arbitrary
# order; a stable order keeps the stored embeddings and paths reproducible
# from run to run.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
image_files = sorted(
    f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(IMAGE_EXTENSIONS)
)

# Function to process and get embeddings for images
def get_image_embedding(image_path):
    """Return the CLIP image embedding of one image file as a flat NumPy array.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The result of ``model.get_image_features`` for the image, moved to
        the CPU, converted to NumPy and flattened.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels have been decoded; convert() returns an independent image.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only, no gradients needed
        embedding = model.get_image_features(**inputs)
    return embedding.cpu().numpy().flatten()

# Build the full path of every image, then embed the images in that same
# order so embeddings[i] belongs to image_paths[i].
image_paths = [os.path.join(IMAGE_FOLDER, name) for name in image_files]
embeddings = [get_image_embedding(path) for path in image_paths]


## 2. Store Image Embeddings in a Vector Database

In [None]:
# Convert embeddings to FAISS index
embeddings = np.array(embeddings).astype("float32")
# With no images the array is 1-D and empty, and shape[1] would fail with an
# opaque IndexError; report the actual problem instead.
if embeddings.ndim != 2 or embeddings.shape[0] == 0:
    raise ValueError(
        f"No image embeddings to index (array shape {embeddings.shape}); "
        "check that the image folder contains .png/.jpg/.jpeg files."
    )
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance (Euclidean)
index.add(embeddings)

# Save FAISS index
faiss.write_index(index, "faiss_index.idx")

# Save image paths for later retrieval, one path per line.
# An explicit encoding keeps non-ASCII file names readable regardless of the
# platform's default text encoding.
with open("image_paths.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{path}\n" for path in image_paths)

print(f"Stored {len(image_paths)} image embeddings in FAISS index.")

## 3. Query Processing (Retrieval Step)

## 4. Augmenting the Query (Generation Step)