In [1]:
import os
from crewai import Crew, Agent, LLM, Task
from crewai_tools import BaseTool
from langchain_openai import AzureChatOpenAI
from langchain.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.agents import Tool

import warnings
warnings.filterwarnings('ignore')

In [2]:
llm_4o = LLM(
    model="azure/gpt-4o",
    base_url="https://genai-openai-ai-mazinghacktivists.openai.azure.com/",
    api_key="446cd4ef4aad49e097e94e63a0593be2"
)

In [3]:
# Website Data Ingestion
loader = UnstructuredURLLoader(urls=["https://docs.crewai.com/how-to/Installing-CrewAI/"])
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(data)

In [4]:
# Initialize Embeddings
model_name = "all-MiniLM-L6-v2"
embeddings = SentenceTransformerEmbeddings(model_name=model_name)

# Create & Persist Vector Database
db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")
db.persist()

# Define Retriever from Vector Store
retriever = db.as_retriever()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
class DBRetriever(BaseTool):
    name: str = "db_retriever"
    description: str = "To search for documents related to the user’s query."

    def _run(self, query: str) -> str:
        # Implementation goes here
        return retriever.get_relevant_documents(query)

In [6]:
rwa_optimization_agent = Agent(
    role="RWA Optimization Advisor",
    goal="Offers targeted recommendations to reduce RWA and suggest optimized capital allocation.",
    backstory="""You are a senior RWA Optimization Advisor at Barclays Bank PLC, skilled in providing RWA Optimization 
    strategies. Your job  is to provide strategies for minimizing RWA, recommend actions such as asset reclassification, securitization ,
    or reducing exposure high-risk assets. Besides, your job is to suggest optimized capital allocation to minimize RWA while managing risk exposure
    """,
    llm=llm_4o
)

In [7]:
rwa_optimization_task = Task(
    description="Analyze the user input ({user_input} and identify key risks and RWA optimization opportunities",
    expected_output="A bullet list summary of top 5 most important RWA Optimization opportunities",
    agent = rwa_optimization_agent
)

In [8]:
# Create Crew
research_crew1 = Crew(
    agents=[rwa_optimization_agent],
    tasks=[rwa_optimization_task],
    verbose=True  # This will print logs to the console as the crew works
)

# Job Context
job_crew_works = {
    'user_input': 'Barclays wants to trade in India real estate sector with exposure of INR 1 billion. Please suggest RWA optimization strategy adhering to Basel 3 regulations.'
}

# Kickoff the Crew's Work
result = research_crew1.kickoff(inputs=job_crew_works)
print(result)

[1m[95m# Agent:[00m [1m[92mRWA Optimization Advisor[00m
[95m## Task:[00m [92mAnalyze the user input (Barclays wants to trade in India real estate sector with exposure of INR 1 billion. Please suggest RWA optimization strategy adhering to Basel 3 regulations. and identify key risks and RWA optimization opportunities[00m


[1m[95m# Agent:[00m [1m[92mRWA Optimization Advisor[00m
[95m## Final Answer:[00m [92m
1. **Asset Reclassification:**
   - Reclassify certain real estate exposures to categories that attract lower risk weights under Basel 3 regulations. For example, shifting assets to properties with higher liquidity or to sectors with historically lower default rates can help reduce RWA.
   
2. **Securitization:**
   - Securitize a portion of the real estate portfolio to transfer the credit risk to third-party investors. This can be achieved by creating asset-backed securities (ABS) or mortgage-backed securities (MBS), thereby reducing the bank's RWAs while maintaini

In [9]:
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore, IndexManagement

In [20]:
!pip install azure-ai-textanalytics

Collecting azure-ai-textanalytics
  Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl.metadata (82 kB)
Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl (298 kB)
Installing collected packages: azure-ai-textanalytics
Successfully installed azure-ai-textanalytics-5.3.0


In [22]:
!pip install azure-ai-openai

ERROR: Could not find a version that satisfies the requirement azure-ai-openai (from versions: none)
ERROR: No matching distribution found for azure-ai-openai


In [21]:
# Environment Variables
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "https://genai-openai-ai-mazinghacktivists.openai.azure.com/")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "66243e4ca95e4b46ae6b8a7cc29c7517")
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME", "gpt-35-turbo") # I'm using GPT-4o
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME", "text-embedding-3-large") # I'm using text-embedding-3-large
SEARCH_SERVICE_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT", "https://rwa.search.windows.net")
SEARCH_SERVICE_API_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY", "nKmwEveDrRLrUCZHbzBO9rEQOByUS6Nv2lFAglTFiSAzSeChq7aX")
INDEX_NAME = "json-vector"

# Initialize Azure OpenAI and embedding models
llm = AzureOpenAI(
    model=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version="2024-02-01"
)

from azure.ai.openai import OpenAIClient
from azure.core.credentials import AzureKeyCredential
import numpy as np

# Set up the Azure OpenAI embedding configuration
class AzureOpenAIEmbedding:
    def __init__(self, model, deployment_name, api_key, azure_endpoint, api_version="2024-02-01"):
        # Initialize the client with the given Azure credentials
        self.client = AzureOpenAIClient(
            endpoint=azure_endpoint,
            credential=AzureKeyCredential(api_key)
        )
        self.model = model
        self.deployment_name = deployment_name
        self.api_version = api_version

    def get_embedding(self, text: str) -> np.ndarray:
        # Generate the embedding for the input text
        response = self.client.embeddings(
            model=self.model,
            input=[text],  # List of texts to embed (here, we're embedding one text)
            deployment_name=self.deployment_name,
            api_version=self.api_version
        )
        
        # Extract the embedding from the response
        embedding = response[0]['embedding']
        return np.array(embedding)

embed_model = AzureOpenAIEmbedding(
    model=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version="2024-02-01"
)

# Initialize search clients
credential = AzureKeyCredential(SEARCH_SERVICE_API_KEY)
index_client = SearchIndexClient(endpoint=SEARCH_SERVICE_ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=SEARCH_SERVICE_ENDPOINT, index_name=INDEX_NAME, credential=credential)

ModuleNotFoundError: No module named 'azure.ai.openai'

In [11]:
from llama_index.vector_stores.azureaisearch import (
    IndexManagement,
    MetadataIndexFieldType,
)
import nest_asyncio
from llama_index.core.extractors import TitleExtractor, QuestionsAnsweredExtractor
from llama_index.core.node_parser import TokenTextSplitter

nest_asyncio.apply()


from llama_index.core.settings import Settings

Settings.llm = llm
Settings.embed_model = embed_model

In [12]:

vector_store = AzureAISearchVectorStore(
        search_or_index_client=index_client,
        index_name=INDEX_NAME,
        index_management=IndexManagement.VALIDATE_INDEX,
        id_field_key="parent_id",
        chunk_field_key="chunk_id",
        embedding_field_key="text_vector",
        embedding_dimensionality=3072,
        doc_id_field_key="parent_id",
        language_analyzer="en.lucene",
        vector_algorithm_type="exhaustiveKnn",
        metadata_string_field_key="title"
    )

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
        [],
        storage_context=storage_context,
    )

In [13]:
import openai
import numpy as np

def get_embedding(text: str) -> np.ndarray:
    response = openai.embeddings.create(
        model="text-embedding-ada-002",  # You can choose different models for embeddings
        input=text
    )
    # Extract the embedding vector from the response
    embedding = response['data'][0]['embedding']
    return np.array(embedding)

In [15]:
# Example: Querying the vector store with a text query (need embedding model)
query_text = "Barclays wants to trade in India finance sector with exposure of INR 1 billion. Please suggest RWA optimization strategy adhering to Basel 3 regulations."
query_embedding = embed_model.get_embedding(query_text)  # This should be a function that converts text to vector

# Search for the nearest neighbors (assumes vector store is properly set up)
results = vector_store.query(query_embedding, top_k=5)

# Print out results
for result in results:
    print(f"ID: {result['parent_id']}, Title: {result['title']}")

AttributeError: 'AzureOpenAIEmbedding' object has no attribute 'get_embedding'

In [48]:
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.schema import MetadataMode

# Query execution
query = "Barclays wants to trade in India finance sector with exposure of INR 1 billion. Please suggest RWA optimization strategy adhering to Basel 3 regulations."
query_engine = index.as_query_engine(llm, similarity_top_k=3)
response = query_engine.query(query)

# Print the response
display_response(response)
print("\n")

# Print what the LLM sees
for node in response.source_nodes:
    print(node.get_content(metadata_mode=MetadataMode.LLM))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [25]:
!pip install requests



In [28]:
import requests
import json
import numpy as np
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.schema import MetadataMode

# Set up the Azure OpenAI embedding configuration
class AzureOpenAIEmbedding:
    def __init__(self, model_name, deployment_name, api_key, azure_endpoint, api_version="2024-02-01"):
        self.model_name = model_name
        self.deployment_name = deployment_name
        self.api_key = api_key
        self.azure_endpoint = azure_endpoint
        self.api_version = api_version
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        
    def get_embedding(self, text: str) -> np.ndarray:
        # Endpoint to get embeddings
        url = f"{self.azure_endpoint}/openai/deployments/{self.deployment_name}/embeddings?api-version={self.api_version}"
        
        # Payload for the request
        payload = {
            "input": [text],
            "model": self.model_name
        }

        # Make the API call
        response = requests.post(url, headers=self.headers, json=payload)

        if response.status_code == 200:
            # Extract the embeddings from the response
            data = response.json()
            embedding = data['data'][0]['embedding']
            return np.array(embedding)
        else:
            raise Exception(f"Error in embedding request: {response.status_code}, {response.text}")

# Create a wrapper for embedding retrieval (this turns it into a simple function)
def get_embedding_function(text: str) -> np.ndarray:
    embed_model = AzureOpenAIEmbedding(
        model_name="text-embedding-ada-002",  # Replace with your model
        deployment_name="your-deployment-name",  # Use your deployment name
        api_key="your-azure-api-key",  # Your Azure API key
        azure_endpoint="https://<your-resource-name>.openai.azure.com",  # Your Azure endpoint
        api_version="2024-02-01"  # Ensure this is the correct version
    )
    return embed_model.get_embedding(text)



In [29]:

# Example of creating the query engine with the embedding model
# Assume you have an existing `index` object for querying
# Ensure you have LlamaIndex and the necessary engine set up for querying

# Create the query engine and perform the query
query = "Barclays wants to trade in India finance sector with exposure of INR 1 billion. Please suggest RWA optimization strategy adhering to Basel 3 regulations."

# Use the Azure OpenAI model to query the index
query_engine = index.as_query_engine(llm=get_embedding_function, similarity_top_k=3)
response = query_engine.query(query)

# Display the response from the query engine
display_response(response)

# Print the full response, including what the LLM sees (the source nodes)
for node in response.source_nodes:
    print(node.get_content(metadata_mode=MetadataMode.LLM))


AssertionError: 