In [1]:
from pinecone.grpc import PineconeGRPC as Pinecone
from openai import OpenAI
from dotenv import load_dotenv
import os
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [2]:
# Load environment variables from the .env file
load_dotenv()

# Initialize Pinecone client
api_key = os.getenv("PINECONE_API_KEY")
environment = 'us-east-1'  # Your Pinecone environment region

pc = Pinecone(api_key=api_key)

# Connect to your specific index
index_name = "investments"
index = pc.Index(index_name)

# Initialize OpenAI client
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")
)

# Initialize the embedding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [3]:
def generate_embedding(query_text):
    try:
        query_embedding = embed_model.get_text_embedding(query_text)
        return query_embedding.tolist() if hasattr(query_embedding, 'tolist') else query_embedding
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return None


In [4]:
def query_pinecone(query_embedding):
    try:
        response = index.query(
            vector=query_embedding,
            top_k=5,
            include_metadata=True
        )
        if not response or 'matches' not in response or len(response['matches']) == 0:
            print("No matches found in Pinecone.")
            return None
        return response
    except Exception as e:
        print(f"Error querying Pinecone: {e}")
        return None

In [5]:
def query_llm(query, queried_data):
    try:
        # Extract narrative_texts from the queried data
        context = "\n".join([match['metadata']['narrative_texts'] for match in queried_data['matches'] if 'narrative_texts' in match['metadata']])
        
        if not context:
            print("No valid context found in Pinecone data.")
            return "I'm sorry, I couldn't find enough context to answer your query."

        prompt = (
            "Based on the following context, provide a detailed and expert-level response to the query. "
            "Ensure the response is well-structured, includes specific financial insights, comparisons to traditional financial instruments where relevant, and uses appropriate terminology.\n\n"
            "Context:\n"
            f"{context}\n\n"
            "Query:\n"
            f"{query}\n\n"
            "Response:"
        )
        response = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a finance and investment expert."},
                {"role": "user", "content": prompt}
            ],
            model="gpt-3.5-turbo",
            max_tokens=300,
            temperature=0.5
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error querying LLM: {e}")
        return "There was an error processing your request. Please try again later."

In [6]:
# Example usage
query_text = "Impact of interest rates on investments"
query_embedding = generate_embedding(query_text)

if query_embedding:
    pinecone_response = query_pinecone(query_embedding)
    if pinecone_response:
        llm_response = query_llm("What are the risks of rising interest rates?", pinecone_response)
        print(llm_response)

Rising interest rates pose several risks to investors and the overall economy. Understanding these risks is crucial for making informed investment decisions and managing financial portfolios effectively.

1. **Bond Price Risk**: One of the primary risks associated with rising interest rates is bond price risk. When market interest rates increase, the prices of existing bonds typically decrease. This inverse relationship between bond prices and interest rates is known as interest rate risk. Investors holding fixed-income securities, such as bonds, face the risk of seeing the value of their investments decline if interest rates rise. This is particularly true for longer-term bonds, as they are more sensitive to changes in interest rates.

2. **Inflation Risk**: Rising interest rates can also exacerbate inflation risk. While inflation erodes the purchasing power of money, rising interest rates may not keep pace with inflation, leading to a decrease in the real rate of return on investment

In [7]:
import openai
import os
import numpy as np
from sklearn.decomposition import PCA
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Initialize OpenAI client
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Sample data to fit PCA
# Ideally, this should be a batch of embeddings generated from similar queries
sample_queries = [
    "What are the benefits of diversifying an investment portfolio?",
    "How does quantitative easing affect the economy?",
    "What are the risks associated with investing in emerging markets?",
    "How do interest rate hikes impact the bond market?",
    "What factors drive the price of gold?",
    "How does inflation influence consumer spending?",
    "What are the pros and cons of real estate investment trusts (REITs)?",
    "How do geopolitical events affect global stock markets?",
    "What is the relationship between oil prices and inflation?",
    "How does currency exchange rate volatility impact multinational corporations?",
    "What are the key differences between growth and value investing?",
    "How do dividend yields affect stock prices?",
    "What are the advantages of investing in index funds?",
    "How does the Federal Reserve's monetary policy influence the stock market?",
    "What is the impact of corporate earnings on stock prices?",
    "How do changes in GDP growth affect equity markets?",
    "What are the effects of tariffs on international trade?",
    "How does consumer sentiment influence economic growth?",
    "What are the risks of investing in cryptocurrency?",
    "How does the bond yield curve indicate potential economic recessions?",
    "What are the benefits of holding a diversified bond portfolio?",
    "How do interest rate differentials affect currency values?",
    "What are the implications of government debt on future tax rates?",
    "How does fiscal stimulus impact economic growth?",
    "What are the challenges of investing in private equity?",
    "How do changes in labor market conditions affect inflation?",
    "What are the key drivers of real estate prices?",
    "How does corporate governance influence shareholder value?",
    "What are the risks of investing in high-yield bonds?",
    "How do central bank policies influence global financial markets?",
    "What are the benefits of investing in socially responsible funds?",
    "How does climate change impact investment strategies?",
    "What are the implications of demographic changes on long-term investment returns?",
    "How do changes in technology affect business competitiveness?",
    "What are the challenges of investing in small-cap stocks?",
    "How does the global supply chain affect company profitability?",
    "What are the effects of tax policy changes on corporate earnings?",
    "How do changes in interest rates impact mortgage rates?",
    "What are the risks and rewards of investing in venture capital?",
    "How does inflation affect fixed-income investments?",
    "What are the advantages of investing in international equities?",
    "How do changes in commodity prices influence inflation?",
    "What are the implications of trade deficits on currency values?",
    "How does political instability affect investment decisions?",
    "What are the benefits of investing in blue-chip stocks?",
    "How do changes in energy prices impact industrial production?",
    "What are the challenges of investing in frontier markets?",
    "How does consumer credit growth affect economic expansion?",
    "What are the risks of investing in leveraged ETFs?",
    "How does technological innovation drive economic growth?"
]


# Generate sample embeddings to fit PCA
sample_embeddings = []
for query in sample_queries:
    response = client.embeddings.create(input=query, model="text-embedding-ada-002")
    embedding = response.data[0].embedding
    sample_embeddings.append(embedding)

sample_embeddings = np.array(sample_embeddings)

# Fit the PCA model on the sample embeddings
pca = PCA(n_components=384)
pca.fit(sample_embeddings)

def generate_embedding(query_text):
    try:
        response = client.embeddings.create(input=query_text, model="text-embedding-ada-002")
        query_embedding = response.data[0].embedding
        
        # Convert to numpy array for PCA transformation
        query_embedding = np.array(query_embedding).reshape(1, -1)
        
        # Apply PCA to reduce to 384 dimensions
        reduced_embedding = pca.transform(query_embedding)
        
        return reduced_embedding.flatten().tolist()
    except Exception as e:
        print(f"Error generating embedding with OpenAI: {e}")
        return None

# Example usage
query_text = "Impact of interest rates on investments"
embedding = generate_embedding(query_text)
print(embedding)


ValueError: n_components=384 must be between 0 and min(n_samples, n_features)=50 with svd_solver='full'