In [1]:
# Import required packages
import os
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from supabase import create_client
from langchain_community.vectorstores import SupabaseVectorStore


# Load environment variables from .env file
load_dotenv()

# Initialize core components
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase_client = create_client(supabase_url, supabase_key)

# Initialize LLM and embeddings
llm = init_chat_model("gpt-4o-mini", model_provider="openai", streaming=True)
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

vector_store = SupabaseVectorStore(
    client=supabase_client,
    embedding=embeddings,
    table_name="documents",  # Replace with your desired table name
    query_name="match_documents"  # Replace with your desired query name
)

In [3]:
import bs4
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_openai import OpenAIEmbeddings


# Load and chunk contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert AI assistant tasked with creating comprehensive reports. When generating reports:
    - Start with a brief executive summary
    - Organize information into clear sections with headings
    - Include relevant examples and explanations
    - Use bullet points and numbered lists where appropriate
    - Synthesize information from multiple sources in the context
    - Maintain a professional, analytical tone
    - Conclude with key takeaways
    
    Base your report ONLY on the provided context."""),
    ("human", "Using the following context, generate a detailed report about: {question}\n\nContext: {context}")
])


# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define application steps
# Modify retrieve function for broader context
def retrieve(state: State):
    # Get more documents for comprehensive coverage
    retrieved_docs = vector_store.similarity_search(state["question"], k=6)
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    
    # Create streaming response
    stream = llm.stream(messages)
    
    # Initialize response content
    response_content = ""
    
    # Process stream chunks
    for chunk in stream:
        if chunk.content is not None:
            print(chunk.content, end="", flush=True)
            response_content += chunk.content
            
    return {"answer": response_content}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [3]:
response = graph.invoke({"question": "What is Task Decomposition?"})
print(response["answer"])

Task decomposition is the process of breaking down a complicated task into smaller, more manageable steps. This can be achieved through techniques like Chain of Thought (CoT), where the model is prompted to think step by step, or by utilizing task-specific instructions. It allows for improved problem-solving by creating a structured approach to address each component of the task.


In [3]:
response = graph.invoke({"question": "Generate a long length report on task decomposition. Use paragraphs and lists to make it more readable."})
print(response["answer"])

# Report on Task Decomposition in Software Development

## Executive Summary
Task decomposition is a critical practice in software development, especially when dealing with complex problems. This report will explore the concept of task decomposition, focusing on how it can improve problem-solving efficiency by breaking down larger tasks into more manageable components. We will discuss the Chain of Thought (CoT) prompting technique and its relevance, as well as the methodology for creating a structured coding architecture that facilitates this process.

## 1. Introduction to Task Decomposition
Task decomposition refers to the systematic breakdown of a complex task into simpler, more manageable subtasks. This process not only simplifies the execution of the task but also clarifies the necessary steps and helps in planning and organizing the workflow.

### 1.1 Importance
- **Manageability**: Smaller tasks are easier to manage and keep track of.
- **Clarity**: Decomposing tasks provides cl

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase import create_client
import os
from dotenv import load_dotenv
import sys
sys.path.append("../src")  # Add the src directory to Python path
from data.ingestion import DataIngestionPipeline

# Initialize pipeline
pipeline = DataIngestionPipeline()

# Ingest Federal Reserve content
urls = [
    "https://www.federalreserve.gov/newsevents/pressreleases/monetary20240320a.htm",
]
num_chunks = pipeline.ingest_web_content(urls)
print(f"Processed {num_chunks} chunks from web content")

# Now your query about Fed rates should work
docs = vector_store.similarity_search(
    "What was the Federal Reserve's latest decision on interest rates?",
    k=2
)




for doc in docs:
    print("\nDocument content:", doc.page_content)
    print("\nMetadata:", doc.metadata)

Processed 14 chunks from web content

Document content: In support of its goals, the Committee decided to maintain the target range for the federal funds rate at 5-1/4 to 5-1/2 percent. In considering any adjustments to the target range for the federal funds rate, the Committee will carefully assess incoming data, the evolving outlook, and the balance of risks. The Committee does not expect it will be appropriate to reduce the target range until it has gained greater confidence that inflation is moving sustainably toward 2 percent. In addition, the Committee will continue reducing its holdings of Treasury securities and agency debt and agency mortgage-backed securities, as described in its previously announced plans. The Committee is strongly committed to returning inflation to its 2 percent objective.

Metadata: {'title': 'Federal Reserve Board - Federal Reserve issues FOMC statement', 'source': 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20240320a.htm', 'language

In [8]:
# Create a report-focused prompt template
report_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert economic analyst tasked with creating comprehensive reports. When generating reports:
    - Start with a brief executive summary
    - Organize information into clear sections with headings
    - Analyze the Fed's decisions and their implications
    - Include relevant economic data points mentioned
    - Discuss forward guidance and policy outlook
    - Maintain a professional, analytical tone
    - Conclude with key takeaways
    
    Base your report MOSTLY on the provided context."""),
    ("human", "Using the following Fed statement context, generate a detailed economic report analyzing the current monetary policy stance and economic outlook:\n\nContext: {context}")
])

# Get documents from vector store
docs = vector_store.similarity_search(
    "Federal Reserve monetary policy stance and economic outlook",
    k=4  # Increase k to get more context
)

# Prepare context from documents
context = "\n\n".join(doc.page_content for doc in docs)

# Generate the report
messages = report_prompt.invoke({"context": context})
response = llm.invoke(messages)

print(response.content)

# Economic Report: Federal Reserve Monetary Policy Stance and Economic Outlook

## Executive Summary

The Federal Reserve's recent monetary policy decisions indicate a commitment to maintaining a cautious approach amid evolving economic conditions. With a sustained target range for the federal funds rate at 5-1/4 to 5-1/2 percent, the Fed's strategy reflects its ongoing mission to achieve maximum employment while managing inflation at the 2 percent target over the longer term. Key indicators suggest solid economic expansion; however, the Fed remains vigilant regarding rising inflation risks. The following sections analyze the current economic environment, the Federal Open Market Committee's (FOMC) decisions, and future policy implications.

## Economic Activity and Labor Market Conditions

Recent reports indicate that economic activity is expanding at a solid pace. Job gains remain robust, keeping the unemployment rate low. This labor market strength is crucial as it supports consumer 

In [9]:
# Create a report-focused prompt template
report_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert economic analyst tasked with creating comprehensive reports. When generating reports:
    - Start with a brief executive summary
    - Organize information into clear sections with headings
    - Analyze the Fed's decisions and their implications
    - Include relevant economic data points mentioned
    - Discuss forward guidance and policy outlook
    - Maintain a professional, analytical tone
    - Conclude with key takeaways
    
    Base your report MOSTLY on the provided context."""),
    ("human", "Generate a detailed economic report analyzing the current monetary policy stance and economic outlook:\n\nContext: {context}")
])

# Get documents from vector store
docs = vector_store.similarity_search(
    "Federal Reserve monetary policy stance and economic outlook",
    k=4  # Increase k to get more context
)

# Prepare context from documents
context = "\n\n".join(doc.page_content for doc in docs)

# Generate the report
messages = report_prompt.invoke({"context": context})
response = llm.invoke(messages)

print(response.content)

# Economic Report on Monetary Policy Stance and Economic Outlook

## Executive Summary
The Federal Reserve's latest assessments reflect a cautious yet proactive approach to current monetary policy, maintaining the federal funds rate target range between 5.25% and 5.50%. The Committee's focus remains on achieving maximum employment and an inflation rate of 2% over the longer term. Economic activity is expanding with strong job growth, while inflation has softened but remains above target levels. The forward guidance indicates that the Fed will adjust its policies based on incoming data and risk assessments, emphasizing continued vigilance regarding inflation pressures.

## Current Monetary Policy Stance

### Interest Rate Decision
The Federal Open Market Committee (FOMC) has decided to keep the target range for the federal funds rate at 5.25% to 5.50%. This decision reflects a balanced approach to managing inflation while fostering economic growth. The Committee explicitly indicated tha

In [16]:
# Test Gmail ingestion
from data.ingestion import DataIngestionPipeline
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase import create_client
from concurrent.futures import ThreadPoolExecutor
import time

report_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert economic analyst tasked with creating comprehensive reports. When generating reports:
    - Start with a brief executive summary
    - Organize information into clear sections with headings
    - Analyze the Fed's decisions and their implications
    - Include relevant economic data points mentioned
    - Discuss forward guidance and policy outlook
    - Maintain a professional, analytical tone
    - Conclude with key takeaways
    
    Base your report MOSTLY on the provided context."""),
    ("human", "Generate a detailed economic report analyzing the current monetary policy stance and economic outlook:\n\nContext: {context}")
])

# Load environment variables
load_dotenv()

# Initialize core components
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase_client = create_client(supabase_url, supabase_key)

# Initialize LLM and embeddings
llm = init_chat_model("gpt-4-turbo-preview", model_provider="openai")
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

vector_store = SupabaseVectorStore(
    client=supabase_client,
    embedding=embeddings,
    table_name="documents",
    query_name="match_documents"
)

# Initialize pipeline
pipeline = DataIngestionPipeline()

query = "from:connie@strictlyvc.com OR from:fortune@newsletter.fortune.com OR from:anand.sanwal@cbinsights.com OR from:FT@newsletters.ft.com"

# Process in smaller batches with delay
batch_size = 5  # Reduced batch size
delay_between_batches = 2  # seconds
total_chunks = 0

try:
    for start_index in range(0, 100, batch_size):
        num_chunks = pipeline.ingest_gmail(
            credentials_path='credentials.json',
            query=query,
            max_results=batch_size,
            metadata={"batch_start": start_index}
        )
        total_chunks += num_chunks
        print(f"Processed batch starting at {start_index}, got {num_chunks} chunks")
        
        # Add delay between batches
        time.sleep(delay_between_batches)
        
except Exception as e:
    print(f"Error occurred at batch {start_index}: {str(e)}")
    
print(f"Processed total of {total_chunks} chunks from Gmail")


# Test retrieval with a market-related query
docs = vector_store.similarity_search(
    "What are the latest market updates and economic indicators?",
    k=3
)

# Prepare context from documents
context = "\n\n".join(doc.page_content for doc in docs)

# Generate the report
messages = report_prompt.invoke({"context": context})
response = llm.invoke(messages)

print(response.content)

print("\nRetrieved Documents:")
count = 0
for doc in docs:
    count += 1
    print("\nDocument content:", doc.page_content)
    print("\nMetadata:", doc.metadata)

print(f"Processed {count} chunks from Gmail")


Processed batch starting at 0, got 287 chunks
Processed batch starting at 5, got 287 chunks
Processed batch starting at 10, got 287 chunks
Processed batch starting at 15, got 287 chunks
Processed batch starting at 20, got 287 chunks
Processed batch starting at 25, got 287 chunks
Processed batch starting at 30, got 287 chunks
Processed batch starting at 35, got 287 chunks
Processed batch starting at 40, got 287 chunks
Processed batch starting at 45, got 287 chunks
Processed batch starting at 50, got 287 chunks
Processed batch starting at 55, got 287 chunks
Processed batch starting at 60, got 287 chunks
Processed batch starting at 65, got 287 chunks
Processed batch starting at 70, got 287 chunks
Processed batch starting at 75, got 287 chunks
Processed batch starting at 80, got 287 chunks
Processed batch starting at 85, got 287 chunks
Processed batch starting at 90, got 287 chunks
Processed batch starting at 95, got 287 chunks
Processed total of 5740 chunks from Gmail


APIError: {'code': '57014', 'details': None, 'hint': None, 'message': 'canceling statement due to statement timeout'}

In [None]:
docs = vector_store.similarity_search(
    "What are the latest market updates and economic indicators?",
    k=3
)

report_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert macro and volatility analyst with deep experience in global markets, monetary policy, and risk analysis. When analyzing markets and responding to queries:

- Start with key market metrics and indicators:
  * VIX, MOVE, and other volatility measures
  * Treasury yields and yield curve dynamics
  * Credit spreads and financial conditions
  * Currency movements and cross-asset correlations
  * Commodity prices and trends

- Provide detailed analysis of:
  * Central bank policies and their market implications (Primarily the Fed)
  * Geopolitical risks and their potential market impact
  * Positioning data and market sentiment indicators
  * Systematic flows and technical factors
  * Cross-asset relationships and regime changes

- Structure your responses with:
  * Clear executive summary highlighting key points
  * Detailed analysis backed by specific data points
  * Forward-looking scenarios and their probabilities
  * Specific risks to the current market narrative
  * Actionable trading implications

- Consider portfolio implications for current positions:
  * Palantir, Snowflake, Meta, Apple, AMD, NVIDIA, TSMC, and SPY
  * Impact on different asset classes and sectors
  * Correlation changes and diversification effects
  * Tail risk scenarios and hedging strategies
  * Position sizing and risk management recommendations
  * Time horizon considerations

Base your analysis primarily on the provided context, but incorporate your broad market knowledge where relevant. Be specific rather than ambivalent - represent the views in the context. Maintain a professional, analytical tone and clearly distinguish between facts and opinions."""),
    ("human", "Generate a detailed market analysis with specific implications for our portfolio positions:\n\nContext: {context}")
])


# Prepare context from documents
context = "\n\n".join(doc.page_content for doc in docs)

# Generate the report
messages = report_prompt.invoke({"context": context})
stream = llm.stream(messages)

# Process the streaming response
response_content = ""
for chunk in stream:
    if chunk.content is not None:
        print(chunk.content, end="", flush=True)
        response_content += chunk.content

print("\nRetrieved Documents:")
count = 0
for doc in docs:
    count += 1
    print("\nDocument content:", doc.page_content)
    print("\nMetadata:", doc.metadata)


### Executive Summary

This analysis assesses the current market environment, incorporating the latest economic updates from Mexico and Colombia, impending earnings reports from Alphabet, KKR, and Pfizer, and global market metrics as of 0631 GMT-5. Given these conditions and our portfolio positions (Palantir, Snowflake, Meta, Apple, AMD, NVIDIA, TSMC, and SPY), we explore the implications of central bank policies, geopolitical risks, and market sentiment indicators to propose forward-looking scenarios and actionable trading implications.

### Detailed Market Analysis

#### Economic and Corporate Updates

1. **Mexico and Colombia Economic Forecasts**:
    - **Mexico**: The Bank of Mexico's updated GDP growth, inflation, exchange rate, and benchmark interest rate forecasts will provide insights into the economic health and policy trajectory of one of Latin America's largest economies. Higher-than-expected inflation or lower GDP growth forecasts could signal rising risks in emerging marke

In [29]:
from langchain import hub

docs = vector_store.similarity_search(
    "Which energy company laid off 20% of its employees?",
    k=3
)

prompt = hub.pull("rlm/rag-prompt")

context = "\n\n".join(doc.page_content for doc in docs)

# Modified prompt invocation
messages = prompt.invoke({
    "context": context,
    "question": "Which energy company laid off 20% of its employees?"
})
response = llm.stream(messages)

for chunk in response:
    if chunk.content is not None:
        print(chunk.content, end="", flush=True)





Chevron was the energy company that laid off 20% of its employees.