In [None]:
# PROJECT 1: AI-Powered Financial Risk Analyst
# Author: Nazim Biplob
# Description: Using RAG (Retrieval-Augmented Generation) to analyze SEC 10-K Filings.

# 1. Setup & Install
!pip install -U -q langchain langchain-community faiss-cpu pypdf sentence-transformers textblob matplotlib

import os
import matplotlib.pyplot as plt
from textblob import TextBlob
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# 2. Load Data (Apple 2025 10-K)
# Note: In a production environment, this would pull directly from EDGAR.
# For this portfolio demo, we load a local sample.
loader = PyPDFLoader("_10-K-2025-As-Filed.pdf")
pages = loader.load()

# Split the loaded pages into chunks of up to 1000 characters, with
# consecutive chunks sharing 100 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(pages)

# Fail early with a clear message when no text could be extracted (e.g. a
# scanned, image-only PDF) instead of handing an empty list to the vector store.
if not docs:
    raise ValueError(
        "No text chunks were extracted from the PDF; "
        "it may be empty or contain only scanned images."
    )

# 3. Create Vector Database (The "Brain")
# Embed each chunk with the model named below and build a FAISS index
# over the resulting vectors.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)

# 4. Semantic Search & Sentiment Analysis
# Retrieve the five chunks most similar to the risk-related query, then
# score the polarity of each chunk's text with TextBlob.
query = "Risk Factors and adverse effects on business"
results = vector_store.similarity_search(query, k=5)

# One polarity score and one 1-based label per retrieved chunk, in result order.
sentiments = [TextBlob(hit.page_content).sentiment.polarity for hit in results]
labels = [f"Risk Chunk {rank}" for rank in range(1, len(results) + 1)]

# 5. Visualization
# Bar chart of the per-chunk polarity scores: bars below zero are red,
# all others green.
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['red' if s < 0 else 'green' for s in sentiments]
ax.bar(labels, sentiments, color=colors)
# Zero baseline separating negative from non-negative scores.
ax.axhline(0, color='black', linewidth=0.8)
# Plain string literal: the title has no placeholders, so no f-prefix is needed.
ax.set_title("Sentiment Analysis of Apple's Risk Factors (AI Derived)", fontsize=14)
ax.set_ylabel("Sentiment Score (-1 = Negative, +1 = Positive)")
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()