AAI 520 IN2 - LLM PROJECT

Sanjay Kumar and team

Sept 2025



In [None]:
#----------------python --------------------

In [3]:
!pip install rapidfuzz


Collecting rapidfuzz
  Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.14.1


In [4]:
pip install requests langchain-ollama


Collecting langchain-ollama
  Downloading langchain_ollama-0.3.10-py3-none-any.whl.metadata (2.1 kB)
Collecting ollama<1.0.0,>=0.5.3 (from langchain-ollama)
  Downloading ollama-0.6.0-py3-none-any.whl.metadata (4.3 kB)
Downloading langchain_ollama-0.3.10-py3-none-any.whl (27 kB)
Downloading ollama-0.6.0-py3-none-any.whl (14 kB)
Installing collected packages: ollama, langchain-ollama
Successfully installed langchain-ollama-0.3.10 ollama-0.6.0


In [26]:
# Accessing Hugging Face token from Colab secret
import os
from google.colab import userdata
userdata.get('secretName')

# Colab sets secrets as environment variables automatically
HUGGINGFACEHUB_API_TOKEN = os.environ.get("ipowatch_project1")
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")

print("HF Token:", "Found" if HUGGINGFACEHUB_API_TOKEN else "Not Found")
print("Finnhub Token:", "Found" if FINNHUB_API_KEY else "Not Found")


SecretNotFoundError: Secret secretName does not exist.

In [27]:
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=HUGGINGFACEHUB_API_TOKEN)
print("Hugging Face client initialized successfully!")


Hugging Face client initialized successfully!


In [34]:
# Accessing Hugging Face token from Colab secret
import os

# Colab sets secrets as environment variables automatically
HUGGINGFACEHUB_API_TOKEN = os.environ.get("ipowatch_project1")
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")

print("HF Token:", "Found" if HUGGINGFACEHUB_API_TOKEN else "Not Found")
print("Finnhub Token:", "Found" if FINNHUB_API_KEY else "Not Found")


HF Token: Not Found
Finnhub Token: Not Found


In [35]:
import os
import datetime
import json
import requests
from rapidfuzz import process, fuzz
from huggingface_hub import InferenceClient

# -------------------- Hugging Face LLM Setup --------------------
HUGGINGFACEHUB_API_TOKEN = os.environ.get("ipowatch_project1")
if not HUGGINGFACEHUB_API_TOKEN:
    raise ValueError("Hugging Face token not found. Set Colab secret 'ipowatch_project1'.")

client = InferenceClient(api_key=HUGGINGFACEHUB_API_TOKEN)
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

def query_llm(prompt):
    response = client.text_generation(
        prompt,
        model=MODEL_NAME,
        max_new_tokens=512,
        temperature=0.3
    )
    return response[0]["generated_text"]

# -------------------- Finnhub API Key from Colab Secret --------------------
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")
if not FINNHUB_API_KEY:
    raise ValueError("Finnhub API key not found. Set Colab secret 'FINNHUB_API_KEY'.")

# -------------------- Constants --------------------
DATABASE_PATH = "database.json"
COMPANY_INFO_PATH = "company_tickers.json"

# -------------------- Database Helpers --------------------
def read_json(path):
    try:
        with open(path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}

def write_json(path, data):
    with open(path, "w") as f:
        json.dump(data, f, indent=4)

def write_company_name_to_database(company_name):
    db = read_json(DATABASE_PATH)
    db.setdefault("company_name", []).append(company_name)
    write_json(DATABASE_PATH, db)

def write_available_dates_to_database(months, years):
    db = read_json(DATABASE_PATH)
    db["date_response"] = {"months": months, "years": years}
    write_json(DATABASE_PATH, db)

# -------------------- Fuzzy Matching --------------------
def best_match(company_name, key="title", threshold=60):
    data = read_json(COMPANY_INFO_PATH)
    reference_list = [data[k] for k in data.keys()]
    choices = [r[key].lower() for r in reference_list]
    matches = process.extract(company_name, choices, scorer=fuzz.token_set_ratio,
                              limit=3, score_cutoff=threshold)
    results = []
    for match, score, idx in matches:
        results.append({"match": reference_list[idx][key], "score": score})
    return results

# -------------------- LLM Company Extraction --------------------
def find_company_names(request):
    prompt = f"""
    Extract company names from this user request.
    Only return a comma-separated list of company names.
    If none, return 'NO COMPANY FOUND'.
    Request: {request}
    """
    response = query_llm(prompt)
    print(f"LLM Output: {response}")

    company_names = response.lower()
    if "no company found" in company_names:
        print("No company names found.")
        return []

    names = [name.strip() for name in company_names.split(",") if name.strip()]
    confirmed = []

    for name in names:
        result = best_match(name)
        if not result:
            print(f"No valid match for '{name}'")
            continue

        best = result[0]
        print(f"Match for '{name}': {best}")
        if best["score"] > 90:
            write_company_name_to_database(best["match"])
            confirmed.append(best["match"])

    return confirmed

# -------------------- Date Extraction --------------------
def find_month_year(answer):
    months = ['january','february','march','april','may','june','july',
              'august','september','october','november','december']
    words = answer.replace('-', ' ').replace(',', ' ').split()
    found_months, found_years = [], []

    for w in words:
        if w.lower() in months:
            found_months.append(months.index(w.lower()) + 1)
        elif w.isdigit() and 2000 <= int(w) <= datetime.date.today().year:
            found_years.append(int(w))

    write_available_dates_to_database(found_months, found_years)
    return found_months, found_years

def find_dates(request):
    prompt = f"""
    Identify month(s) and year(s) in this text (if any).
    Return only month names and years. No explanation.
    Text: {request}
    """
    ans = query_llm(prompt)
    print(f"Date info found: {ans}")
    return find_month_year(ans)

# -------------------- Finnhub API --------------------
def get_company_news(symbol, from_date, to_date):
    url = f"https://finnhub.io/api/v1/company-news?symbol={symbol}&from={from_date}&to={to_date}&token={FINNHUB_API_KEY}"
    res = requests.get(url)
    if res.status_code == 200:
        data = res.json()
        print(f"\n--- {symbol} News ---")
        for item in data[:5]:
            print(f"- {item['headline']}")
    else:
        print(f"Error fetching news for {symbol}: {res.text}")

def get_recent_ipos():
    today = datetime.date.today()
    from_date = (today - datetime.timedelta(days=90)).isoformat()
    to_date = today.isoformat()

    url = f"https://finnhub.io/api/v1/calendar/ipo?from={from_date}&to={to_date}&token={FINNHUB_API_KEY}"
    res = requests.get(url)
    if res.status_code == 200:
        data = res.json().get("ipoCalendar", [])
        print("\n--- Recent IPOs ---")
        for d in data[:5]:
            print(f"{d['name']} ({d['symbol']}) - {d['date']}")
    else:
        print(f"Error fetching IPO data: {res.text}")

# -------------------- Main Workflow --------------------
def workflow():
    request = input("Enter your query: ")

    # Step 1: Find companies
    companies = find_company_names(request)

    # Step 2: Find dates
    months, years = find_dates(request)

    # Step 3: If company present → fetch company news
    if companies:
        to_date = datetime.date.today().isoformat()
        from_date = (datetime.date.today() - datetime.timedelta(days=30)).isoformat()
        for name in companies:
            db = read_json(COMPANY_INFO_PATH)
            for k, v in db.items():
                if v["title"].lower() == name.lower():
                    get_company_news(v["ticker"], from_date, to_date)
    else:
        # Step 4: No company → get recent IPOs
        get_recent_ipos()

if __name__ == "__main__":
    workflow()


ValueError: Hugging Face token not found. Set Colab secret 'ipowatch_project1'.

In [None]:
#------------------------------- end of python--------------------------------------------