In [25]:
from financialtools.processor import Downloader, RateLimiter  # Assume the updated Downloader class is in downloader.py
# Rate limiter handed to the download step further down.
limiter = RateLimiter(
    per_minute=60,
    per_hour=360,
    per_day=8000,
)
# Ticker symbols to download and analyse.
# NOTE(review): the recorded run below printed two "HTTP Error 404" messages and
# reported 0 rows for OXIG and AIXTRON — confirm these symbols are valid.
quantum_tickers = [
    'OXIG', 'HON', 'PH', 'MMM', 'TDY',
    'KEYS', 'AEIS', 'APD', 'QUBT', 'EMR',
    'SWKS', 'QRVO', 'TXN', 'ADI', 'FTV',
    'GTLS', 'LIN', 'AI.PA', 'MKSI', 'CRYM',
    'ACLS', 'SUSS', 'VECO', 'LRCX', 'AIXTRON',
]


In [26]:
# d = Downloader.from_ticker(quantum_tickers[1])

In [27]:
# dfs = [d._balance_sheet, d._cashflow, d._income_stmt, d._info]
# dfs

In [28]:
# d._balance_sheet.merge(d._income_stmt,  how="left", on=["ticker", "time"]).merge(d._cashflow,  how="left", on=["ticker", "time"])

In [29]:
# merged = dfs[0]
# for df in dfs[1:]:  
#     merged = merged.merge(df, how="left", on=["ticker", "time"])

# merged

In [30]:
def stream_download(tickers, limiter, out_dir="./quantum_data"):
    """Download tickers one at a time, persist each result, and yield it.

    For every ticker the limiter is acquired, a ``Downloader`` is built with
    ``Downloader.from_ticker`` and its merged/info tables are written to
    ``<out_dir>/<ticker>_<table>.parquet`` (tables that are ``None`` or empty
    are skipped). A failure for one ticker or one file is reported on stdout
    and does not stop the stream.

    Args:
        tickers: Iterable of ticker symbols.
        limiter: Object exposing ``acquire()``; called once per ticker before
            the download starts.
        out_dir: Directory receiving the parquet files; created if missing.

    Yields:
        The ``Downloader`` instance of every ticker that was fetched
        successfully.
    """
    import os

    os.makedirs(out_dir, exist_ok=True)

    for t in tickers:
        limiter.acquire()
        try:
            d = Downloader.from_ticker(t)
            tables = {
                "merged_data": d.get_merged_data(),
                "info": d.get_info_data(),
            }
        except Exception as exc:
            # Best effort: one bad ticker must not abort the whole stream,
            # but the failure is reported instead of being hidden.
            print(f"Skipping {t}: {type(exc).__name__}: {exc}")
            continue

        for name, df in tables.items():
            try:
                if df is not None and not df.empty:
                    path = os.path.join(out_dir, f"{t}_{name}.parquet")
                    df.to_parquet(path)
            except Exception as exc:
                # A failed write loses only this table; keep going.
                print(f"Could not write {t}_{name}.parquet: {type(exc).__name__}: {exc}")

        print(f"Saved {t} data to {out_dir}")
        # The yield sits outside every try/except on purpose: GeneratorExit
        # (raised here when the consumer stops early) must propagate, otherwise
        # Python raises "RuntimeError: generator ignored GeneratorExit".
        yield d

In [31]:
# Fresh limiter instance for the download run in the next cell.
limiter = RateLimiter(
    per_minute=60,
    per_hour=360,
    per_day=8000,
)

In [32]:
# Drain the download stream and report the balance-sheet row count per ticker.
download_stream = Downloader.stream_download(quantum_tickers, limiter, out_dir='./quantum_data')
for dl in download_stream:
    balance_sheet_rows = len(dl._balance_sheet)
    print(f"{dl.ticker} ready → {balance_sheet_rows} rows")

HTTP Error 404: 


Saved OXIG data to ./quantum_data
OXIG ready → 0 rows
Saved HON data to ./quantum_data
HON ready → 5 rows
Saved PH data to ./quantum_data
PH ready → 5 rows
Saved MMM data to ./quantum_data
MMM ready → 5 rows
Saved TDY data to ./quantum_data
TDY ready → 5 rows
Saved KEYS data to ./quantum_data
KEYS ready → 5 rows
Saved AEIS data to ./quantum_data
AEIS ready → 5 rows
Saved APD data to ./quantum_data
APD ready → 4 rows
Saved QUBT data to ./quantum_data
QUBT ready → 5 rows
Saved EMR data to ./quantum_data
EMR ready → 4 rows
Saved SWKS data to ./quantum_data
SWKS ready → 4 rows
Saved QRVO data to ./quantum_data
QRVO ready → 5 rows
Saved TXN data to ./quantum_data
TXN ready → 5 rows
Saved ADI data to ./quantum_data
ADI ready → 5 rows
Saved FTV data to ./quantum_data
FTV ready → 5 rows
Saved GTLS data to ./quantum_data
GTLS ready → 5 rows
Saved LIN data to ./quantum_data
LIN ready → 5 rows
Saved AI.PA data to ./quantum_data
AI.PA ready → 5 rows
Saved MKSI data to ./quantum_data
MKSI ready → 5

HTTP Error 404: 


Saved AIXTRON data to ./quantum_data
AIXTRON ready → 0 rows


In [33]:
import pandas as pd
import pathlib

def _load_clean_frames(data_dir, pattern):
    """Read every parquet file in ``data_dir`` matching ``pattern``, minus all-NA columns.

    Files that end up empty once their all-NA columns are removed are skipped,
    so no empty frame ever reaches ``pd.concat``.
    """
    frames = []
    # glob() yields in arbitrary order; sort so the row order is reproducible.
    for path in sorted(data_dir.glob(pattern)):
        cleaned = pd.read_parquet(path).dropna(axis=1, how="all")
        # After dropping all-NA columns a non-empty frame necessarily holds
        # at least one non-NA value, so no further NA check is needed.
        if not cleaned.empty:
            frames.append(cleaned)
    return frames


def load_and_combine_ticker_data(tickers, data_dir="./quantum_data"):
    """
    Load and combine merged and info parquet files for a list of tickers.

    Files are looked up as ``<ticker>_merged*.parquet`` and
    ``<ticker>_info*.parquet`` inside ``data_dir``. The file name must start
    with the ticker, so ``PH`` does not pick up files written for ``APH``.

    Args:
        tickers: Iterable of ticker symbols.
        data_dir: Directory containing the parquet files.

    Returns:
        tuple (merged_df, info_df). Each frame carries a ``ticker`` column;
        ``info_df`` has lower-cased column names and no ``ebitda`` column.
        A frame is an empty ``pd.DataFrame()`` when nothing usable was found.
    """
    data_dir = pathlib.Path(data_dir)
    merged_list, info_list = [], []

    for ticker in tickers:
        # Load merged data
        merged_dfs = _load_clean_frames(data_dir, f"{ticker}_merged*.parquet")
        if merged_dfs:
            merged_df = pd.concat(merged_dfs, ignore_index=True)
            merged_df["ticker"] = ticker
            merged_list.append(merged_df)

        # Load info data
        info_dfs = _load_clean_frames(data_dir, f"{ticker}_info*.parquet")
        if info_dfs:
            info_df = pd.concat(info_dfs, ignore_index=True)
            info_df["ticker"] = ticker
            # Lower-case first so the drop also catches "EBITDA"/"Ebitda".
            info_df.columns = info_df.columns.str.lower()
            info_df = info_df.drop(columns="ebitda", errors="ignore")
            info_list.append(info_df)

    merged_df = pd.concat(merged_list, ignore_index=True) if merged_list else pd.DataFrame()
    info_df = pd.concat(info_list, ignore_index=True) if info_list else pd.DataFrame()

    return merged_df, info_df


In [34]:
# Combine the per-ticker parquet files into one statements frame and one info frame.
fin_data, info_data = load_and_combine_ticker_data(
    quantum_tickers,
    data_dir="./quantum_data",
)

In [35]:
# Display the combined statements frame (the notebook renders a truncated preview).
fin_data

Unnamed: 0,ticker,docs_x,time,accounts_payable,accounts_receivable,accumulated_depreciation,additional_paid_in_capital,allowance_for_doubtful_accounts_receivable,assets_held_for_sale_current,buildings_and_improvements,...,preferred_shares_number,total_other_finance_cost,receivables_adjustments_allowances,depreciation_income_statement,other_payable,dividends_received_cfi,interest_paid_cff,current_deferred_taxes_assets,amortization_of_securities,other_inventories
0,HON,balance_sheet,2020-12-31,,,,,,,,...,,,,,,,,,,
1,HON,balance_sheet,2021-12-31,6.484000e+09,6.830000e+09,-8.888000e+09,8.141000e+09,-177000000.0,,3.225000e+09,...,,,,,,,,,,
2,HON,balance_sheet,2022-12-31,6.329000e+09,7.440000e+09,-9.291000e+09,8.564000e+09,-326000000.0,,3.394000e+09,...,,,,,,,,,,
3,HON,balance_sheet,2023-12-31,6.849000e+09,7.530000e+09,-9.674000e+09,9.062000e+09,-323000000.0,0.000000e+00,3.528000e+09,...,,,,,,,,,,
4,HON,balance_sheet,2024-12-31,6.880000e+09,7.819000e+09,-9.658000e+09,9.695000e+09,-314000000.0,1.365000e+09,3.658000e+09,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,LRCX,balance_sheet,2021-06-30,,,,,,,,...,,,,,,,,,,
102,LRCX,balance_sheet,2022-06-30,1.011208e+09,4.313818e+09,-1.440325e+09,7.414916e+09,-5606000.0,,1.124381e+09,...,,,,,,,,,,
103,LRCX,balance_sheet,2023-06-30,4.707020e+08,2.823376e+09,-1.642456e+09,7.809002e+09,-5344000.0,,1.286849e+09,...,,,,,,,,,,
104,LRCX,balance_sheet,2024-06-30,6.139660e+08,2.519250e+09,-1.860664e+09,8.223046e+09,-5277000.0,,1.605802e+09,...,,,,,,,,,,


In [36]:
# info_data.groupby(['ticker', 'fullexchangename']).size()
# fin_data.groupby(['ticker']).size()

In [37]:
# Keep only the tickers that actually have statement rows, in first-seen order.
# NOTE(review): this rebinds quantum_tickers, so re-running earlier cells afterwards
# would use the reduced list.
tickers_with_data = fin_data['ticker'].drop_duplicates()
quantum_tickers = tickers_with_data.tolist()
quantum_tickers

['HON',
 'PH',
 'MMM',
 'TDY',
 'KEYS',
 'AEIS',
 'APD',
 'QUBT',
 'EMR',
 'SWKS',
 'QRVO',
 'TXN',
 'ADI',
 'FTV',
 'GTLS',
 'LIN',
 'AI.PA',
 'MKSI',
 'CRYM',
 'ACLS',
 'VECO',
 'LRCX']

In [38]:
from financialtools.config import sec_sector_metric_weights

# Flatten the nested {sector: {metric: weight}} mapping into one row per (sector, metric).
weight_rows = [
    {'sector': sector_name, 'metrics': metric_name, 'weights': weight}
    for sector_name, metric_weights in sec_sector_metric_weights.items()
    for metric_name, weight in metric_weights.items()
]
weights = pd.DataFrame(weight_rows)

In [39]:
from financialtools.processor import FundamentalTraderAssistant

In [40]:
def analyze_ticker(ticker, fin_data, info_data, weights):
    """Run the fundamental evaluation for a single ticker.

    Args:
        ticker: Symbol to analyse.
        fin_data: Frame of statement rows with a ``ticker`` column and a
            datetime ``time`` column.
        info_data: Frame with ``ticker`` and ``sectorkey`` columns.
        weights: Frame with a ``sector`` column; only the rows of the ticker's
            sector are handed to the assistant.

    Returns:
        Whatever ``FundamentalTraderAssistant.evaluate()`` returns.

    Raises:
        IndexError: if ``info_data`` holds no row for ``ticker``.
    """
    data_ticker = fin_data[fin_data['ticker'] == ticker]
    info_data_ticker = info_data[info_data['ticker'] == ticker]
    if info_data_ticker.empty:
        # Same exception type as the former bare ``.iloc[0]``, but with a
        # message that names the ticker.
        raise IndexError(f"No info row found for ticker {ticker!r}")

    sector_ticker = info_data_ticker['sectorkey'].iloc[0]
    sector_weights = weights[weights['sector'] == sector_ticker]

    # merge() returns a new frame, so rewriting ``time`` below does not touch fin_data.
    merged_data = data_ticker.merge(info_data_ticker, how="left", on=["ticker"])
    merged_data['time'] = merged_data['time'].dt.year

    assistant = FundamentalTraderAssistant(merged_data, weights=sector_weights)
    return assistant.evaluate()

# One evaluation result per ticker, in quantum_tickers order.
fa_analysis = [
    analyze_ticker(symbol, fin_data, info_data, weights)
    for symbol in quantum_tickers
]

In [41]:
# Display the list of per-ticker evaluation results.
fa_analysis

[{'metrics':   ticker  time  GrossMargin  OperatingMargin  NetProfitMargin  EBITDAMargin  \
  0    HON  2020          NaN              NaN              NaN           NaN   
  1    HON  2021     0.358543         0.180274         0.161142      0.255903   
  2    HON  2022     0.369904         0.181216         0.140021      0.225484   
  3    HON  2023     0.372784         0.193225         0.154329      0.248213   
  4    HON  2024     0.380851         0.198971         0.148190      0.249493   
  
          ROA       ROE  FCFToRevenue  FCFYield  FCFtoDebt  DebtToEquity  \
  0       NaN       NaN           NaN       NaN        NaN           NaN   
  1  0.085962  0.298454      0.149541  0.039871   0.249285      1.111045   
  2  0.079743  0.297419      0.127108  0.034948   0.219506      1.229981   
  3  0.091963  0.356837      0.117315  0.033343   0.199712      1.358224   
  4  0.075868  0.306407      0.128137  0.038243   0.153080      1.730759   
  
     CurrentRatio  
  0           NaN  
 

In [42]:
from dotenv import load_dotenv 
load_dotenv()

from langchain_core.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

from financialtools.pydantic_models import StockRegimeAssessment
from financialtools.prompts import system_prompt_StockRegimeAssessment
from financialtools.utils import get_sector_for_ticker, get_market_metrics, dataframe_to_json

In [43]:
from pydantic import BaseModel, Field
from typing import Literal, Optional

In [44]:
# Structured output schema the LLM answer is parsed into.
# NOTE(review): this rebinds the StockRegimeAssessment name imported from
# financialtools.pydantic_models in an earlier cell — confirm the local copy is intended.
# Documented with '#' comments only: field descriptions (and a class docstring,
# if one were added) become part of the schema text sent to the model.
class StockRegimeAssessment(BaseModel):
    # Required: symbol of the analysed stock.
    ticker: str = Field(
        ..., description='The ticker of the stock under analysis'
    )
    # Required: one of the three allowed regime labels.
    regime: Literal["bull", "bear", "sideway"] = Field(
        ..., description="The fundamental regime classification of the stock"
    )
    # Required: free-text justification of the chosen regime.
    regime_rationale: str = Field(
        ..., description="Concise explanation justifying the regime classification based on the financial metrics, composite ratio and red flags"
    )
    # Required: free-text summary of year-over-year metric changes.
    metrics_movement: str = Field(
        ..., description=(
            "A summary description of how key financial metrics have moved across years, "
            "e.g., 'GrossMargin increased steadily, DebtToEquity rose sharply, FCFYield remained stable.'"
        )
    )
    # Optional (defaults to None): signals that contradict the overall trend.
    non_aligned_findings: Optional[str] = Field(
        None,
        description=(
            "Observations or signals that are not aligned with the overall metric trends, "
            "such as contradictory indicators, anomalies."
        )
    )
    # Required: valuation label.
    # NOTE(review): "overvaluated"/"undervaluated" are the exact literals accepted;
    # the spelling is left untouched because it is part of the output contract.
    evaluation: Literal["overvaluated", "undervaluated", "fair"] = Field(
        ..., description="The valuation of the stock based on the Evaluation metrics"
    )

In [45]:
# Instantiate the LLM (OpenAI GPT-4 or your preferred model)
llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0)

# Parser that validates the model output against the StockRegimeAssessment schema
base_parser = PydanticOutputParser(pydantic_object=StockRegimeAssessment)

# Wrap the parser exactly once with OutputFixingParser. Wrapping from a separate
# base_parser name (instead of re-wrapping `parser`) keeps this cell idempotent:
# re-running it no longer nests one fixing parser inside another.
parser = OutputFixingParser.from_llm(parser=base_parser, llm=llm)

# Get the format instructions string from the parser
format_instructions = parser.get_format_instructions()


In [46]:
# NOTE(review): fa_prompt is not referenced by the other visible cells — the chain
# below is built from system_prompt_StockRegimeAssessment. Confirm whether this
# local prompt is still needed.
fa_prompt = """
    You are a trader assistant specializing in fundamental analysis. 

    Based on the following financial data, provide a concise overall assessment that classifies 
    the stock’s current fundamental regime as one of:

    - bull: Strong and improving fundamentals supporting a positive outlook.
    - bear: Weak or deteriorating fundamentals indicating risk or decline.
    - sideway: Mixed or stable fundamentals with no clear directional trend.

    Financial data consists of financial metrics, evaluation metrics, composite score and red flags.

    Financial metrics provided are the following:

    Profitability and Margin Metrics:
        -GrossMargin: gross profit / total revenue 
        -OperatingMargin: operating income / total revenue
        -NetProfitMargin: net income / total revenue
        -EBITDAMargin: ebitda / total revenue
    Returns metrics:
        -ROA: net income / total assets
        -ROE: net income / total equity
    Cash Flow Strength metrics: 
        -FCFToRevenue: free cash flow / total revenue
        -FCFYield: free cash flow / market capitalization
        -FCFtoDebt: free cash flow / total debt
    Leverage & Solvency metrics:
        -DebtToEquity: total debt / total equity
    Liquidity metrics:
        -CurrentRatio: working capital / total liabilities

    Evaluation metrics provided are the following:
        -bvps: total equity / shares outstanding
        -fcf_per_share: free cash flow / shares outstanding
        -eps: earnings per share
        -P/E: current stock price / eps
        -P/B: current stock price / bvps
        -P/FCF: current stock price / fcf_per_share
        -EarningsYield: eps / current stock price
        -FCFYield: free cash flow / market capitalization

    The composite score is a weighted average (1 to 5) that summarizes the company’s overall fundamental health.
    It reflects profitability, efficiency, leverage, liquidity, and cash flow strength, based on the above mentioned financial metrics (evaluation metrics are not part of the calculation).

    The composite score ranges:
    1 = Weak fundamentals
    5 = Strong fundamentals

    Each financial metric (evaluation metrics are not part of the calculation) is scored on a 1–5 scale and multiplied by its weight. The composite score is the sum of weighted scores divided by the total weight.

    A red flag is an early warning signal that highlights potential weaknesses in a company’s financial statements 
    or business quality. These warnings do not always mean immediate distress, but they indicate heightened risk that 
    traders should carefully consider before taking a position.


"""

# Insert the parser's format instructions into the imported system prompt template.
system_prompt_filled = system_prompt_StockRegimeAssessment.format(
    format_instructions=format_instructions,
)

In [47]:
def fa_augmented_analysis(analysis, system_prompt_filled, llm, parser):
    """Ask the LLM for a structured assessment of one ticker's analysis tables.

    Args:
        analysis: Mapping with the DataFrames ``metrics`` (must contain a
            ``ticker`` column), ``eval_metrics``, ``composite_scores``,
            ``red_flags`` and ``raw_red_flags``.
        system_prompt_filled: Final system prompt text, already formatted.
        llm: Chat model placed after the prompt in the chain.
        parser: Output parser placed after the model in the chain.

    Returns:
        The result of invoking the ``prompt | llm | parser`` chain.
    """
    ticker = analysis['metrics']['ticker'].iloc[0]
    red_flags = pd.concat(
        [analysis['red_flags'], analysis['raw_red_flags']],
        axis=0,
        ignore_index=True,
    )

    # Each prompt section is fed from the table of the same name (the two
    # lookups for eval_metrics / composite_scores used to be swapped).
    sections = {
        "metrics": dataframe_to_json(analysis['metrics']),
        "eval_metrics": dataframe_to_json(analysis['eval_metrics']),
        "composite_scores": dataframe_to_json(analysis['composite_scores']),
        "red_flags": dataframe_to_json(red_flags),
    }

    prompt = ChatPromptTemplate.from_messages([
        # The system text is passed through a placeholder rather than used as
        # the template itself: it is already formatted and may contain literal
        # braces (e.g. a JSON schema in the format instructions) that must not
        # be interpreted as template variables.
        ("system", "{system_prompt}"),
        ("human", "Ticker:\n{ticker}\nMetrics:\n{metrics}\nScores:\n{composite_scores}\nEvaluation Metrics:\n{eval_metrics}\nRedFlags:\n{red_flags}\n"),
    ])
    chain = prompt | llm | parser
    response = chain.invoke({
        "system_prompt": system_prompt_filled,
        "ticker": ticker,
        **sections,
    })
    return response

In [48]:
# One LLM assessment per analysed ticker, in fa_analysis order.
fa_report = [
    fa_augmented_analysis(ticker_analysis, system_prompt_filled, llm, parser)
    for ticker_analysis in fa_analysis
]

In [49]:
import rich
# Pretty-print every assessment with rich.
for assessment in fa_report:
    rich.print(assessment)