In [None]:
!pip install transformers
!pip install torch

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [None]:
import torch
from huggingface_hub import login
from transformers import pipeline
import pandas as pd
import os
from sqlalchemy import create_engine, MetaData, select, Table
from sqlalchemy.orm import Session
import numpy as np
import textwrap

In [None]:
# Never store the access token in the notebook: read it from the HF_TOKEN
# environment variable, or prompt for it interactively when it is not set.
HF_token = os.environ.get("HF_TOKEN")
if not HF_token:
    from getpass import getpass

    HF_token = getpass("Hugging Face access token: ")
login(token=HF_token)

In [None]:
# Checkpoint loaded into the text-generation pipeline that
# generate_recommendation() calls through the module-level `pipe`.
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Weights are requested in bfloat16; device placement is left to the library
# via device_map="auto".
pipe = pipeline(task="text-generation", model=model_id, torch_dtype=torch.bfloat16, device_map="auto")

In [None]:
# Per-investor-type feature weights consumed by calculate_scores().
# Outer key: investor type ('value', 'growth', 'income'); inner key: feature name.
# Each weight multiplies the normalized value of its feature, so a positive weight
# rewards above-average values and a negative weight penalizes them.
weights = {
    'value': {
        'P/E Ratio': -4,            # Lower P/E indicates undervaluation, but excessive negativity can skew results.
        'P/B Ratio': -3,            # Important for value investors to find companies trading below book value.
        'D/E Ratio': -2,            # Avoid overly leveraged companies; moderate weight.
        'Free Cash Flow': 3,        # Free cash flow is highly desirable, showing financial health and flexibility.
        'Dividend Yield': 3,        # Value stocks often pay dividends; moderate weight here.
        'Net Income': 1,            # Positive net income indicates profitability; important but less critical.
        'Revenue': 1,               # Indicates size and potential; less focus for value investors.
        'Operating Cash Flow': 2,   # Indicates operational efficiency and sustainability.
        'CapEx': -1                 # High CapEx can indicate reinvestment but also increased spending; slight penalty.
    },
    'growth': {
        'Revenue': 5,               # Revenue growth is key for growth investors; highest weight.
        'Net Income': 2,            # Indicates profitability but not the main focus for early-stage growth.
        'Operating Cash Flow': 3,   # Indicates efficient operations; critical for long-term growth sustainability.
        'Free Cash Flow': 2.5,      # Important for growth companies to reinvest in future projects.
        'CapEx': 1.5,               # Growth companies often reinvest heavily; moderate weight.
        'Dividend Yield': -2,       # Growth companies often don’t pay dividends; negative weight.
        'P/E Ratio': 1.5,           # Valuation matters but secondary to growth potential. NOTE(review): positive weight rewards a higher P/E - confirm intended.
        'P/B Ratio': 1,             # Low weight as book value is less relevant for growth.
        'D/E Ratio': -0.5           # Slight penalty for excessive leverage, but growth investors tolerate it.
    },
    'income': {
        'Revenue': 1,               # Provides context for company size and dividend sustainability.
        'Net Income': 2,            # Indicates earnings available to distribute as dividends.
        'Operating Cash Flow': 2.5, # Reflects the ability to generate cash for dividends.
        'Free Cash Flow': 3.5,      # High weight, as free cash flow supports consistent dividend payouts.
        'CapEx': -1.5,              # High CapEx can detract from funds available for dividends.
        'Dividend Yield': 8,        # Most critical metric for income investors.
        'P/E Ratio': 0.5,           # Valuation matters, but not a primary focus.
        'P/B Ratio': 0.5,           # Book value has a minor role for income stocks.
        'D/E Ratio': -1             # Too much debt can threaten dividend sustainability; penalty.
    }
}


In [None]:
def normalize(values):
    """
    Standardize a sequence of numbers to z-scores.

    Each value is transformed to (value - mean) / std, where std is the
    population standard deviation (np.std default). This is NOT min-max
    scaling: results are centered on zero and are not bounded to [0, 1].

    Parameters:
    - values: Sized sequence of numbers (list, array, Series).

    Returns:
    - A list with one standardized value per input value. All zeros when the
      standard deviation is 0 (every value identical); an empty list for
      empty input. NaN inputs propagate to every output.
    """
    # Guard first: np.mean/np.std on an empty sequence emit RuntimeWarnings.
    if len(values) == 0:
        return []

    mean_val = np.mean(values)
    std_dev = np.std(values)
    if std_dev != 0:
        return [(val - mean_val) / std_dev for val in values]
    # Zero spread: every value equals the mean, so every z-score is 0.
    return [0.0] * len(values)


def calculate_scores(ratios, metrics, investor_type, weights):
    """
    Score one company's history for a given investor profile.

    Nine feature series are built, one value per period:
    - taken as-is from `ratios`: P/E, P/B, D/E, dividend yield, free cash flow;
    - taken from `metrics` and divided by market cap (adjusted price per share
      times common shares outstanding): net income, revenue, operating cash
      flow, capital expenditure.
    Every series is passed through `normalize`, multiplied by the weight for
    `investor_type`, summed per period, and the per-period totals are averaged.

    Rows of `ratios` and `metrics` are paired by position, so both frames are
    assumed to be ordered identically (TODO confirm against the caller).

    NOTE(review): `normalize` standardizes each series against the company's
    OWN history, so each normalized series averages to zero and the value
    returned here is ~0 (floating-point noise) for every company with finite
    data. Ranking companies by it is therefore not meaningful. Fixing this
    needs a design decision (e.g. normalizing across companies, or scoring only
    the latest period), so the computation is intentionally left as it was.

    Parameters:
    - ratios: DataFrame with Price_To_Earnings_Ratio, Price_To_Book_Ratio,
      Debt_To_Equity_Ratio, Dividend_Yield and Free_Cash_Flow columns.
    - metrics: DataFrame with Adjusted_Price_Per_Share,
      Total_Common_Shares_Outstanding, Net_Income, Revenue,
      Operating_Cash_Flow and Capital_Expenditure columns.
    - investor_type: Key into `weights`.
    - weights: Mapping investor type -> {feature name -> weight}.

    Returns:
    - The mean weighted score as a float; 0.0 when `ratios` has no rows.

    Raises:
    - KeyError: unknown `investor_type`, or a missing weight/column.
    - IndexError: `metrics` has fewer rows than `ratios`.
    """
    type_weights = weights[investor_type]

    num_periods = len(ratios)
    if num_periods == 0:
        # Nothing to score; previously this ended in a ZeroDivisionError.
        return 0.0
    if len(metrics) < num_periods:
        raise IndexError(
            f"metrics has {len(metrics)} rows but ratios has {num_periods}; "
            "every ratio period needs a matching metrics row"
        )

    # Only the first `num_periods` metric rows are paired with ratio rows.
    period_metrics = metrics.iloc[:num_periods]
    market_cap = (
        period_metrics["Adjusted_Price_Per_Share"].to_numpy(dtype=float)
        * period_metrics["Total_Common_Shares_Outstanding"].to_numpy(dtype=float)
    )

    def _ratio(column):
        return ratios[column].to_numpy(dtype=float)

    def _per_market_cap(column):
        return period_metrics[column].to_numpy(dtype=float) / market_cap

    # Feature name (as used in `weights`) -> raw per-period series.
    features = {
        "P/E Ratio": _ratio("Price_To_Earnings_Ratio"),
        "P/B Ratio": _ratio("Price_To_Book_Ratio"),
        "D/E Ratio": _ratio("Debt_To_Equity_Ratio"),
        "Dividend Yield": _ratio("Dividend_Yield"),
        "Free Cash Flow": _ratio("Free_Cash_Flow"),
        "Net Income": _per_market_cap("Net_Income"),
        "Revenue": _per_market_cap("Revenue"),
        "Operating Cash Flow": _per_market_cap("Operating_Cash_Flow"),
        "CapEx": _per_market_cap("Capital_Expenditure"),
    }

    # Accumulate the weighted, standardized features into one total per period.
    period_scores = np.zeros(num_periods)
    for feature_name, series in features.items():
        standardized = np.asarray(normalize(series.tolist()), dtype=float)
        period_scores += type_weights[feature_name] * standardized

    # Average over periods (iteration order is irrelevant for a mean).
    return float(period_scores.mean())


In [None]:
def get_summary(company_id, year, table, db_session=None):
    """
    Fetch the stored summary text for one company and year.

    Parameters:
    - company_id: Company identifier; converted with int() before querying.
    - year: Year of the summary; converted with int() before querying.
    - table: Table object exposing CompanyID and Year columns via `table.c`
      and a SummaryText column on its rows.
    - db_session: Session to query with. When omitted, the notebook-level
      `session` global is used, which must already exist at call time.

    Returns:
    - The SummaryText of the single matching row, or None when no row matches
      or when the lookup fails (including more than one matching row). Failures
      are printed, not raised, so one bad company does not stop the caller.
    """
    active_session = None
    try:
        # Resolved inside the try so a missing global is reported like any other failure.
        active_session = db_session if db_session is not None else session

        summary = active_session.query(table).filter(
            table.c.CompanyID == int(company_id),
            table.c.Year == int(year),
        ).one_or_none()

        if summary is not None:
            return summary.SummaryText
        return None  # No summary found for the given CompanyID and Year
    except Exception as e:
        print(f"Error querying summary: {e}")
        # A failed statement can leave the transaction aborted; roll back so
        # later queries on the same session keep working.
        if active_session is not None:
            active_session.rollback()
        return None

In [None]:
def stringify_metrics(ratios_df, metrics_df, investor_type):
    """
    Render per-period financial data as text tailored to the investor type.

    One line is produced per row of `ratios_df`; row i of `metrics_df` is read
    alongside row i of `ratios_df` (the frames are paired by position). All
    numeric values are rounded to two decimals.

    Parameters:
    - ratios_df: DataFrame with Year, Quarter, Price_To_Earnings_Ratio,
      Price_To_Book_Ratio, Debt_To_Equity_Ratio, Dividend_Yield and
      Free_Cash_Flow columns.
    - metrics_df: DataFrame with Basic_EPS, Adjusted_Price_Per_Share,
      Net_Income and Revenue columns.
    - investor_type: "value", "growth" or "income"; any other value selects a
      layout listing every metric.

    Returns:
    - The period lines joined with newlines; an empty string when `ratios_df`
      has no rows.
    """
    # Metric layout per investor type (text kept exactly as emitted before).
    layouts = {
        "value": (
            "[P/E: {p_e}, P/B: {p_b}, D/E: {d_e}, EPS: {basic_eps}, "
            "Adjusted Price: {adjusted_price}, FCF: {fcf},  Revenue: {revenue}]"
        ),
        "growth": (
            "[Revenue: {revenue}, Net Income: {net_income}, EPS: {basic_eps}, "
            "Adjusted Price: {adjusted_price}, FCF: {fcf}]"
        ),
        "income": (
            "[Dividend Yield: {div_yield}%, D/E: {d_e}, "
            "Adjusted Price: {adjusted_price}, FCF: {fcf}]"
        ),
    }
    all_metrics_layout = (
        "[P/E: {p_e}, P/B: {p_b}, D/E: {d_e}, Dividend Yield: {div_yield}%, "
        "EPS: {basic_eps}, Adjusted Price: {adjusted_price}, FCF: {fcf}, "
        "Net Income: {net_income}, Revenue: {revenue}]"
    )
    layout = layouts.get(investor_type, all_metrics_layout)

    # (placeholder name, source frame, column) for every metric a layout can use.
    sources = (
        ("p_e", ratios_df, "Price_To_Earnings_Ratio"),
        ("p_b", ratios_df, "Price_To_Book_Ratio"),
        ("d_e", ratios_df, "Debt_To_Equity_Ratio"),
        ("div_yield", ratios_df, "Dividend_Yield"),
        ("fcf", ratios_df, "Free_Cash_Flow"),
        ("basic_eps", metrics_df, "Basic_EPS"),
        ("adjusted_price", metrics_df, "Adjusted_Price_Per_Share"),
        ("net_income", metrics_df, "Net_Income"),
        ("revenue", metrics_df, "Revenue"),
    )

    period_lines = []
    for i in range(len(ratios_df)):
        year = ratios_df["Year"].iloc[i]
        quarter = ratios_df["Quarter"].iloc[i]
        # Read column-wise so each value keeps its own dtype (Year stays an integer).
        values = {name: round(frame[column].iloc[i], 2) for name, frame, column in sources}
        metrics = layout.format(**values)
        period_lines.append(f"Year: {year}, Quarter: {quarter}, Metrics: {metrics}")

    # An empty frame yields "" (the unused CompanyID lookup used to raise here).
    return "\n".join(period_lines)


In [None]:
def generate_recommendation(prompt, investor_type, max_new_tokens=512):
    """
    Ask the text-generation pipeline for an investment summary of one company.

    A system message (advisor role, investor-specific guidelines, required
    response structure) and a user message carrying `prompt` are sent to the
    module-level `pipe`, which must already be defined.

    Parameters:
    - prompt: Text with the company's data to analyze.
    - investor_type: "value", "growth" or "income"; selects the guidelines.
    - max_new_tokens: Generation budget forwarded to the pipeline (default 512,
      the previously hard-coded value).

    Returns:
    - The content of the last message in the pipeline's generated conversation.

    Raises:
    - KeyError: `investor_type` is not one of the three supported types.
    """
    investor_messages = {
        "value": """
- Focus on P/E and P/B ratios to assess undervaluation or overvaluation.
- Evaluate D/E for financial stability and FCF for cash generation.
- Look for consistent revenue growth and positive trends in profitability metrics.
- Consider if the company has a competitive advantage or is undervalued relative to peers.
        """,
        "growth": """
- Emphasize revenue and EPS growth as indicators of a company's expansion potential.
- Accept higher P/E or P/B ratios if justified by strong growth metrics.
- Assess if reinvestment strategies (high D/E) are driving sustainable growth.
- Ignore dividends unless they are part of a broader growth strategy.
        """,
        "income": """
- Focus on dividend yield, payout stability, and consistent FCF.
- Evaluate D/E to ensure financial health and capacity to maintain payouts.
- Stable or growing revenue and earnings are critical for income stability.
- P/E and P/B are secondary but should not indicate overvaluation.
        """
    }

    advisor_prompt = f"""
You are a financial advisor creating a comprehensive investment summary tailored to a {investor_type} investor.
Use the following data sources:
1. Historical financial ratios and metrics (P/E, P/B, D/E, FCF, EPS, Revenue, Dividend Yield).
2. Key trends from five years of quarterly data.
3. Summary of Management's Discussion and Analysis (MD&A) from the most recent 10-K filing.

Guidelines:
{investor_messages[investor_type]}

Structure your response as follows:
1. **Investment Overview**: Provide a concise summary of the company's overall financial health and performance trends.
2. **Key Metrics**: Highlight the most relevant metrics based on investor type and explain their significance.
3. **Insights from MD&A**: Integrate qualitative insights from the MD&A to contextualize the financial data.
4. **Final Recommendation**: Conclude with a clear, polished sentence summarizing the investment's suitability for the specified investor type.

Ensure your output is coherent and ends smoothly. Avoid incomplete sentences.
    """

    # Chat-style input: instructions as the system turn, company data as the user turn.
    messages = [
        {"role": "system", "content": advisor_prompt},
        {"role": "user", "content": f"Here is the data for analysis:\n{prompt}"},
    ]

    output = pipe(
        messages,
        max_new_tokens=max_new_tokens,
    )

    # The generated conversation ends with the model's reply; return its text.
    return output[0]["generated_text"][-1]["content"]


In [None]:
def custom_wrap_text(text, max_line_length):
  """
  Wrap text to a maximum line width while keeping its existing line breaks.

  Each input line is wrapped on its own, so paragraphs, numbered items and
  blank separator lines survive (wrapping the whole text in one call would
  turn every newline into a space and merge everything into one paragraph).

  Parameters:
  - text: The text to wrap.
  - max_line_length: Maximum width of an output line.

  Returns:
  - The wrapped text joined with newlines; "" for empty input.
  """
  wrapped_lines = []
  for line in text.splitlines():
    # textwrap.wrap returns [] for a blank line; keep it as an empty line.
    wrapped_lines.extend(textwrap.wrap(line, width=max_line_length) or [""])
  return '\n'.join(wrapped_lines)

In [None]:
if __name__=="__main__":

  # Ask which investing style to rank for, then score every company, and have the
  # language model write a report for the best-scoring companies that have a summary.
  print("""Which type of investing philosophy aligns with your personal investment goals?

  1. Value Investing:  Value investing focuses on identifying stocks that appear undervalued relative to their intrinsic worth, often assessed through financial metrics
                       such as price-to-earnings or price-to-book ratios. Value investors aim to buy these "bargain" securities and profit when the market eventually recognizes their true value.

  2. Growth Investing: Growth investing focuses on stocks with the potential for above-average market growth, often characterized by high revenue or earnings growth, innovation, or industry disruption.
                       While this style offers the potential for significant rewards, it also carries higher risks, as these companies may be more volatile or unproven.

  3. Income Investing: Income investing prioritizes generating a steady stream of passive income, typically through dividend-paying stocks, bonds, or other income-generating assets.
                       While this approach offers limited potential for explosive gains, it provides stability and predictable returns over time.
        """)

  LAST_DATA_YEAR = 2021       # Financial data after this year is ignored.
  SUMMARY_YEAR = 2022         # Year of the 10-K summary requested for each company.
  NUM_RECOMMENDATIONS = 3     # Number of companies to report.
  WRAP_WIDTH = 150            # Line width of the printed report.

  investor_type = ""
  while investor_type not in ['value', 'growth', 'income']:
    # Tolerate stray whitespace and capitalization in the answer.
    investor_type = input("Enter 'value', 'growth', or 'income': ").strip().lower()

  # The connection string (which contains credentials) is read from the
  # environment so it never lives in the notebook.
  DATABASE_URL = os.environ.get("DATABASE_URL")
  if not DATABASE_URL:
    raise RuntimeError("Set the DATABASE_URL environment variable to the SQLAlchemy connection string.")
  engine = create_engine(DATABASE_URL)

  metrics_df = pd.read_sql("SELECT * FROM company_metrics", con=engine)
  ratios_df = pd.read_sql("SELECT * FROM company_ratios", con=engine)

  metrics_df = metrics_df[metrics_df["Year"] <= LAST_DATA_YEAR]
  ratios_df = ratios_df[ratios_df["Year"] <= LAST_DATA_YEAR]

  # Reflect the lookup tables from the database
  metadata = MetaData()
  company_info_table = Table('company_information', metadata, autoload_with=engine)
  company_ten_k_summaries = Table('company_ten_k_summaries', metadata, autoload_with=engine)

  unique_ids = list(metrics_df["CompanyID"].unique())
  scores = dict()

  for company_id in unique_ids:
    metrics = metrics_df[metrics_df["CompanyID"] == company_id]
    ratios = ratios_df[ratios_df["CompanyID"] == company_id]
    scores[company_id] = calculate_scores(ratios, metrics, investor_type, weights)

  # Best score first. NaN scores sort last so they cannot win by accident; ties keep
  # their original order, matching what repeated max() calls would have picked.
  ranked_ids = sorted(
    scores,
    key=lambda cid: float("-inf") if np.isnan(scores[cid]) else scores[cid],
    reverse=True,
  )

  # `session` stays a notebook-level name because get_summary() reads it.
  session = Session(bind=engine)
  try:
    num_rec = 1
    response = f"Recommended Investments for the {investor_type} investor:\n"

    # Walking the ranked list ends cleanly when fewer companies qualify than requested.
    for top_company in ranked_ids:
      if num_rec > NUM_RECOMMENDATIONS:
        break

      # Check for a summary first so companies without one cost a single query.
      company_summary = get_summary(top_company, SUMMARY_YEAR, company_ten_k_summaries)
      if company_summary is None:
        continue

      query = select(company_info_table).where(company_info_table.c.CompanyID == int(top_company))
      company_info = session.execute(query).fetchone()
      if company_info is None:
        # No name/sector on record for this company; it cannot be reported.
        continue

      company_data = stringify_metrics(ratios_df[ratios_df["CompanyID"] == top_company], metrics_df[metrics_df["CompanyID"] == top_company], investor_type)
      company_prompt = f"""Numerical Data:
    {company_data}

    Summary of Management and Discussion:
    {company_summary}
    """

      company_response = generate_recommendation(company_prompt, investor_type)
      torch.cuda.empty_cache()

      response += f"""\n{num_rec}. {company_info.CompanyName}, {company_info.Sector} Sector:
{company_response}
          """

      num_rec += 1
  finally:
    # Always release the database session, even if generation fails midway.
    session.close()

  print(custom_wrap_text(response, WRAP_WIDTH))

Which type of investing philosophy aligns with your personal investment goals?

  1. Value Investing:  Value investing focuses on identifying stocks that appear undervalued relative to their intrinsic worth, often assessed through financial metrics
                       such as price-to-earnings or price-to-book ratios. Value investors aim to buy these "bargain" securities and profit when the market eventually recognizes their true value.

  2. Growth Investing: Growth investing focuses on stocks with the potential for above-average market growth, often characterized by high revenue or earnings growth, innovation, or industry disruption.
                       While this style offers the potential for significant rewards, it also carries higher risks, as these companies may be more volatile or unproven.

  3. Income Investing: Income investing prioritizes generating a steady stream of passive income, typically through dividend-paying stocks, bonds, or other income-generating assets.
 

In [None]:
# Manual cleanup cell: closes the notebook-level `session` created by the main cell.
# It requires that cell to have run first, otherwise `session` is undefined.
session.close()

In [None]:
# Manual cleanup cell: asks PyTorch to release cached GPU memory it is no longer using.
torch.cuda.empty_cache()