In [3]:
import pandas as pd

In [4]:
# Load the raw financial extract into `df`.
# NOTE(review): this is an absolute path on one specific machine; point it at a
# local copy of the CSV before running the notebook elsewhere.
df = pd.read_csv(
    r"C:\Users\91807\OneDrive\Desktop\Data science\Projects\GIT\BCG_GEN_AI\BCG_X_GEN_AI\TASK_1\Task_1_data_Extraction_Initial_analysis.csv",
    sep=",",
    skiprows=2,  # skip the first two lines of the file before reading the header
)

In [5]:
# Tidy the header names: trim surrounding whitespace, lower-case them and
# replace spaces with underscores.
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

In [6]:
# Assign the seven analysis column names. The assignment is positional, so the
# order here has to match the column order of the loaded file.
df.columns = [
    'Company_Name',
    'Year',
    'Total_Revenue',
    'Net_Income',
    'Total_Assets',
    'Total_Liabilities',
    'Operating_Cash_Flow',
]

In [7]:
# Sanity check: show the column names now in effect.
print(list(df.columns))

['Company_Name', 'Year', 'Total_Revenue', 'Net_Income', 'Total_Assets', 'Total_Liabilities', 'Operating_Cash_Flow']


In [8]:
# Order the rows by company and then by year before the per-company
# calculations that follow, which compare each row with the one before it.
df = df.sort_values(by=['Company_Name', 'Year'])

In [9]:
# Year-over-year percentage change of each financial metric, computed within
# each company. Maps the new growth column to the metric it is derived from.
yoy_columns = {
    'Revenue_Growth(%)': 'Total_Revenue',
    'Net_Income_Growth(%)': 'Net_Income',
    'Assets_Growth(%)': 'Total_Assets',
    'Operating_Cash_Flow_Growth(%)': 'Operating_Cash_Flow',
}
for growth_col, metric_col in yoy_columns.items():
    # pct_change() returns a fraction; scale it to a percentage.
    df[growth_col] = df.groupby('Company_Name')[metric_col].pct_change() * 100


In [10]:
# Average YoY growth by company.
# The means are taken BEFORE the NaN fill below, so each company's first year
# (which has no previous year to compare with) is excluded from the average
# instead of being counted as 0% growth.
growth_columns = ['Revenue_Growth(%)',
                  'Net_Income_Growth(%)',
                  'Assets_Growth(%)',
                  'Operating_Cash_Flow_Growth(%)']
summary = df.groupby('Company_Name')[growth_columns].mean().round(2)

# Replace the undefined growth values with 0 in the row-level table.
# Each column needs its own key: without the separating ": 0," two adjacent
# string literals are concatenated into a single non-existent column name and
# the Operating_Cash_Flow_Growth(%) NaNs are silently left in place.
# 'Rate_of_Assets' is only created in a later cell; fillna skips keys that are
# not columns of the frame, so listing it here is harmless on a first run.
df = df.fillna({'Revenue_Growth(%)': 0,
                'Net_Income_Growth(%)': 0,
                'Assets_Growth(%)': 0,
                'Operating_Cash_Flow_Growth(%)': 0,
                'Rate_of_Assets': 0})

In [11]:
# Show the per-company average YoY growth table.
print(summary)

              Revenue_Growth(%)  Net_Income_Growth(%)  Assets_Growth(%)  \
Company_Name                                                              
Apple                      2.39                 -0.90              3.54   
Microsoft                 15.30                 18.67              5.71   
Tesla                      0.24                  1.22              4.08   

              Operating_Cash_Flow_Growth(%)  
Company_Name                                 
Apple                                  4.65  
Microsoft                             25.11  
Tesla                                  0.08  


In [12]:
# Total revenue as a percentage of total assets, per row.
df['Rate_of_Assets'] = df['Total_Revenue'].div(df['Total_Assets']).mul(100)

# Per-company average of that ratio, rounded to two decimals.
rate_summary = (
    df.groupby('Company_Name')['Rate_of_Assets']
    .mean()
    .round(2)
)
print(rate_summary)

Company_Name
Apple        107.38
Microsoft     43.61
Tesla         85.28
Name: Rate_of_Assets, dtype: float64


In [36]:
import os
import difflib
import re
from flask import Flask, request, render_template_string


In [69]:
# Average YoY growth (percent) per company, one row per company. The figures
# mirror the `summary` table printed earlier in this notebook.
df_growth = pd.DataFrame(
    [
        ("Apple", 2.39, -0.90, 3.54, 4.65),
        ("Microsoft", 15.30, 18.67, 5.71, 25.11),
        ("Tesla", 0.24, 1.22, 4.08, 0.08),
    ],
    columns=[
        "Company_Name",
        "Revenue_Growth(%)",
        "Net_Income_Growth(%)",
        "Assets_Growth(%)",
        "Operating_Cash_Flow_Growth(%)",
    ],
)

# Lookup from lower-cased company name to its display name.
COMPANIES = {name.lower(): name for name in df_growth["Company_Name"]}

def pct(v):
    """Format a number as a percentage string with two decimals.

    Args:
        v: The value to format.

    Returns:
        ``"<v>%"`` with two decimal places when ``v`` is a finite real number
        (Python or numpy int/float), otherwise ``"N/A"``. Non-numeric values,
        booleans, NaN and infinities all yield ``"N/A"``.
    """
    import math
    import numbers

    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(v, bool) or not isinstance(v, numbers.Real):
        return "N/A"
    value = float(v)
    # NaN (e.g. a missing value coming out of pandas) and +/-inf are not
    # meaningful percentages.
    if not math.isfinite(value):
        return "N/A"
    return f"{value:.2f}%"

def normalize(q: str) -> str:
    """Return a canonical form of a query string for matching.

    The text is lower-cased and stripped of surrounding whitespace, curly
    quotes are replaced by their ASCII equivalents, and any trailing run of
    '?', '.', '!' or spaces is removed.
    """
    ascii_quotes = str.maketrans({"’": "'", "“": '"', "”": '"'})
    lowered = q.lower().strip()
    return lowered.translate(ascii_quotes).rstrip("?.! ")

def tokenize(q: str) -> set:
    """Return the set of distinct alphanumeric words in the normalized query."""
    cleaned = normalize(q)
    return {match.group() for match in re.finditer(r"[a-z0-9]+", cleaned)}

def find_company(user_query: str) -> str | None:
    """Detect the company mentioned in the user query.

    Matching happens in two stages:

    1. Direct match: the first word of the query (in the order typed) that is
       a known company name wins.
    2. Fuzzy match for typos (e.g., micorsoft -> Microsoft): every word of the
       query, and the query as a whole, is compared against each company name
       and the best similarity is kept.

    Args:
        user_query: Free-text question from the user.

    Returns:
        The company's display name, or None when nothing matches directly and
        no fuzzy similarity reaches 0.75.
    """
    q = normalize(user_query)
    # Every token is a substring of q, so q.index gives its first position.
    # Ordering by it makes the result deterministic when several companies
    # are mentioned (set iteration order is not).
    words = sorted(tokenize(q), key=q.index)

    # Direct token match
    for word in words:
        if word in COMPANIES:
            return COMPANIES[word]

    # Fuzzy match. Comparing only the whole sentence against a short company
    # name can never reach the threshold once the query has more than one
    # word, so each word is scored on its own as well. The whole query is kept
    # as a candidate so inputs such as "micro soft" still resolve.
    best, score = None, 0.0
    for candidate in [q, *words]:
        for lc, proper in COMPANIES.items():
            s = difflib.SequenceMatcher(a=candidate, b=lc).ratio()
            if s > score:
                best, score = proper, s
    return best if score >= 0.75 else None

def row_for(company: str) -> pd.Series | None:
    """Return the first `df_growth` row whose Company_Name equals `company`.

    Returns None when no row matches.
    """
    matches = df_growth[df_growth["Company_Name"] == company]
    if matches.empty:
        return None
    return matches.iloc[0]

def handle_revenue(company: str) -> str:
    """Build the revenue-growth answer for `company`.

    Returns a "No data found" message when the company has no row in the
    growth table.
    """
    record = row_for(company)
    if record is not None:
        growth = pct(record['Revenue_Growth(%)'])
        return f"{company}: Revenue YoY growth = {growth}"
    return f"No data found for {company}."

def handle_profitability(company: str) -> str:
    """Build the profitability answer for `company`.

    The answer lists the operating cash flow, net income and assets growth
    figures from the growth table, joined by " | ". Returns a "No data found"
    message when the company has no row.
    """
    record = row_for(company)
    if record is None:
        return f"No data found for {company}."

    # (label shown to the user, column read from the row), in display order.
    metrics = (
        ("Operating Cash Flow growth", 'Operating_Cash_Flow_Growth(%)'),
        ("Net Income growth", 'Net_Income_Growth(%)'),
        ("Assets growth", 'Assets_Growth(%)'),
    )
    details = " | ".join(
        f"{label} = {pct(record[column])}" for label, column in metrics
    )
    return f"{company}: " + details

def handle_roa(company: str) -> str:
    """Build the ROA answer for `company`.

    No ROA figure is looked up here, so the answer is always a fixed
    "not available" message naming the company.
    """
    message = f"{company}: ROA not available in the attached dataset."
    return message

def handle_cfo(company: str) -> str:
    """Build the operating-cash-flow answer for `company`.

    The growth figure is reported together with a qualitative label chosen by
    threshold: below 5, below 15, below 25, and 25 or above.

    Returns:
        The formatted answer; a "No data found" message when the company has
        no row in the growth table; or an "N/A" answer when the row exists but
        the growth value is missing.
    """
    r = row_for(company)
    if r is None:
        return f"No data found for {company}."
    val = r['Operating_Cash_Flow_Growth(%)']
    # NaN compares False against every threshold below, which would otherwise
    # fall through to the "exceptional" label. Report missing data instead.
    if pd.isna(val):
        return f"{company}: Operating Cash Flow growth = N/A — no data available to assess."
    if val < 5:
        msg = "poor conversion of earnings to cash."
    elif val < 15:
        msg = "decent operational cash."
    elif val < 25:
        msg = "very good; high-quality earnings."
    else:
        msg = "exceptional efficiency."
    return f"{company}: Operating Cash Flow growth = {pct(val)} — {msg}"

# Known phrasings for each intent, keyed by the intent name used in HANDLERS.
# simple_chatbot compares the normalized user query against the normalized form
# of every phrasing: first for equality, then by difflib similarity.
EXACT_PATTERNS = {
    # Revenue growth questions. The two long phrasings differ only in the
    # apostrophe (straight vs. curly); normalize() maps both to the same text.
    "revenue": [
        "how has the company's revenue grown year-over-year",
        "how has the company’s revenue grown year-over-year",
        "revenue yoy",
        "revenue growth",
    ],
    # Profitability / margin trend questions.
    "profitability": [
        "is the company becoming more profitable over time? show me the trend in gross, operating, and net profit margins.",
        "profitability trend",
        "gross operating and net profit margins",
    ],
    # Return-on-assets questions.
    "roa": [
        "how efficiently is the company using its assets to generate income (roa)?",
        "roa",
    ],
    # Operating cash flow questions.
    "cfo": [
        "is the company generating consistent positive cash flow from operations, and how is that changing over time?",
        "operating cash flow growth",
        "cash flow from operations",
    ],
}

# Dispatch table: intent name -> function that builds the answer for a company.
HANDLERS = dict(
    revenue=handle_revenue,
    profitability=handle_profitability,
    roa=handle_roa,
    cfo=handle_cfo,
)



In [None]:
def simple_chatbot(user_query: str) -> str:
    """Answer a free-text financial question about one of the known companies.

    The query must mention a company (resolved by ``find_company``). The
    intent is then resolved in three stages; the first stage that succeeds
    produces the answer:

    1. exact match against the normalized phrasings in ``EXACT_PATTERNS``;
    2. fuzzy match (difflib ratio >= 0.78) against those same phrasings;
    3. keyword overlap, whose answers are prefixed with "(Approximate) ".

    Stages 1 and 2 are tried on the query with the company mention removed as
    well as on the full query. None of the stored phrasings contains a company
    name, so comparing only the full query can never match exactly and lowers
    the fuzzy score.

    Args:
        user_query: The question as typed by the user.

    Returns:
        The selected handler's answer, or a help message when no company or
        no intent can be determined.
    """
    q = normalize(user_query)
    company = find_company(q)

    if not company:
        return ("Please include a company name (Apple, Microsoft, or Tesla). "
                "Example: 'Apple revenue YoY' or 'profitability trend for Microsoft'.")

    # Remove the literal company mention (and a possessive "'s"), then tidy the
    # whitespace and trailing punctuation that removal can leave behind.
    without_company = re.sub(rf"\b{re.escape(company.lower())}\b(?:'s)?", " ", q)
    without_company = normalize(" ".join(without_company.split()))

    # Full query stays a candidate so anything that matched before still does.
    candidates = [q]
    if without_company and without_company != q:
        candidates.insert(0, without_company)

    # Normalize the phrasings once; both matching stages reuse them.
    patterns = [(intent, normalize(p))
                for intent, ps in EXACT_PATTERNS.items() for p in ps]

    # Stage 1: exact phrasing.
    for intent, pattern in patterns:
        if pattern in candidates:
            return HANDLERS[intent](company)

    # Stage 2: closest phrasing by similarity ratio.
    best_intent, best_score = None, 0.0
    for intent, pattern in patterns:
        for candidate in candidates:
            score = difflib.SequenceMatcher(a=candidate, b=pattern).ratio()
            if score > best_score:
                best_intent, best_score = intent, score
    if best_score >= 0.78:
        return HANDLERS[best_intent](company)

    # Stage 3: the intent sharing the most keywords with the query wins; on a
    # tie the rule listed first is kept.
    toks = tokenize(q)
    keyword_rules = [
        ({"revenue", "growth", "yoy", "year", "over"}, "revenue"),
        ({"profit", "profitable", "margin", "gross", "operating", "net"}, "profitability"),
        ({"roa", "assets", "efficiency", "return"}, "roa"),
        ({"cash", "flow", "operations", "cfo"}, "cfo"),
    ]
    best_intent_kw, best_hits = None, 0
    for kwset, intent in keyword_rules:
        hits = len(kwset & toks)
        if hits > best_hits:
            best_hits, best_intent_kw = hits, intent
    if best_intent_kw is not None:
        return "(Approximate) " + HANDLERS[best_intent_kw](company)

    return ("I’m not sure yet. Try: 'Apple revenue YoY', "
            "'profitability trend Microsoft', or 'Tesla operating cash flow growth'.")


In [70]:

# Interactive console session: greet the user, then answer questions until the
# user types "exit" (compared after normalization, so "Exit." also ends it).
if __name__ == "__main__":
    name = input("Hi! I’m the Financial Chatbot. What should I call you? ").strip()
    if not name:
        name = "Guest"
    print(f"Hi {name}! Pleasure to assist you. Ask a financial question (or type 'exit').")

    q = input("> ").strip()
    while normalize(q) != "exit":
        print(f"\n{name}, here's my response:\n{simple_chatbot(q)}\n")
        q = input("> ").strip()
    print("Goodbye!")

Hi! I’m the Financial Chatbot. What should I call you?  vineeta


Hi vineeta! Pleasure to assist you. Ask a financial question (or type 'exit').


>  how has the company’s revenue grown year-over-year



vineeta, here's my response:
Please include a company name (Apple, Microsoft, or Tesla). Example: 'Apple revenue YoY' or 'profitability trend for Microsoft'.



>  Microsoft



vineeta, here's my response:
I’m not sure yet. Try: 'Apple revenue YoY', 'profitability trend Microsoft', or 'Tesla operating cash flow growth'.



>  profitability trend Microsoft



vineeta, here's my response:
Microsoft: Operating Cash Flow growth = 25.11% | Net Income growth = 18.67% | Assets growth = 5.71%



>  thankyou



vineeta, here's my response:
Please include a company name (Apple, Microsoft, or Tesla). Example: 'Apple revenue YoY' or 'profitability trend for Microsoft'.



>  exit


Goodbye!
