In [1]:
import pandas as pd
import yfinance as yf
from datetime import date, timedelta

In [2]:
end_date = date.today().strftime("%Y-%m-%d")
start_date = (date.today() - timedelta(days=365)).strftime("%Y-%m-%d")

In [6]:
# list of stock tickers to download
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500_table = pd.read_html(url)[0]
all_tickers = sp500_table['Symbol'].tolist()
print(f"✅ Found {len(all_tickers)} tickers")

data = yf.download(all_tickers, start=start_date, end=end_date, progress=False)

✅ Found 503 tickers



2 Failed downloads:
['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-04-05 -> 2025-04-05)')
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')


In [7]:
import yfinance as yf
from tqdm import tqdm

# STEP 1: Define or load your original list of tickers
original_tickers = [
    'AAPL', 'MSFT', 'GOOGL', 'TSLA', 'AMZN', 'NVDA', 'META', 'BRK.B', 'BF.B'
]

# STEP 2: Convert problematic tickers
def convert_ticker_yf(ticker):
    return ticker.replace('.', '-')

yf_tickers = [convert_ticker_yf(t) for t in original_tickers]

# STEP 3: Fetch basic data from Yahoo Finance
stock_data = {}
failed_tickers = []

for ticker in tqdm(yf_tickers, desc="📡 Fetching ticker data"):
    try:
        info = yf.Ticker(ticker).info
        stock_data[ticker] = {
            'marketCap': info.get('marketCap'),
            'currentRatio': info.get('currentRatio'),
            'debtToEquity': info.get('debtToEquity'),
            'grossMargins': info.get('grossMargins'),
            'totalRevenue': info.get('totalRevenue'),
        }
    except Exception as e:
        print(f"⚠️ Error fetching {ticker}: {e}")
        failed_tickers.append(ticker)

# STEP 4: Preview results
print("\n✅ Sample Fetched Data:")
for t, data in list(stock_data.items())[:3]:
    print(f"{t}: {data}")

if failed_tickers:
    print("\n🚫 Failed tickers:", failed_tickers)

📡 Fetching ticker data: 100%|██████████| 9/9 [00:02<00:00,  4.31it/s]


✅ Sample Fetched Data:
AAPL: {'marketCap': 2829863354368, 'currentRatio': 0.923, 'debtToEquity': 145.0, 'grossMargins': 0.46519002, 'totalRevenue': 395760009216}
MSFT: {'marketCap': 2675043205120, 'currentRatio': 1.351, 'debtToEquity': 33.998, 'grossMargins': 0.69408995, 'totalRevenue': 261802000384}
GOOGL: {'marketCap': 1786628538368, 'currentRatio': 1.837, 'debtToEquity': 8.655, 'grossMargins': 0.582, 'totalRevenue': 350018011136}





In [8]:
# streamlit_app.py

import streamlit as st
import yfinance as yf
import pandas as pd
import joblib
import numpy as np

# Load your trained ML model and scaler
model = joblib.load("bankruptcy_model.pkl")
scaler = joblib.load("scaler.pkl")
expected_features = joblib.load("features_list.pkl")  # Must be saved during training

# Your full list of 503 tickers
# list of stock tickers to download
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500_table = pd.read_html(url)[0]
all_tickers = sp500_table['Symbol'].tolist()

st.title("📉 AI Bankruptcy Risk Predictor")

selected_ticker = st.selectbox("Choose a company:", sorted(all_tickers))

if st.button("🔍 Analyze Ticker"):
    try:
        # Get latest stock info
        yf_ticker = selected_ticker.replace(".", "-")
        info = yf.Ticker(yf_ticker).info

        # Map selected features to model input
        feature_map = {
            'ROA(C) before interest and depreciation before interest': info.get('returnOnAssets', 0),
            'Operating Gross Margin': info.get('grossMargins', 0),
            'Current Ratio': info.get('currentRatio', 0),
            'Debt to Equity': info.get('debtToEquity', 0),
            'Total Revenue': info.get('totalRevenue', 0),
            # ... more mappings as needed
        }

        # Build feature vector
        input_vector = [feature_map.get(feat, 0.0) for feat in expected_features]
        X_input_scaled = scaler.transform([input_vector])

        # Predict
        pred = model.predict(X_input_scaled)[0]
        prob = model.predict_proba(X_input_scaled)[0][1]

        st.success("✅ Low Risk" if pred == 0 else "⚠️ High Risk")
        st.metric(label="📊 Bankruptcy Probability", value=f"{prob:.2%}")

    except Exception as e:
        st.error(f"Error fetching data for {selected_ticker}: {e}")

2025-04-05 18:23:32.827 
  command:

    streamlit run /Users/lakshmikrishnan/miniconda3/envs/bankruptcy-ai/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-04-05 18:23:32.829 Session state does not function when running a script without `streamlit run`


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

# -------------------------------
# STEP 1: Load the dataset
# -------------------------------
df = pd.read_csv("data.csv")  # Update path if needed
X = df.drop("Bankrupt?", axis=1)
y = df["Bankrupt?"]

# -------------------------------
# STEP 2: Train/test split
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# STEP 3: Preprocessing
# -------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save for future use
joblib.dump(scaler, "scaler.pkl")

# -------------------------------
# STEP 4: Train Model
# -------------------------------
model = RandomForestClassifier(random_state=42, class_weight="balanced")
model.fit(X_train_scaled, y_train)

# Save model
joblib.dump(model, "bankruptcy_model.pkl")

# -------------------------------
# STEP 5: Use a real row from data
# -------------------------------
# Let's say row 100
sample_index = 150
sample_features = X.iloc[sample_index].values.reshape(1, -1)
sample_scaled = scaler.transform(sample_features)

# -------------------------------
# STEP 6: Predict
# -------------------------------
prediction = model.predict(sample_scaled)[0]
prediction_prob = model.predict_proba(sample_scaled)[0][1]

print(f"\n📦 Company #{sample_index}")
print("🔎 Bankruptcy Risk Prediction:", "⚠️ High Risk" if prediction == 1 else "✅ Low Risk")
print(f"📊 Risk Score: {prediction_prob:.2f}")


📦 Company #150
🔎 Bankruptcy Risk Prediction: ✅ Low Risk
📊 Risk Score: 0.00


