In [14]:

import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import talib
import os
import time

In [17]:
start_time = time.time()

# Step 1: Load all CSV files
data_dir = r"c:\Users\mdkai\Stock-Prediction"  # Replace with your directory path
csv_files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]
company_data = {}

for file in csv_files:
    ticker = file.split('.')[0]  # Extract ticker from filename
    df = pd.read_csv(os.path.join(data_dir, file))
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    company_data[ticker] = df[['close', 'volume']].copy()  # Use 'close' and 'volume'

# Step 2: Compute features for each company
results = []
for ticker, data in company_data.items():
    df = data.copy()
    
    # Compute technical indicators
    df['SMA_20'] = talib.SMA(df['close'], timeperiod=20)
    df['SMA_50'] = talib.SMA(df['close'], timeperiod=50)
    df['RSI'] = talib.RSI(df['close'], timeperiod=14)
    df['MACD'], _, _ = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['Returns'] = df['close'].pct_change().dropna() * 100  # For GARCH
    
    # Handle missing data
    df = df.dropna()
    
    # Step 3: ARIMA model
    try:
        arima_model = ARIMA(df['close'], order=(5, 1, 0))  # Example order
        arima_fit = arima_model.fit()
        arima_forecast = arima_fit.forecast(steps=10)
        df['ARIMA_Pred'] = arima_fit.fittedvalues
        forecast_return = (arima_forecast[-1] - df['close'].iloc[-1]) / df['close'].iloc[-1]
    except:
        forecast_return = 0  # Fallback if ARIMA fails
        df['ARIMA_Pred'] = df['close']
    
    # Step 4: GARCH model
    try:
        garch_model = arch_model(df['Returns'], vol='Garch', p=1, q=1, dist='Normal')
        garch_fit = garch_model.fit(disp='off')
        df['GARCH_Vol'] = garch_fit.conditional_volatility
        volatility = garch_fit.conditional_volatility.iloc[-1]
    except:
        volatility = df['Returns'].std()  # Fallback volatility
        df['GARCH_Vol'] = volatility
    
    # Step 5: Prepare features for Random Forest
    features = df[['SMA_20', 'SMA_50', 'RSI', 'MACD', 'ARIMA_Pred', 'GARCH_Vol', 'volume']].dropna()
    target = df['close'].shift(-10)[features.index]  # 10-day future price
    features, target = features[:-10], target[:-10]
    
    if len(features) < 50:  # Skip if insufficient data
        continue
    
    # Scale features
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(features)
    
    # Train Random Forest
    train_size = int(len(features) * 0.8)
    X_train, X_test = features_scaled[:train_size], features_scaled[train_size:]
    y_train, y_test = target[:train_size], target[train_size:]
    
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    
    # Predict 10-day return
    latest_features = features_scaled[-1].reshape(1, -1)
    predicted_price = rf_model.predict(latest_features)[0]
    current_price = df['close'].iloc[-1]
    predicted_return = (predicted_price - current_price) / current_price
    
    # Combine ARIMA and RF predictions (weighted)
    final_return = 0.5 * forecast_return + 0.5 * predicted_return
    
    # Risk-adjusted return
    risk_adjusted_return = final_return / (volatility + 1e-6)  # Avoid division by zero
    
    results.append({
        'Ticker': ticker,
        'Predicted_Return': final_return,
        'Volatility': volatility,
        'Risk_Adjusted_Return': risk_adjusted_return
    })

# Step 6: Rank companies
results_df = pd.DataFrame(results)
top_companies = results_df.sort_values('Risk_Adjusted_Return', ascending=False)
print("Top Companies to Invest In (Ranked by Risk-Adjusted Return):")
print(top_companies[['Ticker', 'Predicted_Return', 'Volatility', 'Risk_Adjusted_Return']].head(10))

# Optional: Save results
top_companies.to_csv('top_companies.csv', index=False)

end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(d

Top Companies to Invest In (Ranked by Risk-Adjusted Return):
                                 Ticker  Predicted_Return  Volatility  \
12   TSLA_Historical_Mar2024_to_Feb2025          0.213822    4.399956   
9    MSFT_Historical_Mar2024_to_Feb2025          0.056782    1.610133   
4   GOOGL_Historical_Mar2024_to_Feb2025          0.065500    1.884244   
2    AMZN_Historical_Mar2024_to_Feb2025          0.044785    1.910954   
10   NVDA_Historical_Mar2024_to_Feb2025          0.057876    4.116213   
6     JPM_Historical_Mar2024_to_Feb2025         -0.007013    1.596548   
1    ADBE_Historical_Mar2024_to_Feb2025         -0.009705    2.094429   
0    AAPL_Historical_Mar2024_to_Feb2025         -0.011669    1.451311   
11    PEP_Historical_Mar2024_to_Feb2025         -0.017358    1.417558   
8    META_Historical_Mar2024_to_Feb2025         -0.051668    1.723094   

    Risk_Adjusted_Return  
12              0.048596  
9               0.035265  
4               0.034762  
2               0.023436  


  return get_prediction_index(
  return get_prediction_index(
