In [16]:
import pandas as pd
import numpy as np
import logging
import joblib
from sklearn.preprocessing import LabelEncoder

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load Trained Model and Data
xgb_model = joblib.load("xgboost_model.pkl")  # Load the best-performing model
test_df = pd.read_csv("cleaned_stock_data_test.csv")

# Standardize column names
test_df.columns = test_df.columns.str.strip().str.lower().str.replace(' ', '_')

# Identify adjusted close column
adj_close_col = [col for col in test_df.columns if 'adj' in col.lower() and 'close' in col.lower()]
if not adj_close_col:
    raise KeyError("❌ No column found for adjusted close price!")
adj_close_col = adj_close_col[0]

# **Fix: Convert Categorical Columns to Numeric**
categorical_cols = test_df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    test_df[col] = le.fit_transform(test_df[col].astype(str))  # Convert to numeric
    label_encoders[col] = le

# Define Features for Model Prediction (After Encoding)
features = [col for col in test_df.columns if col not in ['ticker', 'date', 'close', adj_close_col, 'price_movement']]
X_test = test_df[features]

# Ensure all columns are numeric
X_test = X_test.astype(float)

# **Predict Market Movement**
test_df['predicted_movement'] = xgb_model.predict(X_test)  # 1 = Up, 0 = Down

# **Define Trading Strategy**
class TradingStrategy:
    def __init__(self, initial_cash=10000, risk_factor=0.05, profit_target=0.10, stop_loss=0.05):
        self.cash = initial_cash
        self.shares = 0
        self.risk_factor = risk_factor
        self.profit_target = profit_target
        self.stop_loss = stop_loss
        self.trading_log = []

    def execute_trade(self, index, row):
        close_price = row[adj_close_col]
        predicted_movement = row['predicted_movement']
        
        # Risk-based position sizing (5% of cash per trade)
        trade_size = (self.cash * self.risk_factor) // close_price if close_price > 0 else 0
        
        if predicted_movement == 1:  # Predicted Up
            if self.cash >= trade_size * close_price:
                self.shares += trade_size
                self.cash -= trade_size * close_price
                action = f"BUY {trade_size} shares at {close_price:.2f}"
            else:
                action = "HOLD (Insufficient Cash)"
        
        elif predicted_movement == 0 and self.shares > 0:  # Predicted Down, and we have shares to sell
            current_value = self.shares * close_price
            initial_value = self.shares * row['open']  # Assuming bought at the open price
            
            # Profit target reached
            if current_value >= initial_value * (1 + self.profit_target):
                self.cash += current_value
                self.shares = 0
                action = f"SELL (Profit Target Reached) {self.shares} shares at {close_price:.2f}"
            
            # Stop loss triggered
            elif current_value <= initial_value * (1 - self.stop_loss):
                self.cash += current_value
                self.shares = 0
                action = f"SELL (Stop Loss) {self.shares} shares at {close_price:.2f}"
            else:
                action = "HOLD"
        
        else:
            action = "HOLD"

        # Log trade
        self.trading_log.append({
            "date": row["date"],
            "price": close_price,
            "action": action,
            "cash_balance": self.cash,
            "shares_held": self.shares
        })

        return action

# Initialize Strategy
strategy = TradingStrategy()

# Apply Strategy to Test Data
test_df["trade_action"] = test_df.apply(lambda row: strategy.execute_trade(row.name, row), axis=1)

# Convert Log to DataFrame
trade_log_df = pd.DataFrame(strategy.trading_log)

# **Performance Evaluation**
final_value = strategy.cash + (strategy.shares * test_df.iloc[-1][adj_close_col])  # Cash + Remaining Shares Value
initial_value = 10000
profit_percentage = ((final_value - initial_value) / initial_value) * 100

logging.info(f"Final Portfolio Value: ${final_value:.2f} (Profit: {profit_percentage:.2f}%)")
logging.info("✅ Trading Strategy Successfully Executed!")

# Save Trading Log
trade_log_df.to_csv("trading_log.csv", index=False)
logging.info("📊 Trading log saved as 'trading_log.csv'.")

# Display Strategy Summary
print("\n🔹 Trading Strategy Results 🔹")
print(f"Initial Cash: $10,000")
print(f"Final Portfolio Value: ${final_value:.2f}")
print(f"Total Profit/Loss: {profit_percentage:.2f}%")
print("\n📌 Trade Log Preview:")
display(trade_log_df.head())


  test_df = pd.read_csv("cleaned_stock_data_test.csv")


ValueError: Feature shape mismatch, expected: 27, got 22