In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam




In [5]:
def process_stock(file_name):
    """Train an LSTM/GRU regressor on lagged closing prices and forecast the next close.

    The CSV is read with its "Date" column as a parsed index; rows containing
    missing values are dropped. Each training sample uses the two preceding
    closes, ordered ``[close(t-1), close(t-2)]``, to predict ``close(t)``.
    The first 80% of the samples (chronologically) are used for training and
    the remainder for evaluation.

    Args:
        file_name: Path to a CSV file with "Date" and "Close" columns.

    Returns:
        Tuple ``(mse_train, mse_test, r2_train, r2_test, rmse_train,
        rmse_test, next_day_price)`` where the metrics are computed on
        un-scaled prices and ``next_day_price`` is the forecast for the
        step after the last row of the file.

    Raises:
        ValueError: If the file has too few usable rows to form a non-empty
            training split.
    """
    n_lags = 2  # number of previous closes used as features

    # Load data
    df = pd.read_csv(file_name, index_col="Date", parse_dates=True)
    df = df.dropna()
    close = df['Close'].astype(float)

    # Create lagged features: column 0 is the target close(t), the following
    # columns are close(t-1), close(t-2). Rows without a full history are dropped.
    lagged_data = pd.concat([close.shift(i) for i in range(n_lags + 1)], axis=1).dropna()
    lagged_data.columns = ['target'] + [f'lag_{i}' for i in range(1, n_lags + 1)]

    # Train-test split (chronological, no shuffling of the split itself)
    split_idx = int(len(lagged_data) * 0.8)
    if split_idx == 0:
        raise ValueError(
            f"{file_name}: not enough rows to build a training set "
            f"({len(lagged_data)} lagged samples available)"
        )
    X_train, X_test = lagged_data.iloc[:split_idx, 1:].values, lagged_data.iloc[split_idx:, 1:].values
    y_train, y_test = lagged_data.iloc[:split_idx, 0].values, lagged_data.iloc[split_idx:, 0].values

    # Standardization: scalers are fitted on the training split only
    scaler_X, scaler_y = StandardScaler(), StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
    # Add a trailing axis so each lag becomes one timestep with one feature
    X_train_reshaped = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
    X_test_reshaped = X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1)

    # Define the model
    model = Sequential([
        LSTM(64, return_sequences=True, kernel_regularizer=l2(0.0001), input_shape=(X_train_reshaped.shape[1], 1)),
        BatchNormalization(),
        Dropout(0.05),

        GRU(256, return_sequences=False, kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),

        Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.2),

        Dense(1)
    ])

    # Compile model
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse')

    # Train the model
    model.fit(X_train_reshaped, y_train_scaled, epochs=300, batch_size=32, verbose=0)

    # Predictions (in scaled space)
    y_train_pred_scaled = model.predict(X_train_reshaped, verbose=0)
    y_test_pred_scaled = model.predict(X_test_reshaped, verbose=0)

    # Inverse transform back to price units
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled)
    y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled)

    # Evaluate performance
    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)

    # Prepare the feature row for the next-day forecast.
    # Training rows are ordered [close(t-1), close(t-2)], i.e. newest first, so
    # the last two closes must be reversed; feeding them in chronological order
    # would swap the lags seen by both the scaler and the network.
    latest_lags = close.values[-n_lags:][::-1].reshape(1, -1)
    latest_lags_scaled = scaler_X.transform(latest_lags)
    latest_lags_reshaped = latest_lags_scaled.reshape(-1, latest_lags_scaled.shape[1], 1)

    # Predict next closing price
    next_day_scaled = model.predict(latest_lags_reshaped, verbose=0)
    next_day_price = scaler_y.inverse_transform(next_day_scaled)[0, 0]

    return mse_train, mse_test, r2_train, r2_test, rmse_train, rmse_test, next_day_price



In [6]:
# Read stock names from file. Blank lines are skipped so they neither turn into
# a bogus ".csv" file name nor use up one of the ten processing slots.
with open("stocks.txt", "r") as file:
    stock_names = [line.strip() + ".csv" for line in file if line.strip()]

# Process each stock and keep its metrics/forecast tuple keyed by file name
results = {}
for stock_file in stock_names[:10]:  # Process max 10 stocks
    results[stock_file] = process_stock(stock_file)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


In [7]:

# Ticker symbols, one per line of stocks.txt
with open("stocks.txt", "r") as file:
    stock_names = [line.strip() for line in file]

# Sentiment table; header names are stripped of surrounding whitespace
sentiment_df = pd.read_csv("sentiment.csv")
sentiment_df.columns = sentiment_df.columns.str.strip()

# Both columns must be present before any lookup is attempted
if not {'Ticker/Headline', 'Sentiment Score'}.issubset(sentiment_df.columns):
    raise KeyError("The columns 'Ticker/Headline' or 'Sentiment Score' are missing in sentiment.csv")

# Map each ticker to the first nonzero score among its rows (0 when none exists)
sentiment_dict = {}
for stock in stock_names:
    ticker_rows = sentiment_df['Ticker/Headline'] == stock
    scores = sentiment_df.loc[ticker_rows, 'Sentiment Score']
    nonzero_scores = scores[scores != 0]
    sentiment_dict[stock] = 0 if nonzero_scores.empty else nonzero_scores.iloc[0]


In [8]:
import pandas as pd
import numpy as np

# Read stock names from file, ignoring blank lines so no empty ticker is created
with open("stocks.txt", "r") as file:
    stock_names = [line.strip() for line in file if line.strip()]

# Load sentiment scores (header names are stripped of surrounding whitespace)
sentiment_df = pd.read_csv("sentiment.csv")
sentiment_df.columns = sentiment_df.columns.str.strip()

# Ensure correct columns exist
if 'Ticker/Headline' not in sentiment_df.columns or 'Sentiment Score' not in sentiment_df.columns:
    raise KeyError("The columns 'Ticker/Headline' or 'Sentiment Score' are missing in sentiment.csv")

# Create sentiment dictionary with the first usable (non-missing, nonzero) score per ticker
sentiment_dict = {}
for stock in stock_names:
    stock_sentiments = sentiment_df.loc[sentiment_df['Ticker/Headline'] == stock, 'Sentiment Score']

    # NaN != 0 evaluates to True, so missing scores have to be dropped explicitly;
    # otherwise a NaN could be selected and propagate into every later score.
    valid_sentiments = stock_sentiments.dropna()
    nonzero_sentiments = valid_sentiments[valid_sentiments != 0]

    if not nonzero_sentiments.empty:
        sentiment_dict[stock] = nonzero_sentiments.iloc[0]  # First nonzero sentiment
    else:
        sentiment_dict[stock] = 0  # Default if all scores are 0 or missing

# Print sentiment dictionary for verification
print(" Sentiment Dictionary Loaded:")
print(sentiment_dict)

# Function to process stock data



 Sentiment Dictionary Loaded:
{'AAPL': np.float64(0.7184), 'MSFT': np.float64(-0.4215), 'GOOGL': np.float64(0.4215), '^GSPC': np.float64(-0.4215), 'NVDA': np.float64(0.128), 'TSLA': np.float64(-0.5574), 'PLTR': np.float64(-0.1531), 'LCID': np.float64(0.4588), 'AAL': np.float64(0.128), 'SOFI': np.float64(0.6369)}


In [9]:
def process_stock(stock):
    """Build one recommendation row for ``stock`` from ``{stock}.csv``.

    The predicted price, R² and RMSE used here are random placeholders drawn
    with ``np.random.uniform``; they stand in for real model outputs. The
    sentiment is looked up in the module-level ``sentiment_dict`` (0 when the
    ticker is absent).

    Args:
        stock: Ticker symbol; the data is read from ``f"{stock}.csv"``.

    Returns:
        ``[stock, prev_close, sentiment, predicted_price, action]`` where
        ``action`` is "BUY", "SELL" or "HOLD". If anything raises while
        processing, the error is printed and
        ``[stock, None, None, None, "ERROR"]`` is returned instead.
    """
    try:
        prices = pd.read_csv(f"{stock}.csv").dropna()
        closes = prices['Close']

        # Reference prices: second-to-last close, last close, and the close
        # six rows from the end (or the first row for short files)
        prev_close = closes.iloc[-2]
        new_price = closes.iloc[-1]
        if len(prices) >= 6:
            old_price = closes.iloc[-6]
        else:
            old_price = closes.iloc[0]

        # Placeholder model outputs; the draw order is price, R², RMSE
        predicted_price = new_price * (1 + np.random.uniform(-0.02, 0.02))
        r2 = np.random.uniform(0.7, 1.0)
        rmse = np.random.uniform(0.5, 2.0)

        # RMSE expressed relative to the previous close
        rel_rmse = rmse / prev_close
        sentiment = sentiment_dict.get(stock, 0)

        # Score = expected return + weighted sentiment + weighted recent trend
        #         + fit quality - relative error
        expected_return = (predicted_price - prev_close) / prev_close
        sentiment_term = 5 * sentiment
        trend_term = 10 * (new_price / old_price - 1)
        recommendation_score = expected_return + sentiment_term + trend_term + r2 - rel_rmse

        # Trace output showing which sentiment value fed the score
        print(f"Stock: {stock}, Sentiment Score: {sentiment}, Recommendation Score: {recommendation_score}")

        # Thresholds at +/-0.5 separate BUY / SELL from HOLD
        if recommendation_score > 0.5:
            action = "BUY"
        elif recommendation_score < -0.5:
            action = "SELL"
        else:
            action = "HOLD"

        return [stock, prev_close, sentiment, predicted_price, action]
    except Exception as e:
        print(f" Error processing {stock}: {e}")
        return [stock, None, None, None, "ERROR"]



In [None]:
# Build one recommendation row per ticker (max 10 stocks). The rows get their own
# name because `results` already holds the dict of metric tuples keyed by file
# name from the training loop, which does not match the five columns below.
recommendation_rows = [process_stock(stock) for stock in stock_names[:10]]

# Create DataFrame for output
columns = ["Stock", "Closing Price", "Sentiment Score", "Future Value", "Recommendation"]
output_df = pd.DataFrame(recommendation_rows, columns=columns)

# Print final table
print("\n Stock Recommendations:")
print(output_df)


 Stock Recommendations:
   Stock  Closing Price  Sentiment Score  Future Value Recommendation
0   AAPL     223.850006           0.7184    216.956089            BUY
1   MSFT     390.579987          -0.4215    380.525049           SELL
2  GOOGL     162.240005           0.4215    155.773415            BUY
3  ^GSPC    5693.310059          -0.4215   5475.105526           SELL
4   NVDA     111.430000           0.1280    107.984274            BUY
5   TSLA     273.130005          -0.5574    266.926361           SELL
6   PLTR      90.089996          -0.1531     87.517249           HOLD
7   LCID       2.420000           0.4588      2.329078            BUY
8    AAL      11.140000           0.1280     10.623420            BUY
9   SOFI      12.200000           0.6369     11.628403            BUY
