In [117]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from ta.momentum import RSIIndicator
from ta.trend import MACD

In [118]:
data_folder = os.path.join(os.getcwd(), "stocks")

In [119]:
all_data = []

for filename in os.listdir(data_folder):
    if filename.endswith(".csv"):
        stock_symbol = filename.split(".")[0]  # Extract stock symbol from filename
        file_path = os.path.join(data_folder, filename)
        
        # Read the CSV file
        stock_data = pd.read_csv(file_path)
        
        # Check if DataFrame is empty
        if not stock_data.empty:
            stock_data['Symbol'] = stock_symbol  # Add stock symbol column
            all_data.append(stock_data)

# Concatenate all non-empty DataFrames
if all_data:  # Only concatenate if there's data
    data = pd.concat(all_data, ignore_index=True)
else:
    print("No valid data found in the stocks folder.")

In [120]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Symbol
0,2014-10-17,22.98,23.33,22.92,23.25,16.999857,28485000,CSCO
1,2014-10-20,22.9,23.08,22.690001,22.93,16.765884,34386900,CSCO
2,2014-10-21,23.1,23.6,22.959999,23.51,17.189964,33712700,CSCO
3,2014-10-22,23.67,23.68,23.24,23.26,17.007174,25172900,CSCO
4,2014-10-23,23.610001,23.76,23.42,23.57,17.233839,21669900,CSCO


In [121]:
data['Date'] = pd.to_datetime(data['Date'])

In [122]:
data['pct_change'] = data.groupby('Symbol')['Close'].pct_change() * 100

In [131]:
selected_ticker = "AMZN"

stock_data = data[data['Symbol'] == selected_ticker].copy()
stock_data = stock_data.sort_values('Date')

stock_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Symbol,pct_change
72786,2014-10-17,15.3575,15.46,15.136,15.182,15.182,88102000,AMZN,
72787,2014-10-20,15.1475,15.3265,15.101,15.3105,15.3105,64690000,AMZN,0.846397
72788,2014-10-21,15.495,15.782,15.3535,15.7665,15.7665,71186000,AMZN,2.97835
72789,2014-10-22,15.7715,15.949,15.632,15.6485,15.6485,62518000,AMZN,-0.748422
72790,2014-10-23,15.67,15.84,15.57,15.659,15.659,180764000,AMZN,0.067099


In [132]:
stock_data['SMA_5_pct'] = stock_data['pct_change'].rolling(window=5).mean()
stock_data['EMA_5_pct'] = stock_data['pct_change'].ewm(span=5, adjust=False).mean()

rsi = RSIIndicator(close=stock_data['pct_change'], window=5)
stock_data['RSI_5_pct'] = rsi.rsi()

macd = MACD(close=stock_data['pct_change'], window_slow=5, window_fast=3, window_sign=5)
stock_data['MACD'] = macd.macd()
stock_data['MACD_Signal'] = macd.macd_signal()

stock_data['pct_change_shifted'] = stock_data['pct_change'].shift(-1)
stock_data = stock_data.dropna(subset=['SMA_5_pct', 'EMA_5_pct', 'RSI_5_pct', 'MACD', 'MACD_Signal', 'pct_change_shifted'])

In [133]:
train_size = int(0.8 * len(stock_data))
train_data = stock_data[:train_size]
test_data = stock_data[train_size:]

features = ['SMA_5_pct', 'EMA_5_pct', 'RSI_5_pct', 'MACD', 'MACD_Signal']
target = 'pct_change_shifted'

X_train = train_data[features]
y_train = train_data[target]

X_test = test_data[features]
y_test = test_data[target]

In [134]:
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

In [135]:
correlation = np.corrcoef(y_test, y_pred)[0, 1]
print(f"Correlation between actual and predicted percentage returns for {selected_ticker}: {correlation:.4f}")

Correlation between actual and predicted percentage returns for AMZN: -0.0227


In [136]:
indicator_correlations = {}

for feature in features:
    correlation = np.corrcoef(X_test[feature], y_pred)[0, 1]
    indicator_correlations[feature] = correlation

for feature, corr in indicator_correlations.items():
    print(f"Correlation between predicted returns and {feature}: {corr:.4f}")

Correlation between predicted returns and SMA_5_pct: -0.3959
Correlation between predicted returns and EMA_5_pct: -0.4769
Correlation between predicted returns and RSI_5_pct: 0.1482
Correlation between predicted returns and MACD: -0.2590
Correlation between predicted returns and MACD_Signal: -0.3005
