<a href="https://colab.research.google.com/github/mjgpinheiro/Econophysics/blob/main/Ergontropic_ST_optimizewindow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.arima.model import ARIMA

# Load daily AAPL price history.
# auto_adjust=False is passed explicitly because the trading logic further
# down reads the 'Adj Close' column.
# NOTE(review): recent yfinance releases appear to default to
# auto_adjust=True, which omits 'Adj Close' - confirm against the installed
# version.
df_price = yf.download('AAPL', start='2016-01-01', end='2021-09-01', auto_adjust=False)

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so df_price['Adj Close'] is a Series.
if isinstance(df_price.columns, pd.MultiIndex):
    df_price.columns = df_price.columns.get_level_values(0)

# Drop rows with any missing price field.
df_price.dropna(inplace=True)

# Load news headlines.
# A second yf.download() of the price history that used to precede this was
# removed: its result was relabelled as "Headlines", filtered, and then
# discarded when df_news was reassigned from the HTML table below.
# NOTE(review): this assumes the Yahoo Finance news page exposes an HTML table
# with 'date' and 'headline' columns; pd.read_html raises when no table is
# found and the column selection raises KeyError otherwise - confirm the page
# still has that shape.
df_news = pd.read_html('https://finance.yahoo.com/quote/AAPL/news/')[0]
df_news.columns = df_news.columns.str.lower()
df_news = df_news[['date', 'headline']].copy()
df_news.columns = ['Date', 'Headlines']

# Keep only rows whose headline is real text (drops NaN and other non-string
# cells), so the string operations applied to 'Headlines' later are safe.
df_news = df_news[df_news['Headlines'].apply(lambda x: isinstance(x, str))].copy()

# Keep the dates as datetime64 values truncated to midnight. Converting to
# python `date` objects (`.dt.date`) produces an object-dtype index, which
# does not match the datetime64 index of the price data when merging on 'Date'.
df_news['Date'] = pd.to_datetime(df_news['Date']).dt.normalize()
df_news.set_index('Date', inplace=True)

# Add sentiment analysis columns to news data.
# All three columns are single scalars broadcast to every row.
df_news.loc[:, 'Sentiment'] = 0  # placeholder: no sentiment model is applied here
df_news.loc[:, 'Energy'] = np.log(len(df_news.Headlines.unique()))

# Entropy = -sum_w p_w * log(p_w), where p_w is the fraction of headlines that
# contain the word w.
# The earlier comprehension iterated over `Headlines.str.split()`, whose
# elements are *lists* of tokens, and passed each list to `str.contains`,
# which raises. Here the vocabulary is flattened to unique words first.
headline_text = df_news['Headlines'].dropna().astype(str)
vocabulary = sorted({word for text in headline_text for word in text.split()})
total_headlines = len(df_news)

# regex=False: headline tokens may contain regex metacharacters ("(", "?", "$").
# Every word comes from at least one headline, so each fraction is > 0 and the
# logarithm is always finite. The fraction is computed once per word.
word_fractions = np.array(
    [headline_text.str.contains(word, regex=False).sum() / total_headlines
     for word in vocabulary],
    dtype=float,
)
df_news.loc[:, 'Entropy'] = -np.sum(word_fractions * np.log(word_fractions))

# Merge news data with stock price data.

# The modelling steps below read 'Returns' and 'Lagged_Return', which no
# earlier step creates. Derive them from the price series *before* merging so
# that rows added by the outer join do not distort the day-over-day change.
price_frame = df_price.copy()
price_frame['Returns'] = price_frame['Adj Close'].pct_change()
price_frame['Lagged_Return'] = price_frame['Returns'].shift(1)
price_frame.index.name = 'Date'

# Merge keys must share a dtype: coerce the news index to datetime64 in case
# it holds python `date` objects.
# NOTE(review): assumes both indexes are timezone-naive - confirm.
news_frame = df_news.copy()
news_frame.index = pd.to_datetime(news_frame.index)
news_frame.index.name = 'Date'

# Outer join keeps every price date and every news date; gaps are filled
# forward and then backward. `.ffill()` / `.bfill()` replace the deprecated
# `fillna(method=...)` spelling.
# NOTE(review): the backward fill also fills the leading NaNs of 'Returns' and
# 'Lagged_Return' with the first available values, and news-only dates inherit
# the previous trading day's prices - confirm this is acceptable.
merged_data = (
    pd.merge(price_frame, news_frame, on='Date', how='outer')
    .ffill()
    .bfill()
)


# Define window size range for ARIMA model
window_size_range = np.arange(2, 30)

# Initialize variables to store AIC and BIC values for each window size
aic_values = np.zeros(len(window_size_range))
bic_values = np.zeros(len(window_size_range))

feature_columns = ['Energy', 'Entropy', 'Sentiment', 'Lagged_Return']
raw_features = merged_data[feature_columns]
returns = merged_data['Returns'].to_numpy()

# Information criteria are only comparable between models fitted to the SAME
# observations. A rolling mean of width w loses its first w-1 rows, so every
# candidate is evaluated on the rows that are complete under the largest
# window. A positional boolean mask is used instead of label lookup so that
# repeated dates in the index cannot duplicate rows.
largest_window = int(window_size_range.max())
common_rows = (
    raw_features.rolling(largest_window).mean().notna().all(axis=1).to_numpy()
)

# Columns holding a single repeated value are collinear with the model's
# constant term, so only columns that actually vary are used as regressors.
varying_columns = [
    column for column in feature_columns if raw_features[column].nunique() > 1
]

# Loop through window sizes to fit and evaluate ARIMA models
for i, window_size in enumerate(window_size_range):
    # Prepare features and target on the common evaluation sample
    features = raw_features.rolling(int(window_size)).mean().loc[common_rows]
    target = returns[common_rows]

    # Fit AR(1) with the window-smoothed features as exogenous regressors.
    # Previously the features were computed but never given to the model, so
    # the scores differed between windows only through the sample length.
    exog = features[varying_columns].to_numpy() if varying_columns else None
    model = ARIMA(target, exog=exog, order=(1, 0, 0))
    results = model.fit()

    # Calculate AIC and BIC values
    aic_values[i] = results.aic
    bic_values[i] = results.bic

# Choose optimal window size (lowest AIC; BIC is recorded for inspection only)
optimal_window_size = window_size_range[np.argmin(aic_values)]
print(f'Optimal window size: {optimal_window_size}')

# Prepare features and target using optimal window size
feature_columns = ['Energy', 'Entropy', 'Sentiment', 'Lagged_Return']
rolled_features = merged_data[feature_columns].rolling(optimal_window_size).mean()

# Select complete rows with a positional boolean mask. Looking the target up
# by label (`.loc[features.index]`) returns extra rows whenever a date occurs
# more than once in the index, which would leave features and target with
# different lengths.
complete_rows = rolled_features.notna().all(axis=1).to_numpy()
features = rolled_features.loc[complete_rows]
target = merged_data['Returns'].loc[complete_rows]

# Scale features (zero mean, unit variance per column)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Train neural network: two hidden layers of 10 units, fixed seed for
# reproducibility
nn = MLPRegressor(hidden_layer_sizes=(10, 10), activation='relu', solver='adam', max_iter=5000, random_state=0)
nn.fit(scaled_features, target)

# Get last day's features: the mean of the final `optimal_window_size` rows,
# shaped (1, n_features)
last_day_features = merged_data[feature_columns].tail(optimal_window_size).mean().values.reshape(1, -1)

# The scaler was fitted on a DataFrame, so hand it a DataFrame with the same
# column names; a bare ndarray triggers scikit-learn's feature-name warning.
scaled_last_day_features = scaler.transform(
    pd.DataFrame(last_day_features, columns=feature_columns)
)

# Make prediction using neural network (array of shape (1,))
predicted_return = nn.predict(scaled_last_day_features)

# Calculate stop loss: 5% below the latest adjusted close.
# `.iloc[-1]` selects the last row by position; plain `[-1]` on a date-indexed
# Series is a deprecated positional fallback.
last_price = float(df_price['Adj Close'].iloc[-1])
stop_loss = last_price * 0.95

# The model returns a one-element array; reduce it to a plain float so the
# comparisons below are scalar and the ':.2f' format in the report works
# (formatting an ndarray with ':.2f' raises TypeError).
predicted_return = float(np.ravel(predicted_return)[0])

# Determine trading decision.
# The original BUY test required `price < stop_loss`, i.e. price < 0.95 * price,
# which can never hold for a positive price, so BUY was unreachable. The
# comparison is flipped: buy on a positive forecast while the price is above
# the stop level.
if predicted_return > 0 and last_price > stop_loss:
    trade = 'BUY'
elif predicted_return < 0:
    trade = 'SELL'
else:
    trade = 'HOLD'

# `investment_size` is not assigned anywhere in the visible script. Keep a
# value defined earlier (e.g. in another notebook cell) if there is one,
# otherwise fall back to a default.
# NOTE(review): 10000.0 is an assumed placeholder - set the intended amount.
investment_size = globals().get('investment_size', 10000.0)

# Print trading decision
shares = investment_size / last_price
# Position value minus capital at the same price: zero by construction (up to
# floating-point rounding) at the moment of entry.
current_return = shares * last_price - investment_size
action = {'BUY': 'Buy shares', 'SELL': 'Sell shares'}.get(trade, 'Hold shares')
print(f'Trade: {trade}, Shares: {shares:.2f}, Investment Size: {investment_size:.2f}, Predicted Return: {predicted_return:.2f}, Current Return: {current_return:.2f}, Action: {action}')
