In [None]:
# Import packages
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import math
import datetime as dt
import yfinance as yf
import random
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Obtaining Data

In [None]:
# Function to extract stock data we want
def get_stock_data(tickers, num_days_back, info_type):
    """Download one price field for several tickers into a single dataframe.

    Parameters
    ----------
    tickers : iterable of str
        Yahoo Finance ticker symbols; one output column is created per ticker.
    num_days_back : int
        Number of calendar days before "now" at which the download window starts.
    info_type : str
        Name of the price column to keep from each download, e.g. "Adj Close".

    Returns
    -------
    pd.DataFrame
        One column per ticker holding the requested price field. Columns are
        added one by one, so every ticker after the first is aligned to the
        dates of the first ticker. An empty `tickers` gives an empty dataframe.
    """

    end_date = dt.datetime.now()
    start_date = end_date - dt.timedelta(days = num_days_back)

    # Initialize a dataframe to store stock data
    df = pd.DataFrame()

    # For each stock, download the desired information
    for ticker in tickers:
        # auto_adjust=False is passed explicitly: recent yfinance releases adjust
        # prices by default, in which case no separate "Adj Close" column is returned.
        data = yf.download(ticker, start = start_date, end = end_date, auto_adjust = False)
        prices = data[info_type]

        # Some yfinance versions return two-level (field, ticker) columns even for
        # a single ticker; selecting the field then gives a one-column frame.
        if isinstance(prices, pd.DataFrame) and prices.shape[1] == 1:
            prices = prices.iloc[:, 0]

        df[ticker] = prices

    return df

In [None]:
# Tickers of the companies treated as "sustainable" in this analysis
stocks_sustainable = [
    "AQN", "BEPC", "CEG", "CSIQ", "DQ",
    "GE", "IBDRY", "JKS", "NEE", "VWDRY",
]

# Tickers of the companies treated as "unsustainable" in this analysis
stocks_unsustainable = [
    "EQNR", "BP", "0857.HK", "COP", "FP.VI",
    "SHEL", "CVX", "SR", "ENEL.MI",
]

# Both groups use the same look-back window (1000 days) and the same price field
df_sustain = get_stock_data(stocks_sustainable, 1000, "Adj Close")
df_unsustain = get_stock_data(stocks_unsustainable, 1000, "Adj Close")

## Processing user input

In [None]:
# Ask for the ticker to analyse. Surrounding whitespace is stripped so that an
# accidental space does not end up in the symbol sent to Yahoo Finance.
ticker = input("Enter a ticker symbol of the asset you want to get processed: ").strip()
if not ticker:
    raise ValueError("No ticker symbol was entered.")

# Download five years of daily data on the specified asset.
# auto_adjust=False is passed explicitly because the trading functions below read
# the "Adj Close" column, which recent yfinance releases omit by default.
df = yf.download(ticker, period = "5y", interval = "1d", auto_adjust = False)

# Trading Strategies

## Baseline strategies

### 1. Random Trading

In [None]:
def simulate_trade(ticker_symbol, initial_balance, num_days, data = None, description = True) -> float:
    """Run a random buy/sell strategy over the most recent `num_days` of prices.

    On each day the strategy picks 'buy' or 'sell' at random. A buy purchases
    1-10 shares if the balance covers the cost; a sell disposes of a random
    part of the current holding. Any shares still held after the last day are
    sold at the last day's price.

    Parameters
    ----------
    ticker_symbol : str
        Ticker to download when `data` is not supplied.
    initial_balance : float
        Cash available at the start of the simulation.
    num_days : int
        Length of the trading window. When downloading, this is the number of
        calendar days before now; when `data` is supplied, the last `num_days`
        rows of `data` are used.
    data : pd.DataFrame or None
        Optional pre-downloaded prices containing an 'Adj Close' column.
    description : bool
        If true, print every trade and the final balance.

    Returns
    -------
    float
        The final account balance (returned whether or not trades are printed).

    Raises
    ------
    ValueError
        If `data` is neither a DataFrame nor None.
    """

    # Download historical stock data if not given
    if data is None:
        now = dt.datetime.now()
        # auto_adjust=False keeps the 'Adj Close' column that is read below
        stock_data = yf.download(ticker_symbol,
                        start=now - dt.timedelta(days=num_days),
                        end=now,
                        auto_adjust=False)
    elif isinstance(data, pd.DataFrame):
        # Restrict supplied data to the requested window; previously the whole
        # frame was traded over and `num_days` was silently ignored.
        stock_data = data.tail(num_days)
    else:
        raise ValueError("Invalid type for 'data'. Expected DataFrame or None.")

    prices = stock_data['Adj Close']

    # Initialize variables for account balance and stock quantity held
    balance = initial_balance
    stock_quantity = 0
    current_price = None

    # Define the trades for each day (days are numbered from 1 in the printout)
    for day in range(1, len(prices) + 1):
        action = random.choice(['buy', 'sell'])

        # Positional lookup: the index holds dates, so .iloc is required
        current_price = prices.iloc[day - 1]

        if action == 'buy':
            # Randomly determine the quantity to buy
            buy_quantity = random.randint(1, 10)
            total_cost = buy_quantity * current_price

            # Only buy what the balance can pay for; otherwise skip the day
            if total_cost <= balance:
                stock_quantity += buy_quantity
                balance -= total_cost

                if description:
                    print(f"Day {day}: Bought {buy_quantity} stocks at ${current_price:.2f} each.")

        elif action == 'sell' and stock_quantity > 0:
            # Randomly determine the quantity to sell
            sell_quantity = random.randint(1, stock_quantity)
            total_earning = sell_quantity * current_price

            stock_quantity -= sell_quantity
            balance += total_earning

            if description:
                print(f"Day {day}: Sold {sell_quantity} stocks at ${current_price:.2f} each.")

    # Sell remaining stocks on the last day
    if stock_quantity > 0:
        balance += stock_quantity * current_price

        if description:
            print(f"Final day: Sold remaining {stock_quantity} stocks at ${current_price:.2f} each.")

    if description:
        print(f"Final balance: ${balance:.2f}")

    return balance

In [None]:
# Run the random strategy 1000 times; every call passes num_days = 30 and a
# starting balance of 100. The generator is seeded first so that re-running
# the notebook reproduces the same set of simulated balances.
random.seed(42)
trade_simulations = [simulate_trade(ticker, 100, 30, data = df, description = False) for _ in range(1000)]

In [None]:
# Plot the trade simulations distribution.
# sns.displot is a figure-level function that creates its own figure, so the
# size is set through height/aspect; a preceding plt.figure(...) call would
# only leave an empty extra figure behind.
grid = sns.displot(trade_simulations, height=4, aspect=2.5)
ax = grid.ax

# Mean simulated balance versus the starting balance of 100
ax.axvline(np.mean(trade_simulations), color='b', linestyle='-', label='Mean final balance')
ax.axvline(100, color='g', linestyle='--', label='Initial balance')
ax.set_title('Avg: $%s\nSD: $%s'%(round(np.mean(trade_simulations),2), round(np.std(trade_simulations),2)), fontsize=20)
ax.legend()
plt.show()

### 2. Buy and Hold

In [None]:
def buy_and_hold(ticker_symbol: str, initial_balance: float, num_days: int, data = None, description = True):
    """Invest the whole balance on the first day and sell on the last day.

    Parameters
    ----------
    ticker_symbol : str
        Ticker to download when `data` is not supplied.
    initial_balance : float
        Amount invested at the first available price of the window.
    num_days : int
        Length of the holding window. When downloading, this is the number of
        calendar days before now; when `data` is supplied, the last `num_days`
        rows of `data` are used.
    data : pd.DataFrame or None
        Optional pre-downloaded prices containing an 'Adj Close' column.
    description : bool
        If true, print the final balance.

    Returns
    -------
    float
        Value of the position at the last price of the window (returned
        whether or not it is printed).

    Raises
    ------
    ValueError
        If `data` is neither a DataFrame nor None, or if no prices are available.
    """

    # Download historical stock data if not given
    if data is None:
        now = dt.datetime.now()
        # auto_adjust=False keeps the 'Adj Close' column that is read below
        stock_data = yf.download(ticker_symbol,
                        start=now - dt.timedelta(days=num_days),
                        end=now,
                        auto_adjust=False)
    elif isinstance(data, pd.DataFrame):
        # Restrict supplied data to the requested holding window
        stock_data = data.tail(num_days)
    else:
        raise ValueError("Invalid type for 'data'. Expected DataFrame or None.")

    prices = stock_data["Adj Close"]
    if len(prices) == 0:
        raise ValueError("No price data available for the requested window.")

    # Fractional number of shares bought with the whole balance on the first day.
    # .iloc is used because the index holds dates, not positions.
    stock_quantity = initial_balance / prices.iloc[0]

    # Final balance: value of those shares at the last price
    final_balance = stock_quantity * prices.iloc[-1]

    if description:
        print(f"Final balance: ${final_balance:.2f}")

    return final_balance


In [None]:
# Buy-and-hold baseline for the chosen ticker: invest 10000 for 30 days
buy_and_hold(ticker_symbol = ticker, initial_balance = 10000, num_days = 30)

## Data based strategies

### Moving Average 

In [None]:
# Simple moving average function
def SMA(data: pd.DataFrame, price_info: str, period: int = 30):
    """Return the simple moving average of one price column.

    `data` is a dataframe of historical prices, `price_info` names the
    column to average (for example Close or Adjusted close) and `period`
    is the rolling window length. The result is the rolling mean of that
    column over `period` rows.
    """

    price_series = data[price_info]
    rolling_window = price_series.rolling(window = period)
    return rolling_window.mean()

In [None]:
# Function to produce trading signals based on an MA strategy
def MA_strategy(data: pd.DataFrame, long_ma: int, short_ma: int) -> pd.DataFrame:
    """Produce buy and sell signals from a moving-average crossover.

    Parameters
    ----------
    data : pd.DataFrame
        Time series of the asset; must contain an "Open" column.
    long_ma : int
        Window length of the long-term moving average.
    short_ma : int
        Window length of the short-term moving average.

    Returns
    -------
    pd.DataFrame
        A copy of `data` with these columns added:
        LongMA / ShortMA (moving averages of "Open"), Signal (1 while the
        short average is above the long one, else 0), Position (day-over-day
        change of Signal), and Buy / Sell (the "Open" price on days where
        Position is +1 / -1, NaN elsewhere). The input frame is not modified.
    """

    # Work on a copy so the caller's dataframe is not mutated as a side effect
    data = data.copy()

    # Add the moving average columns to the dataset
    data["LongMA"] = SMA(data, "Open", period = long_ma)
    data["ShortMA"] = SMA(data, "Open", period = short_ma)

    # Signal is 0 wherever either average is still NaN, because the comparison is False
    data["Signal"] = np.where(data["ShortMA"] > data["LongMA"], 1, 0)

    # +1 marks an upward crossover, -1 a downward crossover
    data["Position"] = data["Signal"].diff()

    # np.nan is used because the np.NaN alias was removed in NumPy 2.0
    data["Buy"] = np.where(data["Position"] == 1, data["Open"], np.nan)
    data["Sell"] = np.where(data["Position"] == -1, data["Open"], np.nan)

    return data

In [None]:
# Crossover signals from a 20-day short and a 50-day long moving average
df_ma = MA_strategy(data=df, short_ma=20, long_ma=50)

In [None]:
# Visualization of Buy and sell signals using Moving Average
plt.figure(figsize = (16,8))
plt.title("Moving Average trade signals", fontsize = 18)

# Price and the two moving averages, labelled so the legend can tell them apart
plt.plot(df_ma["Open"], alpha = 0.5, label = "Open price")
plt.plot(df_ma["ShortMA"], alpha = 0.5, label = "Short MA")
plt.plot(df_ma["LongMA"], alpha = 0.5, label = "Long MA")

# Markers at the crossover days
plt.scatter(df_ma.index, df_ma["Buy"], alpha = 1, label = "Buy Signal", marker = "^", color = "green")
plt.scatter(df_ma.index, df_ma["Sell"], alpha = 1, label = "Sell Signal", marker = "v", color = "red")

plt.xlabel("Date", fontsize = 18)
plt.ylabel("Open price", fontsize = 18)
plt.legend()  # the labels above are only shown once a legend is requested
plt.show()    # call the function; a bare `plt.show` does nothing

### XGBoost Regression 

In [None]:
# Trading strategy
def xgb_signals(data: pd.DataFrame, test_size: float = 0.2) -> pd.DataFrame:
    """Derive buy and sell signals from an XGBoost regression of the close price.

    The model is trained on the "Open" and "Volume" columns to predict
    "Close". The data is split chronologically (no shuffling); signals are
    produced for the test part only, by comparing each predicted close with
    the previous day's actual close.

    Parameters
    ----------
    data : pd.DataFrame
        Asset data containing "Open", "Volume" and "Close" columns.
    test_size : float
        Proportion of the rows, taken from the end, used as the test period.

    Returns
    -------
    pd.DataFrame
        Indexed by the test-period dates that carry a signal, with columns
        Trade_Signal (1 = predicted close above the previous actual close,
        -1 = below) and Close_Price_On_Trade (actual close on that day).
        Neutral rows are dropped; this includes the first test row, which has
        no previous close to compare with.
    """

    # Creating a features dataset and target variable dataset
    features = data[["Open", "Volume"]]
    target = data["Close"]

    # Train test split; shuffle=False keeps the time order intact
    feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=test_size, shuffle=False)

    # train the model
    model = xgb.XGBRegressor()
    model.fit(feature_train, target_train)

    # Predicted values of the model
    predictions = model.predict(feature_test)

    # Previous day's actual close for every test day (NaN for the first one)
    lagged_actual_prices = target_test.shift(1)

    # Define signal conditions
    conditions = [
        predictions > lagged_actual_prices,
        predictions < lagged_actual_prices
    ]
    values = [1, -1]

    # Set up the trade signals DataFrame
    trade_signals_df = pd.DataFrame(index=lagged_actual_prices.index)

    # Include the trade signals in the dataframe
    trade_signals_df['Trade_Signal'] = np.select(conditions, values, default=0)

    # Keep only non-neutral rows. The explicit copy makes the result an
    # independent frame, so the column assignment below does not raise
    # pandas' SettingWithCopyWarning.
    trade_signals_df = trade_signals_df[trade_signals_df['Trade_Signal'] != 0].copy()

    # Add the close prices for the days with trade signals
    trade_signals_df['Close_Price_On_Trade'] = target_test.loc[trade_signals_df.index]

    return trade_signals_df

In [None]:
# XGBoost-based signals for the chosen asset (default 20% test period)
df_trading = xgb_signals(data=df)
df_trading

In [None]:
 # Plot buy and sell signals
buy_signals = df_trading[df_trading['Trade_Signal'] == 1]
sell_signals = df_trading[df_trading['Trade_Signal'] == -1]

plt.figure(figsize=(16, 8))

# Price line promised by the title: the close price on the days that carry a signal
plt.plot(df_trading.index, df_trading['Close_Price_On_Trade'], color='gray', alpha=0.5, label='Close price (signal days)')

plt.scatter(buy_signals.index, buy_signals['Close_Price_On_Trade'], marker='^', color='g', label='Buy Signal')
plt.scatter(sell_signals.index, sell_signals['Close_Price_On_Trade'], marker='v', color='r', label='Sell Signal')

# Customize the plot
plt.title('Buy and Sell Signals with Price Line')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()

### GARCH

In [None]:
from arch import arch_model
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Use the EQNR column alone as a first experiment
df1 = df_unsustain.loc[:, "EQNR"]

In [None]:
# Day-over-day percentage change, scaled to percent; rows without a value are dropped
pct_changes = df1.pct_change()
returns = 100 * pct_changes.dropna()
print(returns)

In [None]:
# Line plot of the EQNR percentage returns
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(returns)
ax.set_ylabel("Pct Return", fontsize=16)
ax.set_title("EQNR Returns", fontsize=20)

### PACF

In [None]:
# Partial autocorrelation of the squared returns
squared_returns = returns**2
plot_pacf(squared_returns)
plt.show()  # Not very autocorrelated returns variance

In [None]:
# Volatility model for the EQNR returns with p = 2 and q = 2
model_1 = arch_model(returns, p=2, q=2)

In [None]:
# Estimate the model specified in the previous cell (arch_model on `returns`, p = 2, q = 2)
model_fit = model_1.fit()

In [None]:
# Estimates are insignificant -> this specification is not a useful choice
eqnr_garch_summary = model_fit.summary()
print(eqnr_garch_summary)

## 1. AQN

In [None]:
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Pick the AQN column from the sustainable-stocks dataset and preview it
df2 = df_sustain.loc[:, "AQN"]
df2.head(3)

In [None]:
# Price level of AQN over time -> non-stationary
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df2)
ax.set_ylabel("Price", fontsize=16)
ax.set_title("AQN price", fontsize=20)

In [None]:
# Daily percentage returns of AQN (percentage change of the price, scaled to percent)
df2_returns = 100*df2.pct_change().dropna()

# This cell and the following AQN cells refer to the series as `df2_diff`;
# bind that name as well so they do not fail with a NameError on a fresh kernel.
df2_diff = df2_returns

# seems more stationary now
plt.figure(figsize = (10, 4))
plt.plot(df2_diff)
plt.ylabel("Returns", fontsize = 16)
plt.title("AQN Returns", fontsize = 20)

In [None]:
# ACF to examine the MA part -> MA(1) seems likely
plot_acf(df2_diff)
plt.show()  # call the function; a bare `plt.show` does nothing

In [None]:
# PACF to examine the AR part -> AR(1) seems likely
plot_pacf(df2_diff, method = "ywm")
plt.show()  # call the function; a bare `plt.show` does nothing

### Try ARMA(1,1)

In [None]:
# Chronological split of the AQN returns into training and testing sets.
# NOTE(review): the boundaries are fixed calendar dates while the prices are
# downloaded relative to today - confirm they still fall inside the data.
train_end = dt.datetime(year=2023, month=9, day=1)
test_end = dt.datetime(year=2023, month=12, day=31)
test_start = train_end + dt.timedelta(days=1)

# Training set: everything up to train_end; testing set: the day after up to test_end
train_data = df2_diff.loc[:train_end]
test_data = df2_diff.loc[test_start:test_end]

In [None]:
# Specify an ARIMA model of order (1, 0, 1) on the training returns
arma_order = (1, 0, 1)
model_1 = ARIMA(train_data, order=arma_order)

In [None]:
# Fit the model stored in `model_1` and keep the fitted results
model_1_fit = model_1.fit()

In [None]:
# Summary table -> only sigma2 is significant; using neither MA nor AR terms makes sense
arma_summary = model_1_fit.summary()
print(arma_summary)

## 2. Shell

In [None]:
# Take the SHEL column from the unsustainable-stocks dataset
df3 = df_unsustain.loc[:, "SHEL"]

# Line plot of the price
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df3)
ax.set_ylabel("Price", fontsize=16)
ax.set_title("Shell Price", fontsize=20)

In [None]:
# Daily percentage returns: day-over-day change, missing rows dropped, scaled to percent
shell_pct_changes = df3.pct_change()
returns_shell = 100 * shell_pct_changes.dropna()

# Line plot of the returns
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(returns_shell)
ax.set_ylabel("Returns", fontsize=16)
ax.set_title("Shell Returns", fontsize=20)

In [None]:
# ACF of the Shell returns
plot_acf(returns_shell)
plt.show()  # call the function; a bare `plt.show` does nothing

In [None]:
# PACF -> not a good candidate for ARIMA
plot_pacf(returns_shell, method = "ywm")
plt.show()  # call the function; a bare `plt.show` does nothing

### Try GARCH

In [None]:
# ACF of the squared Shell returns
plot_acf(returns_shell**2)
plt.show()  # call the function; a bare `plt.show` does nothing

In [None]:
# PACF of the squared Shell returns
plot_pacf(returns_shell**2, method = "ywm")
plt.show()  # call the function; a bare `plt.show` does nothing

### GARCH(3,3)

In [None]:
# Chronological split of the Shell returns into training and testing sets:
# training runs up to 2023-09-01, testing covers the following 90 days
train_end_shell = dt.datetime(year=2023, month=9, day=1)
test_end_shell = train_end_shell + dt.timedelta(days=90)
test_start_shell = train_end_shell + dt.timedelta(days=1)

train_data_shell = returns_shell.loc[:train_end_shell]
test_data_shell = returns_shell.loc[test_start_shell:test_end_shell]

In [None]:
# Specify the volatility model for the Shell training returns with p = 3 and q = 3
model_garch_shell = arch_model(train_data_shell, q=3, p=3)

In [None]:
# Fit the model stored in `model_garch_shell` and keep the fitted results
fit_garch_shell = model_garch_shell.fit()

In [None]:
# Print the estimation summary of the p = 3, q = 3 fit
shell_garch_summary = fit_garch_shell.summary()
print(shell_garch_summary)

### Try GARCH(1,3)

In [None]:
# Specify a second model on the same training returns with p = 1 and q = 3,
# then estimate it
model_garch_shell_2 = arch_model(train_data_shell, q=3, p=1)
garch_shell_2_fit = model_garch_shell_2.fit()

In [None]:
# Print the estimation summary of the second fit
shell_garch_2_summary = garch_shell_2_fit.summary()
print(shell_garch_2_summary) # seems like a better fit