In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler

# Model

In [2]:
class AdaptiveSVM:

    def __init__(self, kernel='rbf', C=1.0, epsilon=0.1):
        self.model = SVR(kernel=kernel, C=C, epsilon=epsilon)
        self.scaler = MinMaxScaler()
        self.a=1
        self.b=1

    def prepare_input(self,data):
        def relative_difference_percentage(prices, period=5):
          rdp = ((prices.shift(period-5) - prices.shift(period)) / prices.shift(period)) * 100
          return rdp
        rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
        rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
        rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
        rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
        ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
        df=pd.DataFrame(rdp5,columns=["rdp5"])
        df["rdp5"]=rdp5
        df["rdp10"]=rdp10
        df["rdp15"]=rdp15
        df["rdp20"]=rdp20
        df["ema100"]=ema100
        df["transfomed_cp"]=data-df["ema100"]
        ## goes into input of SVM
        ema3= data.ewm(span=3,adjust=False).mean()
        rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')
        df["rdp+5"]=rdp_plus_5
        df["ema3"]=ema3
        # Instead of dropping rows with NaN, fill them with 0
        df.fillna(0, inplace=True)
        return df

    def adaptive_weights(self, n_samples): ##n_samples = length of training dataset
        i = np.arange(1, n_samples + 1)
        C_i = self.model.C * (2 / (1 + np.exp(self.a - 2 * self.a * i / n_samples)))
        epsilon_i = self.model.epsilon * ((1 + np.exp(self.b - 2 * self.b * i / n_samples)) / 2)
        return C_i, epsilon_i

    def train(self, X):
        input = self.prepare_input(X).iloc[:, [0, 1, 2, 3, 5]]
        output = self.prepare_input(X).iloc[:, 6]
        C_i, epsilon_i = self.adaptive_weights(len(X))
        for i in range(len(X)):
            self.model.C = C_i[i]
            self.model.epsilon = epsilon_i[i]
            # Convert the DataFrame slice to a NumPy array and then reshape
            input_reshaped = input.iloc[i:i + 1].values.reshape(1, -1)
            self.model.fit(input_reshaped, output.iloc[i:i + 1])
        self.final_model= SVR(kernel='rbf', C=self.model.C, epsilon=self.model.epsilon)
        self.final_model.fit(input,output)
        return self.final_model

    def predict(self,X):
        input = self.prepare_input(X).iloc[:, [0, 1, 2, 3, 5]]
        return self.final_model.predict(input)

    def get_params(self, deep=True):
        return {
            'kernel': self.model.kernel,
            'C': self.model.C,
            'epsilon': self.model.epsilon
        }
    def check_accuracy(self, Y_pred, y):
        accuracy = np.mean(np.abs(Y_pred-y)) ## MAE
        return accuracy

# Trial on Apple stocks

In [3]:
ticker = "AAPL"
aapl = yf.download(ticker, start="2020-01-01", end="2025-01-01")['Close']
# Create an instance of the AdaptiveSVM class
asvm_model = AdaptiveSVM()
# Call the prepare_input method on the instance with the input data
input_data = asvm_model.prepare_input(aapl["AAPL"])  # Pass aapl["AAPL"] as the data argument
print(input_data) ## just to check

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

                rdp5     rdp10     rdp15     rdp20      ema100  transfomed_cp  \
Date                                                                            
2020-01-02  3.089726  3.089726  3.089726  3.089726   72.716064       0.000000   
2020-01-03  3.089726  3.089726  3.089726  3.089726   72.702066      -0.692933   
2020-01-06  3.089726  3.089726  3.089726  3.089726   72.699706      -0.116798   
2020-01-07  3.089726  3.089726  3.089726  3.089726   72.690634      -0.449095   
2020-01-08  3.089726  3.089726  3.089726  3.089726   72.704753       0.698896   
...              ...       ...       ...       ...         ...            ...   
2024-12-24  1.862091  2.304553  2.110036  4.199767  229.742268      28.174175   
2024-12-26  4.422487  0.632886  1.432041  3.382115  230.316391      28.419113   
2024-12-27  2.321955  0.738015  2.024364  3.452091  230.811300      24.497995   
2024-12-30 -0.899841  2.563178  2.178391  2.321657  231.229354      20.693665   
2024-12-31 -1.899952  1.6849


  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')


In [4]:
asvm_model.train(aapl["AAPL"][:-15])  # train on all except last 15 dataset

  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')
  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')


In [5]:
predictions = asvm_model.predict(aapl["AAPL"][-15:])

  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')


In [6]:
print(predictions)

[ 2.52246642  2.52127454  2.52296574  2.52341135  2.53739868  2.54584278
  1.00505118  1.10623359  2.77203626  1.99101723  2.11552255  3.98729385
  2.23103807 -0.33739404 -1.47187674]


In [7]:
print(asvm_model.check_accuracy(predictions,input_data["rdp+5"][-15:]))

0.5368491430670429


RDP+5 represents the predicted percentage change in the smoothed (EMA3) closing price over the next five days.

A positive RDP+5 suggests the price is expected to increase.

A negative RDP+5 suggests the price is expected to decrease.

2. Basic Trading Strategy:

The simplest strategy is:

Buy Signal: If RDP+5 >= 0, then buy the stock.

Sell Signal: If RDP+5 < 0, then sell the stock.

In [10]:
df=aapl.iloc[-len(predictions):,:]
df["action"] = 0  # Initialize the 'action' column with 0s
for i in range(len(predictions)):
  if predictions[i] > 0:
    # print("Buy")
    df.iloc[i, df.columns.get_loc("action")] = 1  # Use get_loc to find the column index of 'action'
  else:
    # print("Sell")
    df.iloc[i, df.columns.get_loc("action")] = -1  # Use get_loc to find the column index of 'action'

df["returns_b&h"]=np.log(df["AAPL"]/df["AAPL"].shift(1)) ## the return thta would have come if i had simply bought and held the stock
df["my_returns"]=np.log(df["AAPL"]/df["AAPL"].shift(1))*df["action"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["action"] = 0  # Initialize the 'action' column with 0s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["returns_b&h"]=np.log(df["AAPL"]/df["AAPL"].shift(1)) ## the return thta would have come if i had simply bought and held the stock
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["my_retur

In [11]:
df

Ticker,AAPL,action,returns_b&h,my_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-12-10,247.497879,1,,
2024-12-11,246.219284,1,-0.005179,-0.005179
2024-12-12,247.687683,1,0.005946,0.005946
2024-12-13,247.857483,1,0.000685,0.000685
2024-12-16,250.764282,1,0.011659,0.011659
2024-12-17,253.201599,1,0.009673,0.009673
2024-12-18,247.777573,1,-0.021655,-0.021655
2024-12-19,249.515656,1,0.00699,0.00699
2024-12-20,254.21051,1,0.018641,0.018641
2024-12-23,254.989655,1,0.00306,0.00306


In [12]:
np.exp(df["my_returns"].sum())

1.0528584634002371

In [13]:
np.exp(df["returns_b&h"].sum())

1.010695423883853

my_returns is greater

# Trial on amazon stocks

In [24]:
tickers= ["AAPL","AMZN","NFLX","GOOGL","MSFT","SPY","NVDA"]
all_returns = {}

for ticker in tickers: # Change loop variable to 'ticker'
  data=yf.download(ticker,start="2020-01-01",end="2025-02-28")["Close"]
  input_data = asvm_model.prepare_input(data[ticker])  # Use 'ticker' here
  asvm_model.train(data[ticker][:-15])  # Use 'ticker' here
  predictions = asvm_model.predict(data[ticker][-15:]) # Use 'ticker' here
  # print(asvm_model.check_accuracy(predictions,input_data["rdp+5"][-15:]))
  df=data.iloc[-len(predictions):,:]
  df["action"] = 0  # Initialize the 'action' column with 0s
  for j in range(len(predictions)): # Change inner loop variable to 'j'
    if predictions[j] > 0:
      # print("Buy")
      df.iloc[j, df.columns.get_loc("action")] = 1  # Use get_loc to find the column index of 'action'
    else:
      # print("Sell")
      df.iloc[j, df.columns.get_loc("action")] = -1  # Use get_loc to find the column index of 'action'

  df["returns_b&h"]=np.log(df[ticker]/df[ticker].shift(1)) ## Use 'ticker' to access the stock column
  df["my_returns"]=np.log(df[ticker]/df[ticker].shift(1))*df["action"] ## Use 'ticker' to access the stock column
  my_return= np.exp(df['my_returns'].sum())
  basic_return = np.exp(df['returns_b&h'].sum())

  all_returns[ticker] = my_return-basic_return  # Store excess returns for this ticker


[*********************100%***********************]  1 of 1 completed
  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,period=5).fillna(method='bfill')
  rdp5 = relative_difference_percentage(data, period=5).fillna(method='bfill')
  rdp10 = relative_difference_percentage(data, period=10).fillna(method='bfill')
  rdp15 = relative_difference_percentage(data, period=15).fillna(method='bfill')
  rdp20 = relative_difference_percentage(data, period=20).fillna(method='bfill')
  ema100 = data.ewm(span=100, adjust=False).mean().fillna(method='bfill')
  rdp_plus_5= relative_difference_percentage(ema3,perio

TypeError: cannot concatenate object of type '<class 'numpy.float64'>'; only Series and DataFrame objs are valid

In [25]:
all_returns ## this denotes the excess returns

{'AAPL': 0.08533408671661746,
 'AMZN': 0.2701397776939035,
 'NFLX': 0.1249050118851015,
 'GOOGL': 0.25765572668960235,
 'MSFT': 0.14947430151578012,
 'SPY': 0.0853620343627528,
 'NVDA': 0.2351350668216584}

Everywhere we get better output than in  basic returns