In [2]:
import pandas as pd
import numpy as np 
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, RocCurveDisplay
from statistics import mean, median
import matplotlib

In [3]:
def get_st_momentum(num_days, closing_prices):
    # Average of a stock's momentum over the past num_days. Each day is labeled 1 if
    # the closing price that day is higher than the closing price of the day before, and -1 if its lower
    momentum = []
    stock_momentum = []

    for i in range(num_days, len(closing_prices)):
        momentum.append(1 if closing_prices[i] > closing_prices[i - 1] else -1)

    for i in range(num_days, len(closing_prices)):
        stock_momentum.append(mean(momentum[i - num_days:i]))

    return stock_momentum

In [4]:
def get_volatility(num_days, closing_prices):
    # Stock price volatility. This is an average over the past num_days of
    # percent change in a stock's price per day
    volatility = []
    avg_volatility = []

    for i in range(num_days, len(closing_prices)):
        volatility.append((closing_prices[i] - closing_prices[i-1])/closing_prices[i-1])

    for i in range(num_days, len(closing_prices)):
        avg_volatility.append(mean(volatility[i - num_days:i]))

    return avg_volatility

In [5]:
df  = pd.read_csv("C:\\Users\\Sepehr\\Desktop\\SN_Project_final\\3_Data_for_ML_Plot_withImportance\\topico_financial.csv")
num_days = 1 # prediction threshold
n = 3 # number of days before 
    
closing_prices = list(df['close'])
    
#feature vectors
df = df[n:]
df['Stock_Momentum'] = get_st_momentum(n, closing_prices)
df['Volatility'] = get_volatility(n, closing_prices)
df = df[['date','Volatility', 'Stock_Momentum']]

    
df = df[:len(df)-num_days]
    
#convert input features into array
    
X = np.array(df)
    
# Create Y vector; defined as whether a stock will increase or decrease in price in num_days
Y = []
for i in range(len(closing_prices)-num_days):
    if (closing_prices[i+num_days]- closing_prices[i])/closing_prices[i] > 0.005 :
        Y.append(1)
    if (closing_prices[i+num_days]- closing_prices[i])/closing_prices[i] < -0.005 :
        Y.append(-1)
    if (closing_prices[i+num_days]- closing_prices[i])/closing_prices[i] >= -0.005 and (closing_prices[i+num_days]- closing_prices[i])/closing_prices[i] <= 0.005 :
        Y.append(0)


# Adjust length of Y to match X if needed
if len(Y) > len(X):
    adjustment = len(Y) - len(X)
    Y = Y[adjustment:]    


    


In [6]:
# Split training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25, shuffle= False, random_state = 0)

In [7]:
X_test = pd.DataFrame(X_test)
X_train = pd.DataFrame(X_train)
dates = X_test[0]
dates

0      20210522.0
1      20210523.0
2      20210524.0
3      20210525.0
4      20210526.0
          ...    
168    20220130.0
169    20220131.0
170    20220201.0
171    20220202.0
172    20220205.0
Name: 0, Length: 173, dtype: float64

In [8]:
X_test = X_test.iloc[:, 1:3]
X_train = X_train.iloc[:,1:3]


In [9]:
X_test = np.array(X_test)


In [10]:
X_train = np.array(X_train)

In [11]:
X_test = preprocessing.scale(X_test)
X_train = preprocessing.scale(X_train)

In [12]:
# Construct and build classifier
clf = svm.SVC(kernel='rbf', gamma='scale')
clf.fit(X_train, Y_train)

SVC()

In [13]:
# Calculate accuracy
score = clf.score(X_test, Y_test)


In [14]:
predicted = pd.DataFrame([clf.predict(X_test),Y_test])
predicted = predicted.transpose()
predicted = predicted.rename(columns={0:'Predicted',1:'True'})

In [15]:
predicted['Dates'] = dates
predicted = predicted [['Dates','Predicted','True']]

In [16]:
predicted.to_csv('financial_indicator.csv', index = False)
print("score = %f" %score)

score = 0.676301
