In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [4]:
def generate_time_series_data(ticker, n, ticker_interval, min_price, max_price, min_volume, max_volume, num_moving_averages, moving_average_periods):
    # Calculate the number of trading days in the interval
    trading_days = pd.date_range(end=pd.Timestamp.today(), periods=n, freq=ticker_interval).strftime('%Y-%m-%d')
    
    # Initialize empty dataframe for the time series data
    time_series_data = pd.DataFrame(index=range(len(trading_days)), columns=['date', 'ticker', 'open', 'high', 'low', 'close', 'volume'])
    
    # Generate random price data that increases and decreases
    price_data = np.cumsum(np.random.normal(size=n)) + np.random.randint(low=min_price, high=max_price)
    price_data = np.maximum(price_data, np.zeros_like(price_data))  # prevent negative prices
    
    # Generate OHLCV data based on the price data
    ohlcv_data = []
    for i, price in enumerate(price_data):
        open_price = price - np.random.randint(low=1, high=10)
        close_price = price + np.random.randint(low=1, high=10)
        high_price = max(price, open_price, close_price) + np.random.randint(low=1, high=10)
        low_price = min(price, open_price, close_price) - np.random.randint(low=1, high=10)
        volume = np.random.randint(low=min_volume, high=max_volume)
        ohlcv_data.append((open_price, high_price, low_price, close_price, volume))
    ohlcv_data = np.array(ohlcv_data)
    
    # Generate moving average data that aligns with the price data
    moving_average_data = np.zeros((n, num_moving_averages))
    for i in range(num_moving_averages):
        period = moving_average_periods[i]
        moving_average_data[:,i] = np.convolve(price_data, np.ones((period,))/period, mode='same')
        
    # Set buy_signal to True if smallest moving average is greater than any of the other moving averages
    buy_signal = moving_average_data[:,0] > np.max(moving_average_data[:,1:], axis=1)
    
    # Store the generated data in the time series data dataframe
    time_series_data['date'] = trading_days
    time_series_data['ticker'] = ticker
    time_series_data[['open', 'high', 'low', 'close', 'volume']] = ohlcv_data
    for i in range(num_moving_averages):
        time_series_data[f'ma_{moving_average_periods[i]}'] = moving_average_data[:,i]
    time_series_data['buy_signal'] = buy_signal
    
    return time_series_data


In [5]:
data = generate_time_series_data('AAPL', 1000, '30min', 100, 200, 100000, 5000000, 3, [10, 50, 200])
data[data["buy_signal"] == False]

Unnamed: 0,date,ticker,open,high,low,close,volume,ma_10,ma_50,ma_200,buy_signal
0,2023-04-13,AAPL,186.585677,195.585677,183.585677,193.585677,102339.0,95.819271,98.128565,97.407652,False
44,2023-04-14,AAPL,186.503462,206.503462,181.503462,201.503462,3408521.0,195.210784,195.636130,136.781796,False
45,2023-04-14,AAPL,184.829568,204.829568,180.829568,202.829568,2012290.0,194.607300,195.544876,137.638955,False
46,2023-04-14,AAPL,185.033027,206.033027,183.033027,199.033027,417774.0,194.089733,195.523270,138.499652,False
47,2023-04-14,AAPL,182.641300,202.641300,177.641300,196.641300,374673.0,193.803386,195.455673,139.363360,False
...,...,...,...,...,...,...,...,...,...,...,...
971,2023-05-03,AAPL,154.658884,171.658884,148.658884,165.658884,4620047.0,158.082072,160.118779,101.796433,False
972,2023-05-03,AAPL,152.169392,168.169392,148.169392,162.169392,2812963.0,157.913922,160.274659,101.023321,False
973,2023-05-03,AAPL,153.556237,173.556237,152.556237,166.556237,3345363.0,157.701893,160.420225,100.246548,False
974,2023-05-03,AAPL,150.614028,166.614028,144.614028,160.614028,2392847.0,157.612480,160.514338,99.475867,False


In [2]:
data = generate_time_series_data('AAPL', 1000, '30min', 100, 200, 100000, 5000000, 3, [10, 50, 200])

NameError: name 'generate_time_series_data' is not defined

In [3]:
# Random forest

# Prepare the data for the random forest model
X = data.drop(['date', 'ticker', 'buy_signal'], axis=1)
y = data['buy_signal']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Instantiate and fit the random forest model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict the buy signal for the test data and calculate the accuracy
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy of random forest: {accuracy}')


NameError: name 'data' is not defined

In [9]:
# SVMs

# Prepare the data for the SVM model
X = data.drop(['date', 'ticker', 'buy_signal'], axis=1)
y = data['buy_signal']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and fit the SVM model
svm = SVC(kernel='linear', C=1, random_state=42)
svm.fit(X_train, y_train)

# Predict the buy signal for the test data and calculate the accuracy
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy of SVM: {accuracy}')


Accuracy of SVM: 0.58


In [16]:
#Using PyTorch 

# Load the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim


data = generate_time_series_data('AAPL', 1000, '30min', 100, 200, 100000, 5000000, 3, [10, 50, 200])


# Prepare the data for the SVM model
X = data.drop(['date', 'ticker', 'buy_signal'], axis=1).values
y = data['buy_signal'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)

# Define the SVM model
class SVM(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(X_train.shape[1], 1)
        
    def forward(self, x):
        x = self.fc(x)
        return x
    
model = SVM()

# Define the loss function and optimizer
criterion = nn.MarginRankingLoss(margin=1)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the SVM model
for epoch in range(100):
    # Forward pass
    y_pred = model(X_train).squeeze()
    loss = criterion(y_pred, 2*y_train-1, torch.ones_like(y_train))
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Print the loss every 10 epochs
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/100], Loss: {loss.item():.4f}')

# Predict the buy signal for the test data and calculate the accuracy
with torch.no_grad():
    y_pred = model(X_test).squeeze().numpy()
y_pred[y_pred >= 0] = 1
y_pred[y_pred < 0] = -1
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy of SVM: {accuracy}')


Epoch [10/100], Loss: 0.0000
Epoch [20/100], Loss: 0.0000
Epoch [30/100], Loss: 0.0000
Epoch [40/100], Loss: 0.0000
Epoch [50/100], Loss: 0.0000
Epoch [60/100], Loss: 0.0000
Epoch [70/100], Loss: 0.0000
Epoch [80/100], Loss: 0.0000
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.0000
Accuracy of SVM: 0.455


In [4]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Generate time series data
data = generate_time_series_data('AAPL', 1000, '1d', 100, 200, 100000, 5000000, 3, [20, 50, 100])

# Prepare the data for the CNN model
X = data.drop(['date', 'ticker', 'buy_signal'], axis=1).values.reshape((-1, X.shape[1], 1))
y = data['buy_signal'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and compile the CNN model
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the CNN model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Predict the buy signal for the test data and calculate the accuracy
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy of CNN: {accuracy}')


: 

: 