In [54]:
import yfinance as yf
import numpy as np
import pandas as pd
from datetime import datetime, timedelta, time
from pytz import timezone
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Fetch data from Yahoo Finance
symbol = "AMD"
amd = yf.Ticker(symbol)
pacific = timezone('America/Los_Angeles')
today_pacific = datetime.now(pacific)
thirty_days_ago_pacific = today_pacific - timedelta(days=59)
data = amd.history(interval='15m', start=thirty_days_ago_pacific, end=today_pacific)

# Identify first 3 hours of trading (9:30 AM to 12:30 PM)
market_open = time(6, 30)
first_3_hours = [t for t in data.index if market_open <= t.time() <= time(15, 30)]

# Add basic features
data['Hour'] = data.index.hour
data['Minute'] = data.index.minute


data['PriceRange'] = data['High'] - data['Low']
data['AveragePrice'] = (data['High'] + data['Low'] + data['Close']) / 3
data['MovingAverage'] = data['Close'].rolling(window=3).mean().fillna(data['Close'])

# Add lag features
for lag in range(1, 4):
    data[f'PriceRange_lag{lag}'] = data['PriceRange'].shift(lag)
    data[f'AveragePrice_lag{lag}'] = data['AveragePrice'].shift(lag)
    data[f'MovingAverage_lag{lag}'] = data['MovingAverage'].shift(lag)



data['Returns'] = data['Close'].pct_change()
data['PriceChange'] = data['Close'].diff()
data['Volatility'] = data['Close'].rolling(window=5).std()
data['Volume'] = amd.history(interval='15m', start=thirty_days_ago_pacific, end=today_pacific)['Volume']
data['PercentIncrease'] = data['Close'].pct_change() * 100

ordered_columns = [
    'Hour', 'Minute',
    'PriceRange_lag1', 'AveragePrice_lag1', 'MovingAverage_lag1',
    'PriceRange_lag2', 'AveragePrice_lag2', 'MovingAverage_lag2',
    'PriceRange_lag3', 'AveragePrice_lag3', 'MovingAverage_lag3',
    'PriceRange', 'AveragePrice', 'MovingAverage',
    'Returns', 'PriceChange', 'Volatility', 'Volume', 'PercentIncrease'
]

data = data[ordered_columns]  # Reorder the DataFrame columns


# Determine if profit can be made within the next 2 hours
def profitable_within_next_4_hours(df, start_time):
    end_time = min(start_time + timedelta(hours=2), start_time.replace(hour=16, minute=0))
    relevant_data = df[(df.index > start_time) & (df.index <= end_time)]
    if not relevant_data.empty:
        return relevant_data['MovingAverage'].max() - df.at[start_time, 'MovingAverage'] >= 1
    return False

first_3_hours_df = data.loc[first_3_hours]
first_3_hours_df['Profitable'] = [profitable_within_next_4_hours(data, t) for t in first_3_hours_df.index]

# Merge with the original data
data['Profitable'] = np.nan
data['Profitable'] = first_3_hours_df['Profitable'].combine_first(data['Profitable'])
data = data.dropna()

# Convert labels to binary
data['Profitable'] = data['Profitable'].astype(int)

# Build pipeline and train model
# Update feature list
features = ['Hour', 'Minute',
    'PriceRange_lag1', 'AveragePrice_lag1', 'MovingAverage_lag1',
    'PriceRange_lag2', 'AveragePrice_lag2', 'MovingAverage_lag2',
    'PriceRange_lag3', 'AveragePrice_lag3', 'MovingAverage_lag3',
    'PriceRange', 'AveragePrice', 'MovingAverage',
    'Returns', 'PriceChange', 'Volatility', 'Volume', 'PercentIncrease']

X = data[features]
y = data['Profitable']

# Adjust test size if necessary
test_size = min(0.2, max(1 / len(X), 0.2))

# Re-split and re-train the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)

# Rebuild the pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression())
])

pipeline.fit(X_train, y_train)

print(f"Number of samples: {len(X)}")
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")
print(f"Model accuracy: {pipeline.score(X_test, y_test)}")

Number of samples: 1021
Training samples: 816
Testing samples: 205
Model accuracy: 0.8292682926829268


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Profitable'] = data['Profitable'].astype(int)


In [55]:
import tensorflow as tf
import numpy as np
import random

# Set seed for reproducibility
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)

from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE

# Convert DataFrame X to a numpy array
X_array = X.to_numpy()

sequence_length = X.shape[1]
num_features = X.shape[2] if X.ndim > 2 else 1

# Reshape the numpy array
X_res = X_array.reshape(-1, sequence_length * num_features)

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_res, y)

# Reshape back to original dimensions
X_res = X_res.reshape(-1, sequence_length, num_features)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)

# Compute class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))


# Build the model
model = models.Sequential([
    layers.Input(shape=(sequence_length, num_features)),
    layers.LSTM(64, activation='tanh'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc')
    ]
)
model.summary()

# Train the model
model.fit(
    X_train, y_train, epochs=200, batch_size=32, validation_split=0.2,
    class_weight=class_weights
)

# Evaluate the model
predictions = model.predict(X_test)
predictions = (predictions > 0.5).astype(int).flatten()

test_accuracy = np.mean(predictions == y_test)
precision = precision_score(y_test, predictions, zero_division=0)
recall = recall_score(y_test, predictions, zero_division=0)
f1 = f1_score(y_test, predictions, zero_division=0)
roc_auc = roc_auc_score(y_test, predictions)
confusion = confusion_matrix(y_test, predictions)

print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", classification_report(y_test, predictions, zero_division=0))


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 64)                16896     
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16961 (66.25 KB)
Trainable params: 16961 (66.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch

In [57]:
import yfinance as yf
import numpy as np

def predict_next_2_hours_profit(shares, model):
    # Fetch recent stock data
    latest_data = yf.Ticker("AMD").history(period="5d", interval="15m")

    # Calculate features related to time first
    latest_data['Hour'] = latest_data.index.hour
    latest_data['Minute'] = latest_data.index.minute

    # Calculate direct features from stock data
    latest_data['PriceRange'] = latest_data['High'] - latest_data['Low']
    latest_data['AveragePrice'] = (latest_data['High'] + latest_data['Low'] + latest_data['Close']) / 3
    latest_data['MovingAverage'] = latest_data['Close'].rolling(window=3).mean().fillna(latest_data['Close'])

    # Calculate lag features
    for lag in range(1, 4):
        latest_data[f'PriceRange_lag{lag}'] = latest_data['PriceRange'].shift(lag)
        latest_data[f'AveragePrice_lag{lag}'] = latest_data['AveragePrice'].shift(lag)
        latest_data[f'MovingAverage_lag{lag}'] = latest_data['MovingAverage'].shift(lag)

    # Calculate dynamic features that can influence immediate trading decisions
    latest_data['Returns'] = latest_data['Close'].pct_change()
    latest_data['PriceChange'] = latest_data['Close'].diff()
    latest_data['Volatility'] = latest_data['Close'].rolling(window=5).std()
    latest_data['Volume'] = latest_data['Volume']
    latest_data['PercentIncrease'] = latest_data['Close'].pct_change() * 100

    # Drop rows with NaN values which are typically present due to rolling calculations
    latest_data.dropna(inplace=True)

    # Define feature columns in the desired order for the model input
    ordered_feature_columns = [
        'Hour', 'Minute',
    'PriceRange_lag1', 'AveragePrice_lag1', 'MovingAverage_lag1',
    'PriceRange_lag2', 'AveragePrice_lag2', 'MovingAverage_lag2',
    'PriceRange_lag3', 'AveragePrice_lag3', 'MovingAverage_lag3',
    'PriceRange', 'AveragePrice', 'MovingAverage',
    'Returns', 'PriceChange', 'Volatility', 'Volume', 'PercentIncrease'
    ]

    if len(latest_data) < 15:
        raise ValueError("Not enough data to make a prediction.")

    # Select and reorder data for the model prediction
    latest_features = latest_data[ordered_feature_columns]

    # Model prediction
    prediction_prob = model.predict(latest_features)[-10:]  # Consider last 10 predictions
    print(prediction_prob)
    average_prob = prediction_prob.mean()
    should_enter_trade = average_prob > 0.5

    # Retrieve latest metrics for detailed output
    latest_metrics = latest_data.iloc[-1]
    close_price = latest_metrics['Close']
    average_price = latest_metrics['AveragePrice']
    moving_average = latest_metrics['MovingAverage']

    # Calculate potential profit or loss
    if should_enter_trade:
        predicted_future_price = close_price + (2 * average_prob)
        predicted_profit = (predicted_future_price - close_price) * shares
    else:
        predicted_future_price = close_price - (2 * average_prob)
        predicted_profit = (close_price - predicted_future_price) * shares

    decision = "enter" if should_enter_trade else "DO NOT ENTER"
    print(f"{decision} the trade with a predicted future price of ${predicted_future_price:.2f}.")
    print(f"Prediction probability: {average_prob:.4f}")
    print(f"Predicted profit/loss: ${predicted_profit:.2f}")


    return should_enter_trade, predicted_profit

# Usage example
try:
    shares = 20
    should_enter_trade, predicted_profit = predict_next_2_hours_profit(shares, model)
except ValueError as e:
    print(f"Error making prediction: {e}")


[[0.29057485]
 [0.18620762]
 [0.24984536]
 [0.04673165]
 [0.00792403]
 [0.03953189]
 [0.00222253]
 [0.00224871]
 [0.00129906]
 [0.0015697 ]]
DO NOT ENTER the trade with a predicted future price of $166.20.
Prediction probability: 0.0828
Predicted profit/loss: $3.31
