In [2]:
# Import libraries 
import yfinance as yf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [3]:
stock_symbol = 'AAPL'
data = yf.download(stock_symbol, period='5y', interval='1d')

# Feature Engineering
data['Daily Return'] = data['Close'].pct_change()
data['Moving Average'] = data['Close'].rolling(window=20).mean()
data.dropna(inplace=True)

# Define features and target
features = ['Daily Return', 'Moving Average']
data['Target'] = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1: Buy, 0: Sell

# Split data into training and testing sets
X = data[features]
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predict on the test set and evaluate
y_pred = model.predict(X_test_scaled)

# Preparing to predict for the future
# Use the most recent data to generate features for future prediction
recent_data = data[-20:].copy()  # Make sure to copy to avoid warnings

# Generate features for prediction
recent_data['Daily Return'] = recent_data['Close'].pct_change()
recent_data['Moving Average'] = recent_data['Close'].rolling(window=20).mean()
recent_data.dropna(inplace=True)

# Display recent data and calculated features
print("Most Recent Data:\n", recent_data.tail(10))

# Use only the most recent row for future prediction, keeping it in DataFrame format
latest_features = recent_data[features].iloc[-1:]

# Show the feature values before scaling
print("\nFeature Values for Prediction (Unscaled):\n", latest_features)

# Predict using the trained model
latest_features_scaled = scaler.transform(latest_features)
future_prediction = model.predict(latest_features_scaled)

# Interpret the prediction
action = "Buy" if future_prediction[0] == 1 else "Sell"

# Show scaled feature values and prediction
print("\nFeature Values for Prediction (Scaled):\n", latest_features_scaled)
print("Future prediction for the next day: ", action)

[*********************100%%**********************]  1 of 1 completed

Most Recent Data:
                   Open        High         Low       Close   Adj Close  \
Date                                                                     
2024-08-23  225.658997  228.220001  224.339996  226.839996  226.839996   

              Volume  Daily Return  Moving Average  Target  
Date                                                        
2024-08-23  38607139      0.010288      219.733501       0  

Feature Values for Prediction (Unscaled):
             Daily Return  Moving Average
Date                                    
2024-08-23      0.010288      219.733501

Feature Values for Prediction (Scaled):
 [[0.41537491 2.45170755]]
Future prediction for the next day:  Sell



