<a href="https://colab.research.google.com/github/mjgpinheiro/Physics_models/blob/Econophysics/ML_StockPrediction1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Download AAPL price history for the training window
data = yf.download('AAPL', start='2019-01-01', end='2023-03-27')
# Depending on the installed yfinance version, data['Close'] is either a
# Series or a one-column DataFrame (extra per-ticker column level). ravel()
# yields the 1-D array of closes that the windowing code needs in both cases
# and is a no-op when the array is already 1-D.
prices = data['Close'].to_numpy().ravel()

# Create feature vectors using past 5 days' closing prices
def create_feature_vectors(prices, n_days=5):
    """Build sliding-window feature rows from a price sequence.

    Row ``k`` of the result holds ``prices[k:k + n_days]``. The window that
    ends on the very last price is not emitted, so the result has
    ``len(prices) - n_days`` rows (none when the input is too short).
    """
    n_windows = len(prices) - n_days
    windows = [prices[start:start + n_days] for start in range(n_windows)]
    return np.array(windows)

# Build the supervised data set: each row of X is an N_DAYS-long window of
# closes, and the matching label is the sign of the move from the window's
# last close to the close that follows it (-1 down, 0 flat, +1 up).
N_DAYS = 5
X = create_feature_vectors(prices, n_days=N_DAYS)
y = np.sign(np.diff(prices[N_DAYS - 1:]))

# Split data into training and testing sets, keeping chronological order.
# Consecutive windows share N_DAYS - 1 prices, so a shuffled split would put
# near-duplicates of the test rows into the training set and inflate the
# reported accuracy through look-ahead leakage.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Scale the data; the scaler is fitted on the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression model
clf = LogisticRegression()
clf.fit(X_train_scaled, y_train)

# Make predictions on test set
y_pred = clf.predict(X_test_scaled)

# Calculate accuracy as the fraction of test rows predicted correctly
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy}")

def predict(prices):
    """Turn the fitted model's direction forecasts into trading signals.

    Relies on the module-level ``scaler`` and ``clf`` fitted earlier in the
    file, and on ``create_feature_vectors`` with its default window length.

    Args:
        prices: Sequence of closing prices (expected to be 1-D and,
            presumably, in chronological order).

    Returns:
        A list of "buy" / "sell" / "hold" strings, one per consecutive pair
        of forecasts: "buy" when the forecast direction is higher than the
        previous window's forecast, "sell" when it is lower, and "hold" when
        it is unchanged. Empty when there are too few prices to score any
        window.
    """
    windows = create_feature_vectors(prices)

    # The last window returned by create_feature_vectors is deliberately
    # left unscored, matching the existing behaviour of this function.
    scored_windows = windows[:-1]
    if len(scored_windows) == 0:
        # Not enough history for a single window; scikit-learn estimators
        # reject an empty batch, so report "no signals" instead of failing.
        return []

    directions = clf.predict(scaler.transform(scored_windows))

    predictions = []
    for previous, current in zip(directions, directions[1:]):
        if current > previous:
            predictions.append("buy")
        elif current < previous:
            predictions.append("sell")
        else:
            predictions.append("hold")

    return predictions

# Download a second AAPL window and print the model-driven signals for it
data = yf.download('AAPL', start='2022-03-16', end='2023-04-05')
# Depending on the installed yfinance version, data['Close'] is either a
# Series or a one-column DataFrame; ravel() gives the 1-D array of closes
# that predict() needs in both cases.
prices = data['Close'].to_numpy().ravel()
predictions = predict(prices)
for day_number, action in enumerate(predictions, start=1):
    print(f"Day {day_number}: {action}")


# Generate buy, sell, or hold signals
def predict(prices):
    """Label each day-over-day move in the 'Close' column.

    ``prices`` is indexed with ``prices['Close']`` and the values are
    compared pairwise: a higher close than the previous one gives "buy", a
    lower one gives "sell", and anything else gives "hold". The returned
    list therefore has one entry fewer than there are closes.
    """
    closes = prices['Close'].tolist()
    signals = []
    for previous_close, close in zip(closes, closes[1:]):
        if close > previous_close:
            label = "buy"
        elif close < previous_close:
            label = "sell"
        else:
            label = "hold"
        signals.append(label)
    return signals

data = yf.download('AAPL', start='2022-03-16', end='2023-04-05')
# Depending on the installed yfinance version, data['Close'] is either a
# Series or a one-column DataFrame (which has no .to_frame()). Normalise it
# to a single-column frame named 'Close', the column predict() reads.
close = data['Close']
if isinstance(close, pd.DataFrame):
    close = close.iloc[:, 0]
prices = close.rename('Close').to_frame()
predictions = predict(prices)

# Get the dates from the prices DataFrame. The first row has no previous
# close to compare against, so the signals line up with the second date on.
dates = prices.index[1:]

# Print the predictions with their associated dates
for date, action in zip(dates, predictions):
    print(f"Date {date}: {action}")
