In [10]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [11]:
msft = yf.Ticker('MSFT')
msft

yfinance.Ticker object <MSFT>

In [21]:
stock_data = msft.history('2y')

In [22]:
# Assuming the DataFrame is sorted by date in ascending order
# We will use 80% of the data for training and the last 20% for testing
split_ratio = 0.8
split_index = int(len(stock_data) * split_ratio)

train_data = stock_data.iloc[:split_index]
test_data = stock_data.iloc[split_index:]

# Shift the target variable (y) for the training data to predict the future
DAYS_OUT_TO_PREDICT = 10  # Modify this value as needed
y_train = train_data['Close'].shift(-DAYS_OUT_TO_PREDICT).dropna()
X_train = train_data.drop(['Close'], axis=1).iloc[:-DAYS_OUT_TO_PREDICT]

X_test = test_data.drop(['Close'], axis=1)
y_test = test_data['Close']


In [23]:

# Create a list of regressors you want to try
regressors = [
    LinearRegression(),
    RandomForestRegressor(),
    # SVR(kernel='linear'), - tar lang tid
    XGBRegressor(),
    # Add more regressors as per your choice
]


In [24]:
best_mse = float('inf')
best_regressor = None

In [25]:
for regressor in regressors:
    # Creating the regressor
    model = regressor

    # Fitting the model on the training data
    model.fit(X_train, y_train)

    # Making predictions on the test data
    y_pred = model.predict(X_test)

    # Evaluating the model (You can use various metrics to evaluate the model's performance)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Regressor: {regressor.__class__.__name__}")
    print(f"Mean Squared Error (MSE): {mse}")

    if mse < best_mse:
        best_mse = mse
        best_regressor = model

    print()

Regressor: LinearRegression
Mean Squared Error (MSE): 34.442455037525015

Regressor: RandomForestRegressor
Mean Squared Error (MSE): 150.2589341518314

Regressor: XGBRegressor
Mean Squared Error (MSE): 174.407281119139



In [27]:
joblib.dump(best_regressor, 'best_model.joblib')

# Now, you can use the saved model in another system or script.
# For example, in a different system, you can load the model and make predictions:

# Load the model from the file
loaded_model = joblib.load('best_model.joblib')

# Assuming 'future_data' is the DataFrame containing future data
future_close_prices = loaded_model.predict(X_test)