This script predicts future stock prices using Linear Regression. It reads historical stock data from a CSV file, splits the data into training and testing sets, and trains a Linear Regression model. The model's performance is evaluated using R-squared. The script then uses the model to predict stock prices for a specified number of future days and prints these predictions.

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

def predict_future_prices(dataframe, num_days):
    X = dataframe.index.astype('int64') // 10**9  # Convert dates to Unix timestamp (seconds since epoch)
    y = dataframe['Close']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = LinearRegression()
    model.fit(X_train.values.reshape(-1, 1), y_train)

    train_score = model.score(X_train.values.reshape(-1, 1), y_train)
    test_score = model.score(X_test.values.reshape(-1, 1), y_test)

    print(f'Training R-squared: {train_score:.2f}')
    print(f'Testing R-squared: {test_score:.2f}')

    last_date = dataframe.index[-1]
    future_dates = pd.date_range(start=last_date, periods=num_days, freq='D')
    future_timestamps = (future_dates - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
    future_prices = model.predict(future_timestamps.values.reshape(-1, 1))

    for date, price in zip(future_dates, future_prices):
        print(f'Predicted price for {date.date()}: ${price:.2f}')

df = pd.read_csv('GOOG.csv', parse_dates=['Date'], index_col='Date')

num_days = 7
predict_future_prices(df, num_days)

This script predicts future stock prices using Ridge Regression. It reads historical stock data, splits it into training and testing sets, and trains a Ridge Regression model. The model's performance is evaluated using R-squared. The script then uses the model to predict stock prices for a specified number of future days and prints these predictions.

In [4]:
from sklearn.linear_model import Ridge

def predict_future_prices_ridge(dataframe, num_days):
    X = dataframe.index.astype('int64') // 10**9  
    y = dataframe['Close']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = Ridge()
    model.fit(X_train.values.reshape(-1, 1), y_train)

    train_score = model.score(X_train.values.reshape(-1, 1), y_train)
    test_score = model.score(X_test.values.reshape(-1, 1), y_test)

    print(f'Training R-squared: {train_score:.2f}')
    print(f'Testing R-squared: {test_score:.2f}')

    last_date = dataframe.index[-1]
    future_dates = pd.date_range(start=last_date, periods=num_days, freq='D')
    future_timestamps = (future_dates - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
    future_prices = model.predict(future_timestamps.values.reshape(-1, 1))

    for date, price in zip(future_dates, future_prices):
        print(f'Predicted price for {date.date()}: ${price:.2f}')

num_days = 7
predict_future_prices_ridge(df, num_days)

Training R-squared: 0.85
Testing R-squared: -3.77
Predicted price for 2023-12-05: $65.82
Predicted price for 2023-12-06: $65.83
Predicted price for 2023-12-07: $65.84
Predicted price for 2023-12-08: $65.85
Predicted price for 2023-12-09: $65.86
Predicted price for 2023-12-10: $65.87
Predicted price for 2023-12-11: $65.88
