### Libraries

In [26]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import joblib

### Data Loading

In [27]:
# Load the historical price table; later cells read its 'date' and
# 'close_price' columns.
DATA_PATH = 'historical_data.csv'
df = pd.read_csv(DATA_PATH)

### Feature Selection

In [28]:
# Parse the raw date column once, then encode every date as its integer
# ordinal so it can be used as a numeric regressor.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df['date_ordinal'] = parsed_dates.map(pd.Timestamp.toordinal)

# Single-feature design matrix (kept 2-D as a DataFrame) and the target.
feature_columns = ['date_ordinal']
X = df[feature_columns]
y = df['close_price']

### Train-test Split and Model Training

In [29]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is random, not chronological -- confirm that this
# is intended for date-indexed data.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Fit an ordinary least-squares line (close price as a function of the date
# ordinal) and score it on the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

### Prediction vs Actual

In [None]:
# Plot held-out actual prices against the fitted regression line.
#
# The x-axis dates are derived into a separate Series instead of overwriting
# X_test['date_ordinal'] / X_train['date_ordinal']. Overwriting them would
# (a) write into slices produced by train_test_split, (b) make this cell fail
# when re-run, because the column would already hold Timestamps, and
# (c) leave X_test / X_train unusable as inputs to the fitted model.
test_dates = X_test['date_ordinal'].map(pd.Timestamp.fromordinal)

# Draw the prediction line left-to-right rather than in shuffled split order.
date_order = np.argsort(X_test['date_ordinal'].to_numpy())

plt.figure(figsize=(10, 6))
plt.scatter(test_dates, y_test, color='blue', label='Actual prices')
plt.plot(test_dates.iloc[date_order], y_pred[date_order],
         color='red', label='Predicted prices')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('Stock Price Prediction using Linear Regression')
plt.legend()
plt.show()

### Predicting 30 days ahead

In [None]:
# Forecast the close price for each of the next `future_days` calendar days
# after the most recent date in the data.
future_days = 30
last_date_ordinal = df['date_ordinal'].max()

# Build the inputs as a DataFrame with the same column name the model was
# fitted on; passing a bare ndarray makes scikit-learn warn that X has no
# valid feature names.
future_dates = pd.DataFrame({
    'date_ordinal': np.arange(last_date_ordinal + 1,
                              last_date_ordinal + future_days + 1),
})
future_predictions = model.predict(future_dates)

for day, pred in enumerate(future_predictions, start=1):
    print(f"Day {day}: Predicted close price: {pred:.2f}")

In [None]:
# Persist the fitted model, then reload it from disk and repeat the forecast
# to confirm the round trip works.
MODEL_PATH = 'stock_price_prediction_model.pkl'
joblib.dump(model, MODEL_PATH)

# Only load pickles this notebook wrote itself; joblib files can execute
# arbitrary code when loaded.
model = joblib.load(MODEL_PATH)
future_predictions = model.predict(future_dates)

for day, pred in enumerate(future_predictions, start=1):
    print(f"Day {day}: Predicted close price: {pred:.2f}")
