In [1]:
# Part 1: Import Libraries and Fetch Data
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
# NOTE(review): this silences *every* warning for the rest of the notebook,
# including pandas/yfinance deprecation notices -- consider narrowing it.
warnings.filterwarnings('ignore')

# Set style for plots
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Select stock
stock_symbol = 'TSLA'  # Tesla
# Other tickers to try: 'AAPL' (Apple), 'MSFT' (Microsoft), 'GOOGL' (Google)

print(f"Downloading data for {stock_symbol}...")

# Download five years of daily history.
# auto_adjust is pinned explicitly because its default has changed between
# yfinance releases; True yields split/dividend-adjusted OHLC with no separate
# 'Adj Close' column, which is the layout the rest of the notebook expects.
stock_data = yf.download(stock_symbol, period='5y', auto_adjust=True)

# yf.download can hand back an empty frame (unknown ticker, network problem,
# rate limiting) instead of raising. Stop here rather than letting the
# following cells silently build features and models on nothing.
if stock_data.empty:
    raise ValueError(
        f"No price data returned for {stock_symbol!r}; "
        "check the ticker symbol and your network connection."
    )

print(f"Downloaded {len(stock_data)} days of data")
print("\nFirst few rows of data:")
print(stock_data.head())

Downloading data for TSLA...


[*********************100%***********************]  1 of 1 completed

Downloaded 1256 days of data

First few rows of data:
Price            Close        High         Low        Open     Volume
Ticker            TSLA        TSLA        TSLA        TSLA       TSLA
Date                                                                 
2021-02-22  238.166672  256.166656  236.733337  254.213333  111809100
2021-02-23  232.946671  237.869995  206.333328  220.710007  199820700
2021-02-24  247.339996  248.333328  231.389999  237.283340  110301000
2021-02-25  227.406662  245.736664  223.526672  242.050003  117071700
2021-02-26  225.166672  235.566666  219.836670  233.333328  123267600





In [2]:
# Part 2: Create Features for Prediction
print("\nCreating features for prediction...")

# Work on a copy so the downloaded frame stays untouched and this cell can be
# re-run on its own.
df = stock_data.copy()

# Recent yfinance releases return (Price, Ticker) MultiIndex columns even for a
# single ticker. Collapse to the price level so df['Close'] is a plain Series
# and every engineered column gets a simple string label, regardless of which
# yfinance version produced the data.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)
if not df.columns.is_unique:
    raise ValueError(
        "Feature engineering expects price data for a single ticker, "
        f"but found duplicate columns: {list(df.columns)}"
    )

close = df['Close']

df = df.assign(
    # Target variable: next trading day's close (the last row has none -> NaN)
    Next_Close=close.shift(-1),
    # Intraday range and open-to-close move, both in percent
    High_Low_Pct=(df['High'] - df['Low']) / close * 100,
    Close_Open_Pct=(close - df['Open']) / df['Open'] * 100,
    # Rolling averages of the close
    MA_5=close.rolling(window=5).mean(),
    MA_10=close.rolling(window=10).mean(),
    MA_20=close.rolling(window=20).mean(),
    # Volatility: 10-day rolling standard deviation of the close
    Volatility=close.rolling(window=10).std(),
    # Lag features (previous day's values)
    Prev_Close=close.shift(1),
    Prev_Volume=df['Volume'].shift(1),
)

# Drop rows with NaN values: the warm-up rows of the 20-day window and the
# final row, which has no next-day target.
df = df.dropna()

if df.empty:
    raise ValueError(
        "Not enough price history to build the 20-day rolling features."
    )

print(f"Dataset shape after feature engineering: {df.shape}")
print("\nFeatures created:")
print(df[['High_Low_Pct', 'Close_Open_Pct', 'MA_5', 'Volatility']].head())


Creating features for prediction...
Dataset shape after feature engineering: (1236, 14)

Features created:
Price      High_Low_Pct Close_Open_Pct        MA_5 Volatility
Ticker                                                       
Date                                                         
2021-03-19     4.979612       1.278991  226.310663  14.015235
2021-03-22     4.607460      -2.131207  223.781329   6.549333
2021-03-23     3.064215      -2.013999  222.799997   6.792841
2021-03-24     6.014886      -5.635494  218.030664   8.488300
2021-03-25     5.621574       4.468192  217.179330   8.666120


In [None]:
# Part 3: Prepare Data for Training
print("\nPreparing data for training...")

# Predictor columns: raw daily values, percentage moves, rolling statistics
# and one-day lags.
feature_columns = [
    'Open', 'High', 'Low', 'Volume',
    'High_Low_Pct', 'Close_Open_Pct',
    'MA_5', 'MA_10', 'MA_20',
    'Volatility', 'Prev_Close', 'Prev_Volume',
]

# X holds the predictors, y the next-day closing price to be predicted.
X, y = df[feature_columns], df['Next_Close']

# Chronological split (no shuffling): the first 80% of rows train the model,
# the remaining rows are held out for testing.
train_size = int(len(X) * 0.8)
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

print(f"Training data size: {len(X_train)} days")
print(f"Testing data size: {len(X_test)} days")
print(f"\nFeatures used: {', '.join(feature_columns)}")