# Task2 : Predicting future stock price

In [8]:

import pandas as pd # pandas is used for data manipulation and analysis
import matplotlib.pyplot as plt # matplotlib is used for plotting graphs
import yfinance as yf # yfinance is used to fetch financial data from Yahoo Finance

In [32]:
# Step 1: Load historical stock data
ticker = 'AAPL'  # Apple stock
data = yf.download(ticker, start='2018-01-01', end='2023-01-01', progress=False)

# Check if data is retrieved
if data.empty:
    raise ValueError("No data retrieved from yfinance. Check ticker or internet connection.")
print("Data shape after download:", data.shape)


  data = yf.download(ticker, start='2018-01-01', end='2023-01-01', progress=False)


Data shape after download: (1259, 5)


In [33]:
data

Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2018-01-02,40.426811,40.436201,39.722757,39.933975,102223600
2018-01-03,40.419785,40.964255,40.356422,40.490191,118071600
2018-01-04,40.607540,40.710802,40.384590,40.492543,89738400
2018-01-05,41.069859,41.156691,40.612224,40.703751,94640000
2018-01-08,40.917336,41.213037,40.818765,40.917336,82271200
...,...,...,...,...,...
2022-12-23,130.173798,130.726634,127.982185,129.245816,63814900
2022-12-27,128.367172,129.729530,127.073927,129.699914,69007800
2022-12-28,124.428200,129.354385,124.260376,128.011777,85438400
2022-12-29,127.952560,128.811430,126.096604,126.353274,75703700


In [34]:
# Step 2: Feature engineering
data['SMA_20'] = data['Close'].rolling(window=20).mean()
data['SMA_50'] = data['Close'].rolling(window=50).mean()
data['Daily_Return'] = data['Close'].pct_change()
data = data.dropna()  # Drop rows with NaN values
print("Data shape after dropna:", data.shape)

Data shape after dropna: (1210, 8)


In [28]:
# Step 3: Define features and target
features = ['Open', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50', 'Daily_Return']
X = data[features]
y = data['Close'].shift(-1)[:-1]  # Next day's closing price (shifted)
X = X[:-1]  # Align X with y by removing the last row

In [36]:
X

Price,Open,High,Low,Volume,SMA_20,SMA_50,Daily_Return
Ticker,AAPL,AAPL,AAPL,AAPL,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2


In [37]:
# Step 3: Define features and target
features = ['Open', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50', 'Daily_Return']
X = data[features]
y = data['Close'].shift(-1)[:-1]  # Next day's closing price
X = X[:-1]  # Align with y

In [48]:
print(f"X shape: {X.shape}, y shape: {y.shape}")

X shape: (1209, 7), y shape: (1209, 1)


In [None]:
if X.shape[0] == 0 or y.shape[0] == 0:
    raise ValueError(f"X or y is empty after preprocessing. Check data processing steps.")

In [46]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

In [47]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.2, random_state = 42, shuffle= False)

print(f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}")

X_train shape: (967, 7), X_test shape: (242, 7)


In [50]:
# Preprocessing data using ColumnTransformer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

In [51]:
transformer = ColumnTransformer(transformers = [
    ('num', StandardScaler(), features)
    ],
    remainder = 'passthrough')

In [53]:
# Step 4: Createing a machine learning model
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

In [54]:
# creating pipeline 
rf_pipeline = Pipeline([
    ('preprocessor', transformer),
    ('model', RandomForestRegressor(n_estimators=100, random_state=42))
])

In [59]:
#rf_pipeline.fit(X_train, Y_train)