# 1. Let's use the Yahoo Finance API for data gathering

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf  # Yahoo Finance API
from datetime import datetime, timedelta
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import xgboost as xgb

In [None]:
# Set the ticker symbol, start date, and end date
ticker = "AAPL"  # Apple Inc.
start_date = "2018-01-01"
end_date = "2022-12-31"  # NOTE: yfinance documents `end` as exclusive

# Download the daily price history for the ticker
data = yf.download(ticker, start=start_date, end=end_date)

# yfinance reports download failures by returning an empty frame rather than
# raising, so stop here with a clear message instead of letting every later
# cell silently operate on nothing.
if data.empty:
    raise ValueError(
        f"No data returned for {ticker} between {start_date} and {end_date}"
    )

# Newer yfinance releases return MultiIndex columns of (field, ticker) even for
# a single ticker. The rest of this notebook expects flat column names such as
# 'Close' and 'Volume', so keep only the field level when that happens.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Display information about the data
print("Data shape:", data.shape)
print("\nDate range:")
print(f"Start: {data.index.min()}")
print(f"End: {data.index.max()}")
print("\nFirst few rows:")
print(data.head())

In [None]:
# Check for any missing values (count of nulls per column)
print("\nMissing values in each columns:")
print(data.isnull().sum())
# Basic statistics (summary produced by DataFrame.describe)
# The heading previously used "\B", which is not a valid escape sequence and
# printed a literal backslash; "\n" gives the intended leading blank line.
print("\nBasic Statistics:")
print(data.describe())

In [None]:
# Plot the closing price over time
plt.figure(figsize=(15,7))
plt.plot(data.index, data['Close'])
plt.title(f'{ticker} Stock Price (Close)')
# The x-axis carries the frame's index (dates), so label it 'Date' like the
# other charts in this notebook (it previously read 'Data').
plt.xlabel('Date')
plt.ylabel('Price(USD)')
plt.grid(True)
plt.tight_layout()
plt.show()

# 2. Feature Engineering for Time-Series

In [None]:
# Work on a copy so the frame held in `data` is left untouched
stock_data = data.copy()

# Calendar features read from the index
for feature_name, index_attr in (
    ('Year', 'year'),
    ('Month', 'month'),
    ('Day', 'day'),
    ('DayOfWeek', 'dayofweek'),
):
    stock_data[feature_name] = getattr(stock_data.index, index_attr)

# Price-based features: high-low spread and row-over-row percentage change
stock_data['PriceRange'] = stock_data['High'].sub(stock_data['Low'])
stock_data['DailyReturn'] = stock_data['Close'].pct_change()

# Lagged features: Close and Volume from each of the previous 1..5 rows
for lag in range(1, 6):
    for source_col in ('Close', 'Volume'):
        stock_data[f'{source_col}_Lag_{lag}'] = stock_data[source_col].shift(lag)

# Simple moving averages of the close over common window lengths
for window in (5, 10, 20, 50):
    stock_data[f'MA_{window}'] = stock_data['Close'].rolling(window).mean()

# Technical indicators
# Relative Strength Index (RSI) - simplified version using plain 14-row
# rolling means of the upward and downward moves
delta = stock_data['Close'].diff()
gain = delta.where(delta > 0, 0)     # upward moves, everything else -> 0
loss = -delta.where(delta < 0, 0)    # downward moves as positive magnitudes
avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()
rs = avg_gain.div(avg_loss)
stock_data['RSI'] = 100 - 100 / (1 + rs)

# Moving Average Convergence Divergence (MACD): difference between the
# 12-span and 26-span EMAs, plus a 9-span EMA of that difference
for ema_span in (12, 26):
    stock_data[f'EMA_{ema_span}'] = stock_data['Close'].ewm(span=ema_span).mean()
stock_data['MACD'] = stock_data['EMA_12'].sub(stock_data['EMA_26'])
stock_data['MACD_Signal'] = stock_data['MACD'].ewm(span=9).mean()

# The shifts and rolling windows leave NaNs in the leading rows; discard
# every row that contains any NaN
stock_data = stock_data.dropna()

print("Features create. New Shape:", stock_data.shape)
print("\nColumns:", stock_data.columns.tolist())

In [None]:
# Visualize key features: closing price overlaid with its moving averages
plt.figure(figsize=(15, 7))
plot_dates = stock_data.index
plt.plot(plot_dates, stock_data['Close'], label='Close Price')

# One line per moving-average window, each with its own line style
ma_line_styles = {5: '-', 10: ':', 20: '--', 50: '-.'}
for ma_days, ma_style in ma_line_styles.items():
    plt.plot(
        plot_dates,
        stock_data[f'MA_{ma_days}'],
        label=f'{ma_days}-Day MA',
        linestyle=ma_style,
    )

plt.title(f'{ticker} Stock Price with Moving Averages')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Plot RSI with horizontal guide lines at 70 and 30
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(stock_data.index, stock_data['RSI'])

# 70 -> overbought line (red), 30 -> oversold line (green)
for rsi_level, line_color in ((70, 'r'), (30, 'g')):
    ax.axhline(y=rsi_level, color=line_color, linestyle='--', alpha=0.5)

ax.set_title('Relative Strength Index (RSI)')
ax.set_xlabel('Date')
ax.set_ylabel('RSI')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Plot the MACD line together with its signal line
plt.figure(figsize=(15, 5))
for macd_column, legend_label in (
    ('MACD', 'MACD'),
    ('MACD_Signal', 'Signal Line'),
):
    plt.plot(stock_data.index, stock_data[macd_column], label=legend_label)

plt.title('Moving Average Convergence Divergence (MACD)')
plt.xlabel('Date')
plt.ylabel('MACD')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Prediction target: the close of the following row (tomorrow's closing price)
stock_data['Target'] = stock_data['Close'].shift(periods=-1)
# The final row has no following close, so its target is undefined: slice that
# row off explicitly, then discard any other rows still containing NaN values.
stock_data = stock_data.iloc[:-1].dropna()

# Define features and target
