# 📘 Google Stock Price Analysis & Forecasting (5-Year)
# This notebook provides an end-to-end EDA, visual insights, and predictive analysis
# for Google's stock prices. All plots are interactive and suitable for investment insights.

# -------------------------------
# 📦 STEP 1: Import Libraries
# -------------------------------
# Importing essential libraries for data handling, visualization, and modeling

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
warnings.filterwarnings("ignore")

# -------------------------------
# 📥 STEP 2: Load & Clean Dataset
# -------------------------------

In [None]:
# Location of the raw 5-year GOOGL price export and the columns that must be numeric
csv_path = "/kaggle/input/2020-2025-google-stock-dataset/google_5yr_one.csv"
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

# Read the file and discard row 0, which holds a repeated header line
df = pd.read_csv(csv_path).drop(index=0)

# Parse 'Date' into datetimes so the frame can be handled as a time series
df['Date'] = pd.to_datetime(df['Date'])

# Cast every price/volume column to float in a single pass
df = df.astype(dict.fromkeys(numeric_cols, float))

# Use the date as the DataFrame index
df = df.set_index('Date')

# -------------------------------
# 📊 STEP 3: Extended EDA
# -------------------------------
# Calculate daily price spread as the difference between high and low prices

In [None]:
# Daily trading range: how far the high sits above the low for each session
df['High-Low Spread'] = df['High'].sub(df['Low'])

# Line chart of the daily range, used as a quick view of volatility
spread_plot_options = dict(
    x=df.index,
    y='High-Low Spread',
    title='Daily Price Spread (High - Low)',
    template='plotly_dark',
)
fig_spread = px.line(df, **spread_plot_options)
fig_spread.show()

# Interactive candlestick chart built from the open/high/low/close columns
ohlc_trace = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
candlestick = go.Figure(data=[ohlc_trace])
candlestick.update_layout(
    title='GOOGL Candlestick Chart (5Y)',
    template='plotly_dark',
)
candlestick.show()

# Pairwise correlations between the price columns and volume, drawn as an annotated heatmap
feature_corr = df[['Open', 'High', 'Low', 'Close', 'Volume']].corr()

fig_corr, ax_corr = plt.subplots(figsize=(8, 6))
sns.heatmap(feature_corr, annot=True, cmap='coolwarm', ax=ax_corr)
ax_corr.set_title("Correlation Heatmap of GOOGL Price Features")
fig_corr.tight_layout()
plt.show()


# ----------------------------------
# 📈 Technical Indicators
# ----------------------------------

In [None]:
# RSI (Relative Strength Index): flags overbought / oversold conditions
rsi_window = 14

# Day-over-day change in the closing price
delta = df['Close'].diff()

# Keep only up-moves in `gain` and only down-moves (sign-flipped) in `loss`;
# every other entry, including the leading NaN from diff(), becomes 0
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)

# Simple rolling means of the up- and down-moves over the RSI window
avg_gain = gain.rolling(window=rsi_window).mean()
avg_loss = loss.rolling(window=rsi_window).mean()

# Relative strength and its mapping onto the 0-100 RSI scale
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# RSI line with the conventional 70 / 30 reference levels
fig_rsi, ax_rsi = plt.subplots(figsize=(12, 4))
ax_rsi.plot(df['RSI'], label='RSI')
for level, level_color, level_label in ((70, 'red', 'Overbought'),
                                        (30, 'green', 'Oversold')):
    ax_rsi.axhline(level, color=level_color, linestyle='--', label=level_label)
ax_rsi.set_title('Relative Strength Index (RSI)')
ax_rsi.legend()
ax_rsi.grid()
fig_rsi.tight_layout()
plt.show()

# Bollinger Bands: a 20-day moving average wrapped in +/- 2 standard deviations
close_roll20 = df['Close'].rolling(window=20)
df['MA20'] = close_roll20.mean()
df['STD20'] = close_roll20.std()

# Band offset is twice the rolling standard deviation on either side of the mean
band_offset = 2 * df['STD20']
df['Upper Band'] = df['MA20'] + band_offset
df['Lower Band'] = df['MA20'] - band_offset

# Overlay the closing price and both bands; each trace is named after its column
fig_bb = go.Figure()
for band_col in ('Close', 'Upper Band', 'Lower Band'):
    fig_bb.add_trace(go.Scatter(x=df.index, y=df[band_col], name=band_col))
fig_bb.update_layout(title='Bollinger Bands (20 Day)', template='plotly_dark')
fig_bb.show()

# MACD (Moving Average Convergence Divergence): trend direction and momentum
close_price = df['Close']

# Fast (12-span) and slow (26-span) exponential moving averages of the close
ema12 = close_price.ewm(span=12, adjust=False).mean()
ema26 = close_price.ewm(span=26, adjust=False).mean()

# MACD is the fast EMA minus the slow EMA; the signal line is a 9-span EMA of MACD
df['MACD'] = ema12 - ema26
df['Signal Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Draw both series on one axis; each line is labelled with its column name
fig_macd, ax_macd = plt.subplots(figsize=(12, 4))
for macd_col in ('MACD', 'Signal Line'):
    ax_macd.plot(df[macd_col], label=macd_col)
ax_macd.set_title('MACD & Signal Line')
ax_macd.legend()
ax_macd.grid()
fig_macd.tight_layout()
plt.show()


# ----------------------------------
# 📈 Autocorrelation & PACF Analysis
# ----------------------------------

In [None]:
# ACF and PACF help identify lags and seasonality patterns.
# plot_acf / plot_pacf open their own new figure unless an Axes is supplied,
# so a preceding plt.figure(figsize=...) would be left empty and its size
# ignored. The Axes are therefore created first and passed in via `ax=`.
close_series = df['Close'].dropna()

fig_acf, ax_acf = plt.subplots(figsize=(10, 4))
plot_acf(close_series, lags=50, ax=ax_acf)
ax_acf.set_title("Autocorrelation (ACF) - Closing Price")
fig_acf.tight_layout()
plt.show()

fig_pacf, ax_pacf = plt.subplots(figsize=(10, 4))
plot_pacf(close_series, lags=50, ax=ax_pacf)
ax_pacf.set_title("Partial Autocorrelation (PACF) - Closing Price")
fig_pacf.tight_layout()
plt.show()


# -------------------------------
# 🔮 STEP 4: Time Series Forecasting
# -------------------------------

In [None]:
# Prophet Model: additive trend + seasonality model developed by Facebook
# Prophet expects a two-column frame: 'ds' (timestamp) and 'y' (value to forecast)
prophet_df = df['Close'].rename('y').rename_axis('ds').reset_index()

# Build the model with yearly seasonality on and daily seasonality off, then fit it
prophet_settings = dict(daily_seasonality=False, yearly_seasonality=True)
prophet_model = Prophet(**prophet_settings)
prophet_model.fit(prophet_df)

# Extend the history by 90 periods and predict over the full range
future = prophet_model.make_future_dataframe(periods=90)
forecast = prophet_model.predict(future)

# 📈 Interactive Plotly chart: forecast line with the historical close overlaid
fig_prophet = px.line(
    forecast,
    x='ds',
    y='yhat',
    title='📈 GOOGL Stock Price Forecast - Prophet',
    labels={'ds': 'Date', 'yhat': 'Forecasted Price'},
    template='plotly_dark',
)
fig_prophet.add_scatter(
    x=prophet_df['ds'],
    y=prophet_df['y'],
    mode='lines',
    name='Historical Close',
    line=dict(color='cyan'),
)
fig_prophet.show()


# 💬 Insight: Prophet captures seasonal trends and offers a smooth projection of GOOGL prices.
# It indicates a steady rise in price, suggesting investor confidence and growth momentum.

In [None]:
# ARIMA Model: Auto Regressive Integrated Moving Average for short-term predictions
# Fit ARIMA model with order (5,1,0): 5 autoregressive lags, first differencing, no MA terms
arima_model = ARIMA(df['Close'], order=(5,1,0))
arima_result = arima_model.fit()

# Forecast the next 90 steps beyond the end of the sample
forecast_horizon = 90
arima_forecast = arima_result.forecast(steps=forecast_horizon)

# The first forecast step belongs to the business day AFTER the last observation.
# Starting the range at df.index[-1] itself would shift every forecast one
# business day into the past and overlap the final historical point.
first_forecast_day = df.index[-1] + pd.offsets.BDay(1)
forecast_dates = pd.date_range(start=first_forecast_day, periods=forecast_horizon, freq='B')

# 📈 Plot ARIMA forecast with Plotly
fig_arima = go.Figure()
fig_arima.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Historical Close'))
fig_arima.add_trace(go.Scatter(x=forecast_dates, y=arima_forecast, mode='lines', name='ARIMA Forecast'))
fig_arima.update_layout(title='📈 GOOGL Stock Price Forecast - ARIMA', xaxis_title='Date',
                        yaxis_title='Price', template='plotly_dark')
fig_arima.show()

# 💬 Insight: ARIMA provides a short-term prediction with a quick reaction to recent trends.
# Useful for swing traders looking for near-term direction.


In [None]:
# Residual diagnostics for ARIMA: assess model error behavior
residuals = arima_result.resid

# With differencing (d >= 1) the first d residuals are start-up artifacts rather
# than genuine one-step errors (the very first one is roughly the raw first
# price), and they would dominate both charts below. They are excluded from
# the plots only; `residuals` itself is left complete.
n_diff = arima_result.model.order[1]
residuals_for_plots = residuals.iloc[n_diff:]

# Residuals over time, plotted against their own date index
fig_resid = px.line(x=residuals_for_plots.index, y=residuals_for_plots,
                    title="ARIMA Model Residuals",
                    labels={'x': 'Date', 'y': 'Residual'}, template='plotly_dark')
fig_resid.show()

# 📉 Insight: Residuals show minimal patterns and randomness, indicating good model fit without serial correlation.

# Histogram of residuals (with a marginal box plot) using Plotly
fig_resid_hist = px.histogram(residuals_for_plots, nbins=50, marginal="box",
                              title="Distribution of ARIMA Residuals",
                              labels={'value': 'Residual'}, template='plotly_dark')
fig_resid_hist.show()

# 💬 Insight: The distribution of residuals approximates a normal curve, suggesting that forecast errors are unbiased.

# -------------------------------
# 🧠 FINAL INSIGHTS FOR INVESTORS
# -------------------------------
# 1. 📈 Long-Term Trend: Google (GOOGL) demonstrates a steady long-term uptrend, making it a strong candidate for long-term investors.
# 2. 📉 Volatility Awareness: The High-Low spread and Bollinger Bands indicate regular volatility surges, which are crucial for timing entries and exits.
# 3. 📊 Volume Spikes: Several volume surges are followed by price movements, suggesting news-based or institutional trading activity. Monitoring volume with price action is recommended.
# 4. 🔁 RSI Insights: RSI helps detect overbought (above 70) and oversold (below 30) zones. Recent values show healthy price corrections without severe drawdowns.
# 5. 🕯️ Candlestick Trends: Visual candlestick patterns show strong support and resistance zones over time—valuable for swing and positional trades.
# 6. 📈 MACD Signals: Several MACD bullish crossovers were followed by upward momentum; effective as an early trend confirmation tool.
# 7. 🔮 Prophet Forecast: Predicts continued upward momentum with stable yearly seasonality—suitable for long-term planning and wealth building.
# 8. 📉 ARIMA Insights: ARIMA performs well on short-horizon forecasts. Its residuals show no severe autocorrelation, confirming good model fit.
# 9. 🔍 ACF/PACF Findings: Significant lags in autocorrelation suggest momentum-based investing strategies may be useful for traders.
# 10. 💡 Overall Strategy: For long-term investors—dollar cost averaging (DCA) into GOOGL is a viable strategy. For short-term traders, combine RSI + MACD + Volume for tactical decisions.