In [20]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/netflix-stock-price-history/Netflix_stock_data.csv


In [21]:
# Read the Netflix price history from the Kaggle input mount, then show the first rows.
csv_path = '/kaggle/input/netflix-stock-price-history/Netflix_stock_data.csv'
df = pd.read_csv(csv_path)
preview = df.head()
print(preview)

         Date     Close      High       Low      Open     Volume
0  2002-05-23  1.196429  1.242857  1.145714  1.156429  104790000
1  2002-05-24  1.210000  1.225000  1.197143  1.214286   11104800
2  2002-05-28  1.157143  1.232143  1.157143  1.213571    6609400
3  2002-05-29  1.103571  1.164286  1.085714  1.164286    6757800
4  2002-05-30  1.071429  1.107857  1.071429  1.107857   10154200


# 1. Data Insight

In [22]:
# (rows, columns) of the loaded frame
df.shape

(5810, 6)

In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Close,High,Low,Open,Volume
count,5810.0,5810.0,5810.0,5810.0,5810.0
mean,174.277189,176.778673,171.606167,174.184463,15136080.0
std,238.038218,241.092964,234.690133,237.807511,18365690.0
min,0.372857,0.410714,0.346429,0.377857,285600.0
25%,4.3075,4.406428,4.226071,4.310714,5385225.0
50%,47.330715,48.094999,46.490715,47.347857,9366850.0
75%,319.6875,324.845009,313.472504,319.279991,18033750.0
max,1279.109985,1298.0,1273.810059,1286.839966,323414000.0


In [24]:
# Number of missing values in each column
df.isna().sum()

Date      0
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64

In [25]:
# Use the trading date as the index, parsed to real datetimes. Left as the raw
# strings read from the CSV, matplotlib would treat every date as a separate
# categorical tick and the time axis of the plots in this notebook would be
# unreadable.
# The column check keeps the cell safe to re-run: once 'Date' has been moved
# into the index, a second set_index('Date') would raise KeyError.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

# 2. Visualization

In [26]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Closing price over the whole history, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df['Close'], label='Close Price')
ax.set_title('Netflix Close Price Over Time')
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Pairwise correlation of the frame's columns, drawn as an annotated heatmap
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
fig.tight_layout()
plt.show()

In [None]:
# Day-over-day fractional change of the close, stored on the frame for later cells
df['Daily_Return'] = df['Close'].pct_change()

# The first row has no previous close, so its return is NaN and is left out of the histogram
returns_clean = df['Daily_Return'].dropna()

fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(returns_clean, bins=100, kde=True, ax=ax)
ax.set_title("Histogram of Daily Returns")
ax.set_xlabel("Daily Return")
fig.tight_layout()
plt.show()

# 3. Volatility Analysis and Risk Estimation

In [None]:
# Rolling means of the close; each column is NaN until its window is full
ma_windows = {'MA50': 50, 'MA200': 200}
for column, window in ma_windows.items():
    df[column] = df['Close'].rolling(window=window).mean()

# Close price overlaid with both moving averages
fig, ax = plt.subplots(figsize=(14, 6))
series_labels = (
    ('Close', 'Close'),
    ('MA50', '50-day MA'),
    ('MA200', '200-day MA'),
)
for column, label in series_labels:
    ax.plot(df[column], label=label)
ax.set_title("Netflix Close Price with Moving Averages")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


## **Steps:**

---

### 1. **Calculate Daily Returns**

$$
\text{Return}_t = \frac{P_t - P_{t-1}}{P_{t-1}}
$$

---

### 2. **Rolling Volatility**

$$
\text{Volatility}_t = \text{STD}\left(\text{Return}_{t-n+1}, \dots, \text{Return}_t\right)
$$

---

### 3. **Bollinger Bands**

$$
\text{Upper Band} = \text{MA}_{20} + 2 \times \text{STD}_{20}, \quad 
\text{Lower Band} = \text{MA}_{20} - 2 \times \text{STD}_{20}
$$

---

### 4. **Value at Risk (VaR)**

$$
\text{VaR}_{95\%} = \mu - 1.65 \times \sigma
$$

*($\mu$ = mean return, $\sigma$ = std return)*

---

### 5. **GARCH model for volatility clustering**


In [None]:
# 2. Rolling volatility: standard deviation of daily returns over a 30-row window
return_series = df['Daily_Return']
rolling_30 = return_series.rolling(window=30)
df['Rolling_STD_30'] = rolling_30.std()

In [None]:
# 30-day rolling volatility over time
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df['Rolling_STD_30'], label='30-Day Rolling Volatility', color='orange')
ax.set_title('Netflix Stock 30-Day Rolling Volatility')
ax.set_xlabel('Date')
ax.set_ylabel('Volatility')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# 3. Bollinger Bands: 20-row moving average of the close, plus/minus two rolling standard deviations
close_window_20 = df['Close'].rolling(window=20)
df['MA20'] = close_window_20.mean()
df['STD20'] = close_window_20.std()

band_offset = 2 * df['STD20']
df['Upper_Band'] = df['MA20'] + band_offset
df['Lower_Band'] = df['MA20'] - band_offset

In [None]:
# Close price with the upper and lower Bollinger bands drawn as dashed lines
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df['Close'], label='Close Price')
for band_column, band_label in (('Upper_Band', 'Upper Band'), ('Lower_Band', 'Lower Band')):
    ax.plot(df[band_column], label=band_label, linestyle='--')
ax.set_title('Bollinger Bands (20-day window)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# 4. Parametric Value at Risk (95% confidence): mean daily return minus 1.65 standard deviations
all_returns = df['Daily_Return']
mean_ret = all_returns.mean()
std_ret = all_returns.std()
z_score_95 = 1.65
VaR_95 = mean_ret - z_score_95 * std_ret
print(f"Value at Risk (95% Confidence): {VaR_95:.4f}")

## 📉 Value at Risk (VaR) – Interpretation

We calculated the **95% Value at Risk (VaR)** for Netflix daily returns using:

$$
\text{VaR}_{95\%} = \mu - 1.65 \cdot \sigma
$$

Where:
- $\mu$ is the **mean** of daily returns
- $\sigma$ is the **standard deviation** of daily returns
- 1.65 is the rounded one-tailed Z-score (≈ 1.645) for 95% confidence, which assumes daily returns are approximately normally distributed

---

### ✅ Result:
```text
Value at Risk (95% Confidence): -0.0555
```

Assuming daily returns are approximately normally distributed, there is a 95% probability that Netflix’s daily return will not drop below -5.55% on any given day.

In other words, we expect to lose more than 5.55% on a single day only 5% of the time (1 in 20 trading days).

# 📘 GARCH (Generalized Autoregressive Conditional Heteroskedasticity)

---

## 🔹 Why Use GARCH?

GARCH is used to **model and forecast volatility** in time series data, especially in financial markets. It captures:

- **Volatility clustering**: High-volatility periods tend to be followed by high volatility, and low-volatility periods by low volatility.
- **Time-varying variance**: Unlike constant variance in standard models, GARCH adapts variance over time.

---

## 🔹 GARCH(1,1) Formula

$$
\sigma_t^2 = \omega + \alpha \cdot \epsilon_{t-1}^2 + \beta \cdot \sigma_{t-1}^2
$$

Where:

- $\sigma_t^2$ = forecasted variance at time $t$
- $\epsilon_{t-1}^2$ = previous day’s squared residual (shock)
- $\sigma_{t-1}^2$ = previous day’s variance
- $\omega, \alpha, \beta$ = parameters to estimate

---

## 🔹 Interpretation

- If $\alpha + \beta < 1$: volatility is **mean-reverting**
- High $\alpha$: volatility responds quickly to market shocks
- High $\beta$: volatility is **persistent** (long-lasting effects)

---

## 🔹 Use Cases

- Forecasting risk
- Calculating Value at Risk (VaR)
- Financial modeling and option pricing
- Portfolio volatility estimation

In [None]:
!pip install arch
from arch import arch_model

# Drop missing values
returns = df['Daily_Return'].dropna() * 100  # Convert to percentage for GARCH

# Fit GARCH(1,1) model
model = arch_model(returns, vol='Garch', p=1, q=1)
garch_fit = model.fit(disp='off')

# Forecast variance
forecast = garch_fit.forecast(horizon=5)
garch_vol = np.sqrt(forecast.variance.iloc[-1])

print("GARCH(1,1) Forecasted Volatility for next 5 days:")
print(garch_vol)

# Plot conditional volatility
plt.figure(figsize=(14, 6))
plt.plot(garch_fit.conditional_volatility, label='Conditional Volatility')
plt.title('GARCH(1,1) Conditional Volatility')
plt.xlabel('Date')
plt.ylabel('Volatility')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


### 📌 Interpretation:

- These values represent the **forecasted standard deviation (%) of daily returns**.
- The volatility is expected to stay between **2.42% and 2.45%** over the next 5 days.
- This implies **moderate and stable market uncertainty** around Netflix stock in the short term.

---

GARCH is particularly useful when volatility is **not constant** and tends to **cluster**—just like we observe in financial time series data.


# Conclusion and Summary

## ✅ Project Summary & Closing Notes

In this time series project on **Netflix Stock Analysis**, we:

- ✅ Performed **Exploratory Data Analysis** (EDA) and visualized trends using line plots, moving averages, and return histograms.
- ✅ Calculated **Volatility Indicators** like rolling standard deviation and **Bollinger Bands** to monitor risk levels.
- ✅ Estimated **Value at Risk (VaR)** at a 95% confidence level to quantify downside risk.
- ✅ Applied the **GARCH(1,1)** model to forecast short-term volatility and capture volatility clustering behavior in the returns.

---

### Key Takeaways:

- Netflix stock shows **volatility clustering**, making GARCH models a good fit.
- The estimated **VaR of ~-5.5%** marks the daily loss that should be exceeded on only about 5% of trading days; it is a threshold at 95% confidence, not a worst-case loss.
- **Forecasted volatility (GARCH)** around ~2.4% signals moderate short-term uncertainty.


## Thanks for reviewing this project!  
