# Task 1: Preprocess and Explore the Data

## Objective
Load, clean, and understand the data to prepare it for modeling.
Focus on extracting financial data, cleaning it, and performing exploratory analysis.

In [1]:
import sys
import os

# Add src to path
sys.path.append(os.path.abspath(os.path.join('../src')))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from data_loader import fetch_data, clean_data
from utils import check_stationarity, calculate_daily_returns, plot_price_series, calculate_risk_metrics

%matplotlib inline

ModuleNotFoundError: No module named 'yfinance'

## 1. Data Extraction

In [None]:
# Assets under study and the sample window passed to the loader.
TICKERS = ['TSLA', 'BND', 'SPY']
START_DATE, END_DATE = '2015-01-01', '2026-01-15'

# Fetch the raw data through data_loader.fetch_data, then report its shape
# and preview the first rows as the cell output.
raw_data = fetch_data(TICKERS, START_DATE, END_DATE)
print("Raw Data Shape:", raw_data.shape)
raw_data.head()

## 2. Data Cleaning
Check for missing values and handle them, then extract the closing prices used in the rest of the analysis.

In [None]:
data = clean_data(raw_data)

# We typically focus on 'Close' or 'Adj Close' prices for analysis.
# Goal of this cell: `close_prices` is a DataFrame holding only closing prices,
# one column per series, whatever column layout the loader returned.
if isinstance(data.columns, pd.MultiIndex):
    # Depending on the yfinance version, the column structure might be
    # (Price, Ticker) or (Ticker, Price); print it so the layout is visible.
    print(data.columns)
    try:
        # (Price, Ticker) layout: 'Close' is a top-level label.
        close_prices = data['Close']
    except KeyError:
        # (Ticker, Price) layout: 'Close' lives on the second level.
        if 'Close' not in data.columns.get_level_values(1):
            # Fail loudly instead of silently treating every price/volume
            # column as a "closing price" in the cells that follow.
            raise KeyError(
                "No 'Close' level found in the column MultiIndex; "
                "cannot extract closing prices."
            ) from None
        close_prices = data.xs('Close', level=1, axis=1)
else:
    # Double brackets keep a DataFrame (not a Series) so that later cells can
    # iterate over `close_prices.columns`.
    close_prices = data[['Close']]

close_prices.head()

## 3. Exploratory Data Analysis (EDA)
### Visualizing Closing Prices

In [None]:
# Plot the closing-price series with the project's plotting helper (imported from utils).
plot_price_series(close_prices, title="Closing Prices of TSLA, BND, SPY")

### Daily Returns and Volatility

In [None]:
# Daily returns derived from the closing prices by the utils helper.
daily_returns = calculate_daily_returns(close_prices)

# Overlay every return series on one wide figure; partial transparency keeps
# overlapping lines distinguishable.
plt.figure(figsize=(14, 6))
for label, returns in daily_returns.items():
    plt.plot(returns.index, returns, label=label, alpha=0.7)
plt.title("Daily Returns")
plt.legend()
plt.show()

In [None]:
# 30-day rolling volatility: standard deviation of daily returns over a
# trailing 30-row window, computed column by column.
rolling_volatility = daily_returns.rolling(window=30).std()

# One line per column on a shared wide figure.
plt.figure(figsize=(14, 6))
for label, volatility in rolling_volatility.items():
    plt.plot(volatility.index, volatility, label=label)
plt.title("30-Day Rolling Volatility")
plt.legend()
plt.show()

## 4. Stationarity Analysis
Test each series for stationarity using the Augmented Dickey-Fuller (ADF) test.

In [None]:
# Run the utils stationarity check on every return series first...
print("Stationarity Test on Returns:")
for label, returns in daily_returns.items():
    check_stationarity(returns, name=f"{label} Returns")

# ...then on every raw closing-price series for comparison.
print("\nStationarity Test on Prices (Log Prices often used, or raw):")
for label, prices in close_prices.items():
    check_stationarity(prices, name=f"{label} Prices")

## 5. Risk Metrics

In [None]:
# Compute risk metrics from the daily returns with the utils helper; which
# metrics it reports is defined in utils and is not visible in this notebook.
risk_metrics = calculate_risk_metrics(daily_returns)
print("Risk Metrics:")
# Bare expression on the last line so the notebook renders the result as cell output.
risk_metrics

### Conclusion
- **TSLA**: High volatility, high risk, potential for high returns.
- **BND**: Low volatility, stable, lower returns.
- **SPY**: Moderate volatility, diversified market exposure.

Returns are stationary, while prices are non-stationary.