In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.stattools import adfuller

plt.style.use("seaborn-v0_8")


# Parameters

In [2]:
TICKERS = ["TSLA", "BND", "SPY"]
START_DATE = "2015-01-01"
END_DATE = "2026-01-15"


# Data Extraction

In [3]:
data = {}

for ticker in TICKERS:
    df = yf.download(ticker, start=START_DATE, end=END_DATE)
    df["Ticker"] = ticker
    data[ticker] = df

# Preview Tesla data
data["TSLA"].head()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,Ticker
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2015-01-02,14.620667,14.883333,14.217333,14.858,71466000,TSLA
2015-01-05,14.006,14.433333,13.810667,14.303333,80527500,TSLA
2015-01-06,14.085333,14.28,13.614,14.004,93928500,TSLA
2015-01-07,14.063333,14.318667,13.985333,14.223333,44526000,TSLA
2015-01-08,14.041333,14.253333,14.000667,14.187333,51637500,TSLA


# Basic Data Quality Checks

In [4]:
for ticker, df in data.items():
    print(f"\n{ticker} INFO")
    print(df.info())
    print("\nMissing values:")
    print(df.isnull().sum())



TSLA INFO
<class 'pandas.DataFrame'>
DatetimeIndex: 2775 entries, 2015-01-02 to 2026-01-14
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   (Close, TSLA)   2775 non-null   float64
 1   (High, TSLA)    2775 non-null   float64
 2   (Low, TSLA)     2775 non-null   float64
 3   (Open, TSLA)    2775 non-null   float64
 4   (Volume, TSLA)  2775 non-null   int64  
 5   (Ticker, )      2775 non-null   str    
dtypes: float64(4), int64(1), str(1)
memory usage: 151.8 KB
None

Missing values:
Price   Ticker
Close   TSLA      0
High    TSLA      0
Low     TSLA      0
Open    TSLA      0
Volume  TSLA      0
Ticker            0
dtype: int64

BND INFO
<class 'pandas.DataFrame'>
DatetimeIndex: 2775 entries, 2015-01-02 to 2026-01-14
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   (Close, BND)   2775 non-null   float64
 1   (High, BND)    2775 non-nu

# Data Cleaning & Feature Engineering

In [None]:
def clean_data(df):
    df = df.copy()
    df = df.dropna()
    df["Return"] = df["Adj Close"].pct_change()
    df["Volatility"] = df["Return"].rolling(window=20).std()
    return df.dropna()


for ticker in data:
    data[ticker] = clean_data(data[ticker])

data["TSLA"].head()


# Summary Statistics

In [None]:
data["TSLA"][["Adj Close", "Return", "Volatility"]].describe()


# Price Trends (EDA)

In [None]:
plt.figure(figsize=(12, 6))

for ticker in TICKERS:
    plt.plot(data[ticker].index, data[ticker]["Adj Close"], label=ticker)

plt.title("Adjusted Close Price Over Time")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()


# Daily Returns Visualization

In [None]:
plt.figure(figsize=(12, 6))

for ticker in TICKERS:
    plt.plot(data[ticker].index, data[ticker]["Return"], label=ticker)

plt.title("Daily Returns")
plt.xlabel("Date")
plt.ylabel("Return")
plt.legend()
plt.show()


# Rolling Volatility

In [None]:
plt.figure(figsize=(12, 6))

for ticker in TICKERS:
    plt.plot(
        data[ticker].index,
        data[ticker]["Volatility"],
        label=f"{ticker} Volatility"
    )

plt.title("20-Day Rolling Volatility")
plt.xlabel("Date")
plt.ylabel("Volatility")
plt.legend()
plt.show()


# Outlier Detection (Tesla Returns)

In [None]:
tsla_returns = data["TSLA"]["Return"]

plt.figure(figsize=(10, 5))
sns.boxplot(x=tsla_returns)
plt.title("Tesla Daily Return Outliers")
plt.show()


# Stationarity Test (ADF)

In [None]:
def adf_test(series, name=""):
    result = adfuller(series)
    print(f"ADF Test for {name}")
    print(f"ADF Statistic: {result[0]:.4f}")
    print(f"p-value: {result[1]:.4f}")
    print("-" * 40)


adf_test(data["TSLA"]["Adj Close"], "TSLA Adj Close")
adf_test(data["TSLA"]["Return"], "TSLA Returns")


# Value at Risk (VaR)

In [None]:
def value_at_risk(returns, confidence=0.95):
    return np.percentile(returns, (1 - confidence) * 100)


tsla_var = value_at_risk(tsla_returns)
print(f"TSLA 95% VaR: {tsla_var:.4f}")


# Sharpe Ratio

In [None]:
def sharpe_ratio(returns, risk_free_rate=0):
    return np.mean(returns - risk_free_rate) / np.std(returns)


for ticker in TICKERS:
    sr = sharpe_ratio(data[ticker]["Return"])
    print(f"{ticker} Sharpe Ratio: {sr:.4f}")


# Save Processed Data

In [None]:
for ticker, df in data.items():
    df.to_csv(f"data/processed/{ticker}_processed.csv")
