# setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import sys
sys.path.append('..')

from src.data_fetcher import StockDataFetcher
from src.features import FeatureEngineer

# Notebook-wide plotting look: apply the matplotlib style sheet first, then
# the seaborn palette, and confirm that the setup cell ran.
STYLE_SHEET = 'seaborn-v0_8-darkgrid'
COLOR_PALETTE = "Set2"

plt.style.use(STYLE_SHEET)
sns.set_palette(COLOR_PALETTE)

print("Ready to analyze NVIDIA stock")

# data

In [None]:
# Pull NVDA history for the '3y' period through the project's fetcher.
# `df` is the frame every later cell works on.
fetcher = StockDataFetcher('NVDA')
df = fetcher.get_historical_data(period='3y')

# Quick sanity report: row count, covered date span, closing-price extremes.
print(f"Analyzing {len(df)} trading days")

first_day = df['Date'].min()
last_day = df['Date'].max()
print(f"Period: {first_day.date()} to {last_day.date()}")

lowest_close = df['Close'].min()
highest_close = df['Close'].max()
print(f"Price range: ${lowest_close:.2f} - ${highest_close:.2f}")

# Left as the final expression so the notebook renders the preview.
df.head()

# price evolution

In [None]:
# Closing-price chart over the whole frame, annotated with the return between
# the first and the last row.
fig, ax = plt.subplots(figsize=(16, 7))

price_dates = df['Date']
price_closes = df['Close']
price_color = '#2ecc71'

ax.plot(price_dates, price_closes, linewidth=2.5, color=price_color, alpha=0.9)
ax.fill_between(price_dates, price_closes, alpha=0.2, color=price_color)

# Simple first-to-last return, expressed in percent.
start_price = price_closes.iloc[0]
end_price = price_closes.iloc[-1]
absolute_gain = end_price - start_price
total_return = (absolute_gain / start_price) * 100

# Badge positioned in axes-fraction coordinates (transform=ax.transAxes), so
# it stays in the top-left corner regardless of the data range.
badge_box = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.8}
ax.text(
    0.02, 0.95, f'Total Return: {total_return:+.1f}%',
    transform=ax.transAxes,
    fontsize=14,
    fontweight='bold',
    bbox=badge_box,
)

ax.set_title('NVIDIA Stock Price Journey', fontsize=19, fontweight='bold', pad=20)
ax.set_xlabel('Date', fontsize=13)
ax.set_ylabel('Closing Price (USD)', fontsize=13)
ax.grid(True, alpha=0.3, linestyle='--')

plt.tight_layout()
plt.show()

# Text recap of the numbers behind the chart.
print(f"Starting price: ${start_price:.2f}")
print(f"Ending price: ${end_price:.2f}")
print(f"Absolute gain: ${absolute_gain:.2f}")
print(f"Percentage gain: {total_return:.2f}%")

# daily return analysis

In [None]:
# Daily-return analysis: adds two columns to `df`, draws a 2x2 diagnostic
# figure and prints summary statistics.
#
#   df['Daily_Return']      - close-to-close change in percent
#   df['Cumulative_Return'] - compounded return as a fraction
#
# Both columns are NaN on the first row, which has no previous close.

# Conventional number of trading days used to annualise daily statistics.
TRADING_DAYS_PER_YEAR = 252

# Compute the fractional change once and derive both columns from it.
daily_change = df['Close'].pct_change()
df['Daily_Return'] = daily_change * 100
df['Cumulative_Return'] = (1 + daily_change).cumprod() - 1

# Statistics reused by the plots and the printed report. pandas skips NaN in
# mean/std/min/max, so dropping the leading NaN first does not change them.
valid_returns = df['Daily_Return'].dropna()
mean_return = valid_returns.mean()
return_std = valid_returns.std()
cumulative_pct = df['Cumulative_Return'] * 100

fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# Top-left: daily returns through time, with a zero reference line.
axes[0, 0].plot(df['Date'], df['Daily_Return'], linewidth=1, color='#3498db', alpha=0.7)
axes[0, 0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
axes[0, 0].set_title('Daily Returns Over Time', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Return (%)', fontsize=11)
axes[0, 0].grid(True, alpha=0.3)

# Top-right: histogram of daily returns with the mean marked.
axes[0, 1].hist(valid_returns, bins=60, color='#9b59b6',
                alpha=0.7, edgecolor='black')
axes[0, 1].axvline(mean_return, color='red', linestyle='--',
                   linewidth=2, label=f'Mean: {mean_return:.2f}%')
axes[0, 1].set_title('Return Distribution', fontsize=14, fontweight='bold')
axes[0, 1].set_xlabel('Daily Return (%)', fontsize=11)
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Bottom-left: compounded return in percent.
axes[1, 0].plot(df['Date'], cumulative_pct,
                linewidth=2.5, color='#e74c3c')
axes[1, 0].fill_between(df['Date'], cumulative_pct,
                        alpha=0.3, color='#e74c3c')
axes[1, 0].set_title('Cumulative Returns', fontsize=14, fontweight='bold')
axes[1, 0].set_xlabel('Date', fontsize=11)
axes[1, 0].set_ylabel('Cumulative Return (%)', fontsize=11)
axes[1, 0].grid(True, alpha=0.3)

# Bottom-right: probability plot of daily returns against a normal distribution.
stats.probplot(valid_returns, dist="norm", plot=axes[1, 1])
axes[1, 1].set_title('Q-Q Plot (Normality Check)', fontsize=14, fontweight='bold')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# A per-day mean/std ratio is sqrt(252) smaller than the Sharpe ratio people
# normally quote, so scale it to a yearly figure. The risk-free rate is taken
# as zero. Report NaN when volatility is zero or undefined (fewer than two
# observations) rather than an infinite or meaningless ratio.
if return_std > 0:
    sharpe_annualized = (mean_return / return_std) * TRADING_DAYS_PER_YEAR ** 0.5
else:
    sharpe_annualized = float('nan')

print(f"Average daily return: {mean_return:.3f}%")
print(f"Daily volatility (std): {return_std:.3f}%")
print(f"Best single day: {valid_returns.max():.2f}%")
print(f"Worst single day: {valid_returns.min():.2f}%")
print(f"Sharpe ratio (annualized, rf=0): {sharpe_annualized:.3f}")

# volume pattern

In [None]:
# Price (top) and traded volume (bottom) on a shared date axis. Volume bars
# are coloured by the sign of that day's return.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 10), sharex=True)

# Top panel: closing price.
ax1.plot(df['Date'], df['Close'], linewidth=2, color='#16a085', label='Price')
ax1.set_ylabel('Price (USD)', fontsize=12)
ax1.set_title('Price vs Volume Relationship', fontsize=16, fontweight='bold', pad=15)
ax1.legend(loc='upper left')
ax1.grid(True, alpha=0.3)

# Red for negative-return days, green otherwise. Missing returns are filled
# with 0 first, so they fall on the green side.
is_down_day = df['Daily_Return'].fillna(0).lt(0)
color_volume = is_down_day.map({True: '#e74c3c', False: '#2ecc71'}).tolist()

# Bottom panel: volume bars.
volume = df['Volume']
ax2.bar(df['Date'], volume, width=1, color=color_volume, alpha=0.6)
ax2.set_ylabel('Volume', fontsize=12)
ax2.set_xlabel('Date', fontsize=12)
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Volume recap: mean, maximum, and the date on which the maximum occurred.
avg_volume = volume.mean()
print(f"Average daily volume: {avg_volume:,.0f} shares")
print(f"Highest volume day: {volume.max():,.0f} shares")

peak_row = volume.idxmax()
peak_date = df.loc[peak_row, 'Date']
print(f"Date of highest volume: {peak_date.date()}")