# QLDM Getting Started Tutorial

This notebook demonstrates the basic usage of QLDM for fetching, analyzing, and visualizing financial data.

## 1. Setup and Initialization

In [None]:
# Import required libraries

import plotly.graph_objects as go
import polars as pl
from plotly.subplots import make_subplots

# Import QLDM
from qldm import QLDM
from qldm.performance import profile_function
from qldm.validation import OHLCVValidator

# Initialize QLDM
# One client is created with no arguments and bound to the notebook-level name
# `qldm`; the fetch calls in later cells all go through this instance.
qldm = QLDM()
print("QLDM initialized successfully!")

## 2. Fetching Data

In [None]:
# Fetch Apple stock data for the first half of 2024 (2024-01-01 through 2024-06-30).
# `symbol`, `start_date` and `end_date` are reused by later cells.
symbol = "AAPL"
start_date = "2024-01-01"
end_date = "2024-06-30"


# Fetch data with profiling
@profile_function
def fetch_stock_data(symbol, start, end):
    """Fetch data for ``symbol`` from the "yahoo" provider between ``start`` and ``end``.

    The function is wrapped by ``profile_function``.
    NOTE(review): the caller in this notebook unpacks the decorated call as
    ``(data, profile)``, so the decorator is assumed to return a pair.
    """
    request = {"symbol": symbol, "provider": "yahoo", "start": start, "end": end}
    return qldm.fetch(**request)


# The decorated call is unpacked into the fetched frame and its profiling record.
df, profile = fetch_stock_data(symbol, start_date, end_date)
row_count = len(df)
print(f"Fetched {row_count} rows in {profile.execution_time:.2f} seconds")
print(f"Memory used: {profile.memory_used:.2f} MB")

In [None]:
# Summarize what was fetched: symbol, covered period and row count
timestamps = df["timestamp"]
first_ts, last_ts = timestamps.min(), timestamps.max()

print(f"Symbol: {symbol}")
print(f"Date range: {first_ts} to {last_ts}")
print(f"Number of trading days: {len(df)}")
print("\nFirst 5 rows:")
# Leave the frame as the cell's last expression so the notebook renders it.
df.head()

## 3. Data Validation

In [None]:
# Run the OHLCV validator over the fetched frame
validator = OHLCVValidator()
validation_result = validator.validate(df)

if not validation_result.passed:
    print("❌ Data validation failed:")
    # List every reported issue with its severity
    for issue in validation_result.issues:
        print(f"  - {issue.severity}: {issue.message}")
else:
    print("✅ Data validation passed!")

# Display validation summary: one line per severity counter on the result
print("\nValidation Summary:")
for label, counter_name in (
    ("Critical issues", "critical_count"),
    ("Errors", "error_count"),
    ("Warnings", "warning_count"),
):
    print(f"- {label}: {getattr(validation_result, counter_name)}")

## 4. Basic Analysis

In [None]:
# Calculate basic statistics
close_prices = df["close"]
volumes = df["volume"]

stats = {
    "Mean Close": close_prices.mean(),
    "Median Close": close_prices.median(),
    "Std Dev": close_prices.std(),
    "Min Price": df["low"].min(),
    "Max Price": df["high"].max(),
    "Total Volume": volumes.sum(),
    "Avg Daily Volume": volumes.mean(),
}

print("Statistical Summary:")
for key, value in stats.items():
    # Volume metrics are shown as whole numbers with thousands separators;
    # everything else is shown as a dollar amount with two decimals.
    formatted = f"{value:,.0f}" if "Volume" in key else f"${value:.2f}"
    print(f"{key}: {formatted}")

In [None]:
# Calculate returns
close_col = pl.col("close")
prev_close = close_col.shift(1)
first_close = df["close"][0]  # baseline price for the cumulative return

df = df.with_columns(
    # Percentage change versus the previous row's close (null on the first row)
    ((close_col - prev_close) / prev_close * 100).alias("daily_return"),
    # Calculate cumulative returns: percentage change versus the first close
    ((close_col / first_close - 1) * 100).alias("cumulative_return"),
)

# Display return statistics
daily_returns = df["daily_return"]
total_return = df["cumulative_return"][-1]

print("Return Analysis:")
print(f"Total Return: {total_return:.2f}%")
print(f"Average Daily Return: {daily_returns.mean():.3f}%")
print(f"Volatility (Daily Std): {daily_returns.std():.3f}%")
print(f"Best Day: {daily_returns.max():.2f}%")
print(f"Worst Day: {daily_returns.min():.2f}%")

## 5. Visualization

In [None]:
# Create candlestick chart with volume
# Two stacked panels share one x-axis: price on top (70% of the height) and
# volume underneath (30%).
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=(f"{symbol} Stock Price", "Volume"),
    row_heights=[0.7, 0.3],
)

# Add candlestick chart
fig.add_trace(
    go.Candlestick(
        x=df["timestamp"],
        open=df["open"],
        high=df["high"],
        low=df["low"],
        close=df["close"],
        name="OHLC",
    ),
    row=1,
    col=1,
)

# Add volume bar chart
fig.add_trace(
    go.Bar(x=df["timestamp"], y=df["volume"], name="Volume", marker_color="lightblue"), row=2, col=1
)

# Update layout
fig.update_layout(
    title=f"{symbol} Price and Volume Chart",
    yaxis_title="Price ($)",
    yaxis2_title="Volume",
    xaxis2_title="Date",
    # Candlestick traces attach a range slider to their x-axis by default. In a
    # stacked layout it is drawn under the price panel, on top of the volume
    # panel, so it is switched off here.
    xaxis_rangeslider_visible=False,
    height=600,
    showlegend=False,
)

fig.show()

In [None]:
# Plot returns distribution
import plotly.express as px

# Rows without a daily return are excluded from the histogram input.
valid_returns = df.filter(pl.col("daily_return").is_not_null())
mean_return = df["daily_return"].mean()

histogram_options = {
    "x": "daily_return",
    "nbins": 30,
    "title": f"{symbol} Daily Returns Distribution",
    "labels": {"daily_return": "Daily Return (%)", "count": "Frequency"},
}
fig = px.histogram(valid_returns, **histogram_options)

# Reference lines: zero return in red, mean daily return in green
fig.add_vline(x=0, line_dash="dash", line_color="red")
fig.add_vline(x=mean_return, line_dash="dash", line_color="green", annotation_text="Mean")

fig.show()

## 6. Multiple Symbols Comparison

In [None]:
# Fetch data for multiple symbols
symbols = ["AAPL", "MSFT", "GOOGL"]
data = {}

# The loop variable is named `ticker`, not `symbol`, so the notebook-level
# `symbol` used by the single-stock cells is not overwritten.
for ticker in symbols:
    try:
        frame = qldm.fetch(symbol=ticker, provider="yahoo", start=start_date, end=end_date)
        # An empty frame has no first close to use as a baseline; report it as a
        # failed fetch with a readable message instead of an IndexError.
        if frame.is_empty():
            raise ValueError("no rows returned")
        # Calculate cumulative returns relative to the first close
        first_close = frame["close"][0]
        frame = frame.with_columns(((pl.col("close") / first_close - 1) * 100).alias("return"))
        data[ticker] = frame
        print(f"✅ Fetched {ticker}: {len(frame)} rows")
    except Exception as e:
        # Best effort: one failing symbol must not stop the others. Failed
        # symbols are simply absent from `data`.
        print(f"❌ Failed to fetch {ticker}: {e}")

In [None]:
# Plot comparative returns
fig = go.Figure()

# The loop variables are named `ticker`/`frame`, not `symbol`/`df`, so the
# notebook-level single-stock variables are not overwritten by this cell.
for ticker, frame in data.items():
    fig.add_trace(
        go.Scatter(x=frame["timestamp"], y=frame["return"], mode="lines", name=ticker)
    )

fig.update_layout(
    title="Cumulative Returns Comparison",
    xaxis_title="Date",
    yaxis_title="Cumulative Return (%)",
    hovermode="x unified",
    height=500,
)

fig.show()

# Print final returns (last row of each symbol's cumulative return column)
print("\nFinal Returns:")
for ticker, frame in data.items():
    final_return = frame["return"][-1]
    print(f"{ticker}: {final_return:.2f}%")

## 7. Technical Indicators

In [None]:
# Calculate moving averages
# One rolling-mean column per window length, named "ma<window>", added to the
# AAPL frame collected by the multi-symbol fetch.
df = data["AAPL"].with_columns(
    [
        pl.col("close").rolling_mean(window_size=window).alias(f"ma{window}")
        for window in (20, 50)
    ]
)


# Calculate RSI
def calculate_rsi(df, period=14):
    """Build a Polars expression for the Relative Strength Index of ``close``.

    Gains and losses are averaged with a simple rolling mean over ``period``
    rows and combined as ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Args:
        df: Frame the expression will be evaluated against. Kept so existing
            calls keep working; the expression refers to the ``close`` column
            by name rather than reading values out of ``df``.
        period: Rolling window length, in rows.

    Returns:
        An unaliased ``pl.Expr``; alias it and pass it to ``with_columns``.
    """
    # Build the change as a column expression rather than from eager Series, so
    # the result does not embed one particular frame's data.
    close = pl.col("close")
    delta = close - close.shift(1)

    # Split the change into its positive part (gain) and the magnitude of its
    # negative part (loss); rows that do not match contribute 0.
    gain = pl.when(delta > 0).then(delta).otherwise(0)
    loss = pl.when(delta < 0).then(-delta).otherwise(0)

    avg_gain = gain.rolling_mean(window_size=period)
    avg_loss = loss.rolling_mean(window_size=period)

    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))


# Attach the RSI column to the frame that already carries the moving averages
df = df.with_columns(calculate_rsi(df).alias("rsi"))

# Plot price with indicators
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=("Price with Moving Averages", "RSI"),
    row_heights=[0.7, 0.3],
)

# Price and MAs: (column, legend name, line color), all drawn in the top panel
price_traces = (
    ("close", "Close", "blue"),
    ("ma20", "MA20", "orange"),
    ("ma50", "MA50", "red"),
)
for column_name, trace_name, line_color in price_traces:
    fig.add_trace(
        go.Scatter(
            x=df["timestamp"], y=df[column_name], name=trace_name, line={"color": line_color}
        ),
        row=1,
        col=1,
    )

# RSI in the bottom panel, with dashed guide lines at 70 (red) and 30 (green)
fig.add_trace(
    go.Scatter(x=df["timestamp"], y=df["rsi"], name="RSI", line={"color": "purple"}), row=2, col=1
)
for level, guide_color in ((70, "red"), (30, "green")):
    fig.add_hline(y=level, line_dash="dash", line_color=guide_color, row=2, col=1)

fig.update_layout(height=600, title="Technical Analysis")
fig.show()

## 8. Export Results

In [None]:
# Export to different formats
import os

output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)  # no error if the directory already exists

# Export to CSV
csv_path = f"{output_dir}/aapl_analysis.csv"
df.write_csv(csv_path)
print(f"✅ Exported to CSV: {csv_path}")

# Export to Parquet (efficient format)
parquet_path = f"{output_dir}/aapl_analysis.parquet"
df.write_parquet(parquet_path)
print(f"✅ Exported to Parquet: {parquet_path}")

# Export summary statistics
# `stats` is the metric -> value dict built in the Basic Analysis section. It
# holds a mix of ints (volume sum) and floats, so every value is cast to float
# explicitly instead of relying on the DataFrame constructor to coerce them.
summary = pl.DataFrame(
    {
        "Metric": list(stats.keys()),
        "Value": [float(metric_value) for metric_value in stats.values()],
    }
)
summary_path = f"{output_dir}/summary_stats.csv"
summary.write_csv(summary_path)
print(f"✅ Exported summary: {summary_path}")

## 9. Performance Optimization

In [None]:
# Demonstrate caching
from qldm.performance import cache_result, get_cache_stats


@cache_result(ttl_seconds=300)  # Cache for 5 minutes
def get_expensive_data(symbol):
    """Fetch first-half-2024 data for ``symbol``; results are cached for 300 seconds.

    The "Fetching data for ..." line is printed each time the function body
    runs, so it shows whether a call actually reached this body.
    """
    print(f"Fetching data for {symbol}...")
    # Name the provider explicitly, as every other fetch in this notebook does,
    # instead of depending on whatever provider QLDM defaults to.
    return qldm.fetch(symbol=symbol, provider="yahoo", start="2024-01-01", end="2024-06-30")


# First call - will fetch from provider
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() can jump if the system clock is adjusted.
started = time.perf_counter()
df1 = get_expensive_data("MSFT")
print(f"First call took: {time.perf_counter() - started:.2f} seconds")

# Second call - will use cache
started = time.perf_counter()
df2 = get_expensive_data("MSFT")
print(f"Second call took: {time.perf_counter() - started:.4f} seconds (cached)")

# Display cache statistics
# Stored under `cache_stats` rather than `stats`, so the summary-statistics dict
# built in the Basic Analysis section (and read by the export cell) survives.
cache_stats = get_cache_stats()
memory_cache = cache_stats["memory_cache"]
print("\nCache Statistics:")
print(f"Hit rate: {memory_cache['hit_rate']:.1%}")
print(f"Total entries: {memory_cache['total_entries']}")
print(f"Cache size: {memory_cache['total_size_mb']:.2f} MB")

## 10. Conclusion

This notebook demonstrated:
- Fetching financial data using QLDM
- Data validation and quality checks
- Basic statistical analysis
- Visualization with interactive charts
- Multi-symbol comparison
- Technical indicators calculation
- Data export to various formats
- Performance optimization with caching

For more advanced usage, check out the other example notebooks and the full documentation.