# Databento Oil Futures Data Analysis

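This notebook demonstrates how to load zstd-compressed 1-minute OHLCV oil futures files downloaded from Databento, consolidate them into a single DataFrame keyed by field and symbol, and run a first pass of plots and basic metrics on the result.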

## Setup and Data Loading

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import zstandard as zstd
import json
from pathlib import Path
import glob
from datetime import datetime

%matplotlib inline
plt.style.use('seaborn')

In [None]:
class DabentoLoader:
    """Load Databento OHLCV futures files from one directory and combine them.

    Attributes:
        base_path: Directory (as a ``Path``) holding the symbology CSV and the
            compressed OHLCV files.
        symbol_map: ``raw_symbol -> instrument_id`` dict, or ``None`` until
            ``load_symbol_mappings`` has been called.
        loaded_data: ``symbol -> DataFrame`` dict filled by ``load_ohlcv_files``.
    """

    # Fields copied into the consolidated frame, in output column order.
    OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    def __init__(self, base_path):
        """Remember the data directory; nothing is read from disk yet."""
        self.base_path = Path(base_path)
        self.symbol_map = None
        self.loaded_data = {}

    def load_symbol_mappings(self):
        """Read ``mini_symbology.csv`` and build the symbol lookup.

        Returns:
            dict mapping each ``raw_symbol`` to the first ``instrument_id``
            listed for it in the file. The same dict is stored on
            ``self.symbol_map``.
        """
        symbol_df = pd.read_csv(self.base_path / 'mini_symbology.csv')
        self.symbol_map = symbol_df.groupby('raw_symbol')['instrument_id'].first().to_dict()
        return self.symbol_map

    def decompress_zst(self, file_path):
        """Stream-decompress a ``.zst`` file and parse its contents as CSV.

        Args:
            file_path: Path of the zstd-compressed CSV file.

        Returns:
            DataFrame produced by ``pd.read_csv`` on the decompressed stream.
        """
        with open(file_path, 'rb') as fh:
            dctx = zstd.ZstdDecompressor()
            decompressed = dctx.stream_reader(fh)
            return pd.read_csv(decompressed)

    def load_ohlcv_files(self, pattern="glbx-mdp3-*.ohlcv-1m.*.csv.zst"):
        """Load every OHLCV file under ``base_path`` that matches ``pattern``.

        The symbol is the part of the file path between ``'ohlcv-1m.'`` and
        ``'.csv.zst'``. Each file's ``timestamp`` column is parsed as
        nanoseconds since the epoch and becomes the frame's index. Results are
        stored in ``self.loaded_data`` keyed by symbol.

        Files that fail to load are reported with ``print`` and skipped, so
        one bad file does not abort the whole load.

        Args:
            pattern: Glob pattern, relative to ``base_path``.
        """
        # glob order is OS-dependent; sorting keeps the symbol order (and so the
        # column order of the consolidated frame) the same on every run.
        files = sorted(glob.glob(str(self.base_path / pattern)))

        for file in files:
            try:
                symbol = file.split('ohlcv-1m.')[-1].split('.csv.zst')[0]
                df = self.decompress_zst(file)
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ns')
                self.loaded_data[symbol] = df.set_index('timestamp')

            except Exception as e:
                print(f"Error loading {file}: {str(e)}")

    def create_consolidated_df(self):
        """Combine all loaded symbols into one frame with two-level columns.

        Columns are ``(field, symbol)`` pairs: fields follow ``OHLCV_FIELDS``
        order and symbols follow ``loaded_data`` insertion order. The index is
        the sorted union of every symbol's timestamps; a symbol with no bar at
        a given timestamp gets NaN there.

        Returns:
            The consolidated DataFrame. When nothing has been loaded, an empty
            DataFrame that still has two-level columns.
        """
        fields = list(self.OHLCV_FIELDS)

        if not self.loaded_data:
            return pd.DataFrame(columns=pd.MultiIndex.from_product([fields, []]))

        # Concatenating the per-symbol series outer-joins their indexes.
        # Assigning them one by one into an empty DataFrame would instead pin the
        # index to the first symbol and drop every timestamp it does not have.
        per_field = [
            pd.concat(
                {symbol: frame[field] for symbol, frame in self.loaded_data.items()},
                axis=1,
            )
            for field in fields
        ]

        final_df = pd.concat(per_field, keys=fields, axis=1)
        return final_df.sort_index()

## Load and Prepare Data

Set the path to your data directory and load the data:

In [None]:
# Directory that holds mini_symbology.csv and the compressed OHLCV files.
# The value below is a placeholder: replace it with a real path before running.
BASE_PATH = 'path/to/your/data/directory'

# Create the loader; nothing is read from disk until the load_* calls below
loader = DabentoLoader(BASE_PATH)

# Build the raw_symbol -> instrument_id lookup from mini_symbology.csv
symbol_map = loader.load_symbol_mappings()
print(f"Loaded {len(symbol_map)} symbol mappings")

# Read every file matching the loader's default glob pattern into loader.loaded_data
loader.load_ohlcv_files()
print(f"Loaded data for {len(loader.loaded_data)} symbols")

# Combine the per-symbol frames into one frame; `df` is the frame later cells use
df = loader.create_consolidated_df()
print(f"Created consolidated DataFrame with shape {df.shape}")

## Data Exploration

Let's examine the loaded data:

In [None]:
# Display basic information about the dataset
print("Dataset Info:")
print(f"Date Range: {df.index.min()} to {df.index.max()}")
# Column level 0 is the OHLCV field and level 1 is the symbol
print(f"Number of symbols: {len(df.columns.levels[1])}")

# Display available symbols
print("\nAvailable symbols:")
print(df.columns.levels[1].tolist())

## Basic Data Visualization

Let's create some basic visualizations of the futures data:

In [None]:
def plot_futures_prices(df, symbols=None, n_symbols=5):
    """Draw the closing-price series of several contracts on one chart.

    Args:
        df: Frame whose ``'close'`` entry holds one column per symbol.
        symbols: Symbols to draw. When ``None``, the first ``n_symbols``
            columns of ``df['close']`` are used.
        n_symbols: Number of leading symbols drawn when ``symbols`` is ``None``.
    """
    closes = df['close']
    chosen = closes.columns[:n_symbols] if symbols is None else symbols

    fig, ax = plt.subplots(figsize=(15, 8))
    for name in chosen:
        ax.plot(closes[name], label=name)

    ax.set_title('Futures Closing Prices')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    # Anchor the legend outside the axes so it does not cover the price lines
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

# Default arguments: draw the first 5 symbols
plot_futures_prices(df)

## Calculate Basic Trading Metrics

Let's calculate some basic trading metrics:

In [None]:
def calculate_metrics(df, symbol, window=20, periods_per_year=252):
    """Compute per-bar returns, rolling volatility and a moving average.

    All calculations are per row of ``df['close'][symbol]``: a "period" is
    whatever spacing the input rows have. The defaults (20 rows, 252 periods
    per year) only describe a 20-day window and a yearly annualisation when
    each row is one trading day. For other bar sizes, resample first or pass
    matching values.

    Args:
        df: Frame whose ``'close'`` entry holds one column per symbol.
        symbol: Column of ``df['close']`` to analyse.
        window: Number of rows in the rolling volatility and moving-average
            windows.
        periods_per_year: Number of rows per year, used to annualise the
            rolling standard deviation of returns.

    Returns:
        DataFrame indexed like the input with columns ``'returns'``,
        ``'volatility'`` and ``'ma<window>'`` (``'ma20'`` by default).
    """
    close = df['close'][symbol]

    # Simple return from one row to the next
    returns = close.pct_change()

    # Rolling standard deviation of returns, scaled to a yearly figure
    volatility = returns.rolling(window).std() * np.sqrt(periods_per_year)

    moving_average = close.rolling(window).mean()

    return pd.DataFrame({
        'returns': returns,
        'volatility': volatility,
        f'ma{window}': moving_average,
    })

# Calculate metrics for the first symbol.
# The loader reads 1-minute bars (ohlcv-1m files), while calculate_metrics is
# called here with a 20-row window and sqrt(252) annualisation, which is only
# meaningful when each row is one day. Reduce the symbol to one close per
# calendar day (last bar of the day, in the index's clock) and drop days
# without any bar before computing.
first_symbol = df['close'].columns[0]
daily_close = df['close'][[first_symbol]].resample('1D').last().dropna()
# Re-wrap under a 'close' column level so the frame keeps the df['close'][symbol] layout
daily_df = pd.concat({'close': daily_close}, axis=1)
metrics = calculate_metrics(daily_df, first_symbol)
print(f"Metrics for {first_symbol}:")
print(metrics.describe())

## Next Steps

Potential next steps for analysis:
1. Implement contract roll adjustment
2. Calculate term structure and spreads
3. Implement technical indicators
4. Develop trading strategies
5. Perform backtesting