In [14]:
import yfinance as yf
import polars as pl
import pandas as pd
from datetime import datetime, timedelta
import pytz

In [2]:
# Step 1: Map a short commodity name to the futures symbol used for download
tickers = dict([
    ('crude', 'CL=F'),  # crude oil futures
    ('brent', 'BZ=F'),  # Brent crude oil futures
    ('gas', 'NG=F'),    # natural gas futures
])

In [3]:
# Step 2: Build the download window -- the 24 hours ending at the current UTC time
end_time = datetime.now(tz=pytz.UTC)
start_time = end_time - timedelta(hours=24)

In [19]:
# Step 3: Create a function to download and process data for each commodity
def get_commodity_data(ticker: str, start=None, end=None, interval: str = '1m'):
    """Download price bars for one ticker and return them as a Polars DataFrame.

    Args:
        ticker: Symbol passed to ``yf.download`` (for example ``'CL=F'``).
        start: Start of the download window. When omitted, the notebook-level
            ``start_time`` is used.
        end: End of the download window. When omitted, the notebook-level
            ``end_time`` is used.
        interval: Bar size passed to ``yf.download``. Defaults to ``'1m'``
            (1-minute bars).

    Returns:
        A Polars DataFrame containing the downloaded columns, with the former
        pandas index turned into a regular column and an added ``Symbol``
        column holding ``ticker`` on every row.

    Raises:
        ValueError: If the download returns no rows. Failing here gives a
            clear message instead of passing an empty frame downstream,
            where its schema may not line up with the other tickers' frames.
    """
    # Fall back to the notebook-level window so existing one-argument calls
    # behave exactly as before.
    if start is None:
        start = start_time
    if end is None:
        end = end_time

    # Download the data using yfinance
    data = yf.download(
        ticker,
        start=start,
        end=end,
        interval=interval,
    )

    if data is None or data.empty:
        raise ValueError(
            f"No data returned for {ticker!r} between {start} and {end} "
            f"at interval {interval!r}"
        )

    # Move the index into a regular column so it survives the conversion
    df = data.reset_index()

    # Column labels may be tuples (multi-level columns); keep only the first
    # level so the names no longer carry ticker information
    df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]

    # Convert to Polars DataFrame and add ticker column
    df = pl.from_pandas(df)
    return df.with_columns(pl.lit(ticker).alias('Symbol'))

In [20]:
# Step 4: Fetch each commodity in turn, collecting one frame per ticker
dfs = list()
for name in tickers:
    ticker = tickers[name]
    print(f"Downloading {name} data...")
    df = get_commodity_data(ticker)
    dfs += [df]

[*********************100%***********************]  1 of 1 completed

Downloading crude data...
Downloading brent data...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Downloading gas data...





In [21]:
# Inspect the list of per-ticker frames collected by the download loop
print(dfs)

[shape: (213, 7)
┌─────────────────────────┬───────────┬───────────┬───────────┬───────────┬────────┬────────┐
│ Datetime                ┆ Close     ┆ High      ┆ Low       ┆ Open      ┆ Volume ┆ Symbol │
│ ---                     ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---    ┆ ---    │
│ datetime[ns, UTC]       ┆ f64       ┆ f64       ┆ f64       ┆ f64       ┆ i64    ┆ str    │
╞═════════════════════════╪═══════════╪═══════════╪═══════════╪═══════════╪════════╪════════╡
│ 2025-01-19 23:30:00 UTC ┆ 78.190002 ┆ 78.190002 ┆ 78.190002 ┆ 78.190002 ┆ 0      ┆ CL=F   │
│ 2025-01-19 23:46:00 UTC ┆ 78.300003 ┆ 78.300003 ┆ 78.290001 ┆ 78.290001 ┆ 9      ┆ CL=F   │
│ 2025-01-19 23:47:00 UTC ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 4      ┆ CL=F   │
│ 2025-01-19 23:49:00 UTC ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 10     ┆ CL=F   │
│ 2025-01-20 00:13:00 UTC ┆ 78.330002 ┆ 78.330002 ┆ 78.330002 ┆ 78.330002 ┆ 85     ┆ CL=F   │
│ …                       ┆ …         ┆ …  

In [22]:
# Step 5: Stack the per-ticker frames on top of each other into one frame
combined_data = pl.concat(dfs, how="vertical")


In [23]:
# Preview the combined frame produced by pl.concat
print(combined_data)

shape: (1_853, 7)
┌─────────────────────────┬───────────┬───────────┬───────────┬───────────┬────────┬────────┐
│ Datetime                ┆ Close     ┆ High      ┆ Low       ┆ Open      ┆ Volume ┆ Symbol │
│ ---                     ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---    ┆ ---    │
│ datetime[ns, UTC]       ┆ f64       ┆ f64       ┆ f64       ┆ f64       ┆ i64    ┆ str    │
╞═════════════════════════╪═══════════╪═══════════╪═══════════╪═══════════╪════════╪════════╡
│ 2025-01-19 23:30:00 UTC ┆ 78.190002 ┆ 78.190002 ┆ 78.190002 ┆ 78.190002 ┆ 0      ┆ CL=F   │
│ 2025-01-19 23:46:00 UTC ┆ 78.300003 ┆ 78.300003 ┆ 78.290001 ┆ 78.290001 ┆ 9      ┆ CL=F   │
│ 2025-01-19 23:47:00 UTC ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 4      ┆ CL=F   │
│ 2025-01-19 23:49:00 UTC ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 78.290001 ┆ 10     ┆ CL=F   │
│ 2025-01-20 00:13:00 UTC ┆ 78.330002 ┆ 78.330002 ┆ 78.330002 ┆ 78.330002 ┆ 85     ┆ CL=F   │
│ …                       ┆ …         ┆ … 

In [24]:
# Step 6: Order rows by symbol then timestamp, and put the columns in a
# conventional Datetime / Symbol / OHLC / Volume layout
final_data = combined_data.sort('Symbol', 'Datetime').select(
    'Datetime',
    'Symbol',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
)

In [25]:
# Display the first few rows
print("\nFirst few rows of the data:")
print(final_data.head())

# Display basic statistics: mean close price and total volume per symbol.
# maintain_order=True keeps the groups in the order the symbols first appear
# in final_data, so the row order of this table is stable across re-runs
# (group_by gives no ordering guarantee by default).
print("\nBasic statistics:")
print(
    final_data
    .group_by('Symbol', maintain_order=True)
    .agg([
        pl.col('Close').mean().alias('Avg_Price'),
        pl.col('Volume').sum().alias('Total_Volume')
    ])
)


First few rows of the data:
shape: (5, 7)
┌─────────────────────────┬────────┬───────────┬───────────┬───────────┬───────────┬────────┐
│ Datetime                ┆ Symbol ┆ Open      ┆ High      ┆ Low       ┆ Close     ┆ Volume │
│ ---                     ┆ ---    ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---    │
│ datetime[ns, UTC]       ┆ str    ┆ f64       ┆ f64       ┆ f64       ┆ f64       ┆ i64    │
╞═════════════════════════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪════════╡
│ 2025-01-19 23:00:00 UTC ┆ BZ=F   ┆ 80.940002 ┆ 80.949997 ┆ 80.68     ┆ 80.68     ┆ 0      │
│ 2025-01-19 23:01:00 UTC ┆ BZ=F   ┆ 80.690002 ┆ 80.690002 ┆ 80.690002 ┆ 80.690002 ┆ 2      │
│ 2025-01-19 23:03:00 UTC ┆ BZ=F   ┆ 80.760002 ┆ 80.760002 ┆ 80.760002 ┆ 80.760002 ┆ 27     │
│ 2025-01-19 23:04:00 UTC ┆ BZ=F   ┆ 80.82     ┆ 80.82     ┆ 80.82     ┆ 80.82     ┆ 4      │
│ 2025-01-19 23:05:00 UTC ┆ BZ=F   ┆ 80.82     ┆ 80.82     ┆ 80.82     ┆ 80.82     ┆ 14     │
└────────────────