# WTI COT MM Nowcasting EDA 

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
# Silence every warning for the rest of the session (no category or module filter).
# NOTE(review): this also hides deprecation and numerical warnings from pandas/statsmodels;
# consider narrowing it to the specific warnings that are known to be noisy.
warnings.filterwarnings("ignore")


In [3]:
import numpy as np
import pandas as pd 

In [4]:
import scipy.stats as stats

In [5]:
import sys
# Add the directory three levels above the working directory to the import path.
# Presumably this is the repository root that holds the `src` and `research`
# packages imported below; verify. The path is relative, so it depends on the
# kernel's working directory.
sys.path.append('../../../')

In [6]:
import matplotlib.pyplot as plt 

In [7]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


In [8]:
from src.utils.dates import get_nyse_business_dates
from src.utils.io.read import PreprocessedDataReader

In [9]:
from src.preprocessing.base import FutureTicker

In [10]:
from src.settings import Settings

In [11]:
from research.metrics import get_features_responses_correlation
from research.plots import plot_bin_summary_of_xy

In [12]:
# Reader for preprocessed datasets, rooted at the configured historical data path.
preprocessed_data_path = Settings.historical.paths.PREPROCESSED_DATA_PATH
pdr = PreprocessedDataReader(preprocessed_data_path)

In [13]:
# Load the preprocessed dataset for the WTI future.
wti_ticker = FutureTicker.WTI
dataset = pdr.read_dataset(ticker=wti_ticker)

In [14]:
# Dimensions of the loaded dataset as (rows, columns).
dataset.shape

(813, 81)

In [15]:
# Preview the three most recent rows to eyeball the available columns and the
# trailing NaNs in the `next_*` columns of the last row.
dataset.tail(3)

Unnamed: 0,tradeDate,Name,Commercial_NetPosition,CommercialLongPosition,CommercialShortPosition,ManagedMoney_NetPosition,ManagedMoney_LongPosition,ManagedMoney_ShortPosition,Commercial_NetPosition_change,prior_report_Commercial_NetPosition_change,...,prior_cumulative_5D_F2_Volume_change,next_prior_cumulative_5D_F2_Volume_change,prior_cumulative_5D_F1MinusF2_Volume_change,next_prior_cumulative_5D_F1MinusF2_Volume_change,F1_RolledPrice_change,next_F1_RolledPrice_change,F2_RolledPrice_change,next_F2_RolledPrice_change,F3_RolledPrice_change,next_F3_RolledPrice_change
810,2025-07-15,CL,368688.0,1226014.0,857326.0,217954.0,404769.0,186815.0,31871.0,25279.0,...,225987.0,335852.0,-232123.0,-916765.0,-1.81,0.08,-1.61,0.11,-1.33,0.18
811,2025-07-22,CL,355309.0,1179861.0,824552.0,208836.0,406472.0,197636.0,-13379.0,31871.0,...,335852.0,-652406.0,-916765.0,1366289.0,0.08,3.9,0.11,3.82,0.18,3.64
812,2025-07-29,CL,362436.0,1172427.0,809991.0,207342.0,411275.0,203933.0,7127.0,-13379.0,...,-652406.0,,1366289.0,,3.9,,3.82,,3.64,


In [16]:
# Earliest and latest trade dates present in the dataset.
trade_dates = dataset['tradeDate']
(trade_dates.min(), trade_dates.max())

('2010-01-05', '2025-07-29')

In [17]:
# Parse tradeDate and keep only the calendar date (datetime.date objects, no time part).
trade_date_timestamps = pd.to_datetime(dataset['tradeDate'])
dataset['tradeDate'] = trade_date_timestamps.dt.date

In [18]:
# Forward-looking Managed Money response columns (net / long / short),
# in the open-interest-scaled variant as named in the dataset.
forward_response_names = [
    'forward_ManagedMoney_NetPosition_to_openinterest_change',
    'forward_ManagedMoney_LongPosition_to_openinterest_change',
    'forward_ManagedMoney_ShortPosition_to_openinterest_change',
]

# The matching prior-report columns, used below as candidate features.
prior_response_names = [
    'prior_report_ManagedMoney_NetPosition_to_openinterest_change',
    'prior_report_ManagedMoney_LongPosition_to_openinterest_change',
    'prior_report_ManagedMoney_ShortPosition_to_openinterest_change',
]


In [19]:
# Correlate the prior-report changes with the forward responses, then display
# the table ordered by the first response column (the net-position one).
prior_vs_forward_corr = get_features_responses_correlation(
    dataset=dataset,
    features=prior_response_names,
    responses=forward_response_names,
)
prior_vs_forward_corr.sort_values(by=forward_response_names[0])

Unnamed: 0,Feature,forward_ManagedMoney_NetPosition_to_openinterest_change,forward_ManagedMoney_LongPosition_to_openinterest_change,forward_ManagedMoney_ShortPosition_to_openinterest_change
0,prior_report_ManagedMoney_NetPosition_to_openi...,-0.078842,-0.13222,-0.035035
1,prior_report_ManagedMoney_LongPosition_to_open...,-0.074889,-0.135681,-0.048732
2,prior_report_ManagedMoney_ShortPosition_to_ope...,0.025823,0.036602,0.000871


In [20]:
## Cache: Feature-Response Correlations (forecast)
# Computes the feature/response correlation table and writes it, together with
# identifying metadata, to a JSON file under OUTPUT_DIR/wti/mm/forecast.
import json
from src.settings import OUTPUT_DIR

# Create the output directory if it does not exist; safe to re-run.
forecast_output_dir = OUTPUT_DIR / "wti" / "mm" / "forecast"
forecast_output_dir.mkdir(parents=True, exist_ok=True)

# Candidate feature columns passed to the correlation helper.
feature_names = [
    'prior_report_ManagedMoney_NetPosition_change',
    'prior_report_ManagedMoney_LongPosition_change',
    'prior_report_ManagedMoney_ShortPosition_change',
    'prior_report_ManagedMoney_NetPosition_to_openinterest_change',
    'prior_report_ManagedMoney_LongPosition_to_openinterest_change',
    'prior_report_ManagedMoney_ShortPosition_to_openinterest_change',
    'prior_report_SyntheticF1MinusF2_RolledPrice_change',
    'prior_cumulative_5D_F1_Volume_change',
    'prior_cumulative_5D_F2_Volume_change',
    'prior_cumulative_5D_F1MinusF2_Volume_change',
    'prior_5D_F1_OI_change',
    'prior_5D_F2_OI_change',
    'prior_5D_AGG_OI_change',
    'prior_5D_F1MinusF2_openinterest_change',
    'F1_RolledPrice_rolling_20D_volatility',
    'F2_RolledPrice_rolling_20D_volatility',
    'F3_RolledPrice_rolling_20D_volatility',
    'F1_RolledPrice_change',
    'F2_RolledPrice_change',
    'F3_RolledPrice_change'
]

# Response columns: raw position changes and their open-interest-scaled variants.
unscaled_response_names = [
    'forward_report_ManagedMoney_NetPosition_change',
    'forward_report_ManagedMoney_LongPosition_change',
    'forward_report_ManagedMoney_ShortPosition_change'
]
scaled_response_names = [
    'forward_ManagedMoney_NetPosition_to_openinterest_change',
    'forward_ManagedMoney_LongPosition_to_openinterest_change',
    'forward_ManagedMoney_ShortPosition_to_openinterest_change'
]

all_response_names = unscaled_response_names + scaled_response_names

corr_df = get_features_responses_correlation(
    dataset=dataset,
    features=feature_names,
    responses=all_response_names
)

# json.dump serialises float NaN as a bare `NaN` token, which is not valid JSON
# and is rejected by strict parsers. Map missing values to None so an undefined
# correlation is written as `null` instead.
correlations = {
    column: [None if pd.isna(value) else value for value in values]
    for column, values in corr_df.to_dict(orient='list').items()
}

correlations_cache = {
    "ticker": "wti",
    "investor_type": "mm",
    "horizon": "forecast",
    # NOTE(review): "spearman" is recorded as metadata only; this cell does not pass a
    # method to get_features_responses_correlation. Confirm it matches what the helper computes.
    "method": "spearman",
    "correlations": correlations
}

with open(forecast_output_dir / "01_feature_response_correlations.json", "w") as f:
    json.dump(correlations_cache, f, indent=2)
print(f"Saved 01_feature_response_correlations.json to {forecast_output_dir}")

Saved 01_feature_response_correlations.json to /Users/oualid/Documents/Projects/omroot_repos/COTAME/cache/output/wti/mm/forecast
