In [28]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import yfinance as yf
import os

def fetch_eia_data(filename, start_date, end_date):
    """Load a dated table from an Excel workbook and keep rows inside a date window.

    The second worksheet (``sheet_name=1``) of ``filename`` is read, skipping
    the first two rows. The sheet must expose a ``'Date'`` column; if pandas
    did not already parse it as datetimes, it is converted using the
    ``'%b %d, %Y'`` layout (e.g. ``'Jan 02, 2024'``).

    Parameters
    ----------
    filename : path or file-like accepted by ``pandas.read_excel``
    start_date, end_date : values comparable with a datetime column
        Bounds of the window; both ends are inclusive.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``'Date'`` lies in ``[start_date, end_date]``, with the
        original row labels preserved.
    """
    table = pd.read_excel(filename, sheet_name=1, skiprows=2)

    date_is_parsed = pd.api.types.is_datetime64_any_dtype(table['Date'])
    if not date_is_parsed:
        table['Date'] = pd.to_datetime(table['Date'], format='%b %d, %Y')

    # Build each bound separately, then combine into one inclusive mask.
    on_or_after_start = table['Date'] >= start_date
    on_or_before_end = table['Date'] <= end_date
    return table[on_or_after_start & on_or_before_end]

In [29]:
#from google.colab import drive

#drive.mount("/content/drive")
#drive.mount("/content/drive", force_remount=True)

In [45]:
# --- Configuration -----------------------------------------------------------
start_date = '2024-01-01'
end_date = '2025-05-13'

DATA_DIR = "data"  # Local folder
os.makedirs(DATA_DIR, exist_ok=True)  # Create if missing

# Path to your local file
DATASET_EIA = os.path.join(DATA_DIR, "RNGWHHDd.xls")

# Rolling windows (in rows of the price series) for the momentum / moving-average features.
FEATURE_WINDOWS = (5, 10)

# Yahoo Finance tickers and FRED series ids mapped to their output column names.
indices = {
    '^GSPC': 'S&P 500',
    '^DJI': 'Dow Jones Industrial Average',
    'EURUSD=X': 'EUR/USD Exchange Rate',
    'NASDAQCOM': 'NASDAQ Composite',
    'DEXJPUS': 'JPY/USD',
    'DEXUSUK': 'USD/GBP',
    'DCOILWTICO': 'WTI Spot Price',
    'FEDFUNDS': 'Federal Funds Rate',
    'T5YIE': '5-Year Breakeven Inflation Rate',
    'T10YIE': '10-Year Breakeven Inflation Rate',
    'GS1': '1-Year Treasury Rate',
    'GS10': '10-Year Treasury Rate',
    'MPRIME': 'Prime Rate'
}

# Columns that stay in levels (no log transform):
#   * interest / inflation rates, as the original comment intended;
#   * momentum counts, which can legitimately be 0. log(0) is -inf, and the
#     inf -> NaN -> forward-fill clean-up below would then overwrite every
#     zero-momentum day with the previous day's value.
# An explicit list replaces the old "'Rate' in column name" test, which also
# skipped 'EUR/USD Exchange Rate' while logging the other two FX series.
LEVEL_COLUMNS = {
    'Federal Funds Rate',
    '5-Year Breakeven Inflation Rate',
    '10-Year Breakeven Inflation Rate',
    '1-Year Treasury Rate',
    '10-Year Treasury Rate',
    'Prime Rate',
} | {f'Momentum_{window}' for window in FEATURE_WINDOWS}


def _is_yahoo_symbol(symbol):
    """Return True for symbols fetched from Yahoo Finance ('^...' or '...=X'); others go to FRED."""
    return symbol.startswith('^') or symbol.endswith('=X')


def _as_series(close):
    """Return `close` as a Series, taking the first column if it is a DataFrame.

    Depending on the installed yfinance version, ``download(...)['Close']`` for
    one ticker is either a Series or a one-column DataFrame keyed by ticker.
    """
    if isinstance(close, pd.DataFrame):
        return close.iloc[:, 0]
    return close


def _count_up_moves(window_values):
    """Count the strictly positive day-over-day changes inside one rolling window."""
    return (np.diff(window_values) > 0).sum()


# --- Natural gas prices ------------------------------------------------------
# EIA workbook on disk. The frame is loaded here but is not used to build `df`
# in this cell.
natural_gas_prices = fetch_eia_data(DATASET_EIA, start_date, end_date)

# Yahoo Finance 'NG=F' close: this is what actually populates the
# 'Natural_Gas_Spot_Price' column and defines the row calendar of `df`.
# NOTE(review): NG=F is a Yahoo futures ticker, so this column is a futures
# close rather than the EIA spot series -- confirm that is intended.
natural_gas_yf = yf.download('NG=F', start=start_date, end=end_date)['Close']
df = _as_series(natural_gas_yf).rename('Natural_Gas_Spot_Price').to_frame()

# --- Additional economic variables -------------------------------------------
# Yahoo Finance first, then FRED, so the column order of the CSV is stable.
for index, name in indices.items():
    if _is_yahoo_symbol(index):
        df[name] = _as_series(yf.download(index, start=start_date, end=end_date)['Close'])

for index, name in indices.items():
    if not _is_yahoo_symbol(index):
        observations = web.DataReader(index, 'fred', start_date, end_date).iloc[:, 0]
        # Align on the price calendar by carrying the latest observation forward.
        # A plain `df[name] = observations` keeps only exact date matches, so a
        # monthly value stamped on a holiday or weekend (monthly FRED series are
        # dated the 1st of the month) would be dropped and the previous month's
        # value forward-filled in its place.
        df[name] = observations.reindex(df.index, method='ffill')

# --- Momentum and moving averages (computed on the raw price) ----------------
price = df['Natural_Gas_Spot_Price']
for window in FEATURE_WINDOWS:
    df[f'Momentum_{window}'] = price.rolling(window=window).apply(_count_up_moves, raw=True)
for window in FEATURE_WINDOWS:
    df[f'MA_{window}'] = price.rolling(window=window).mean()

# --- Log transform -----------------------------------------------------------
log_columns = [column for column in df.columns if column not in LEVEL_COLUMNS]
df[log_columns] = np.log(df[log_columns])

# Handling any infinities or NaNs that arise from the logarithm or from dates
# missing in a source: infinities become NaN, then gaps are forward-filled.
df = df.replace([np.inf, -np.inf], np.nan).ffill()

# Drop the warm-up rows of the longest rolling window.
df = df.iloc[max(FEATURE_WINDOWS):]

# Export to CSV
df.to_csv(os.path.join(DATA_DIR, 'compiled_dataset.csv'), index=True)

print("Dataset compiled and saved to 'compiled_dataset.csv'.")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Ticker      Natural_Gas_Spot_Price
Date                              
2024-01-02                   2.568
2024-01-03                   2.668
2024-01-04                   2.821
2024-01-05                   2.893
2024-01-08                   2.980
...                            ...
2025-05-06                   3.463
2025-05-07                   3.621
2025-05-08                   3.592
2025-05-09                   3.795
2025-05-12                   3.646

[342 rows x 1 columns]
Dataset compiled and saved to 'compiled_dataset.csv'.


  result = getattr(ufunc, method)(*inputs, **kwargs)
