In [None]:
# Natural Gas Price Predictor
## Machine Learning Analysis for Commodity Trading

# This notebook analyzes historical natural gas prices and builds a predictive model for forecasting future prices.

# Install required packages
!pip install pandas numpy matplotlib scikit-learn

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error, r2_score
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

In [None]:
# Read the raw CSV, then coerce both columns to their working dtypes
# (timestamps for 'Dates', numbers for 'Prices').
df = pd.read_csv('Nat_Gas.csv')
df = df.assign(
    Dates=pd.to_datetime(df['Dates']),
    Prices=pd.to_numeric(df['Prices']),
)

# Quick sanity summary: size, covered period, and price extremes.
print(
    "Dataset Overview:",
    f"Shape: {df.shape}",
    f"Date Range: {df['Dates'].min()} to {df['Dates'].max()}",
    f"Price Range: ${df['Prices'].min():.2f} - ${df['Prices'].max():.2f}",
    sep="\n",
)

# Last expression of the cell: rendered as a table preview of the first rows.
df.head()

In [None]:
# Create a comprehensive visualization: a 2x2 grid of exploratory charts
# (price history, monthly seasonality, price distribution, yearly averages).
# Side effect: adds the helper columns 'Month' and 'Year' to df.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Price trend over time
axes[0,0].plot(df['Dates'], df['Prices'], marker='o', linewidth=2, color='blue')
axes[0,0].set_title('Natural Gas Prices Over Time', fontweight='bold')
axes[0,0].set_xlabel('Date')
axes[0,0].set_ylabel('Price ($)')
axes[0,0].grid(True, alpha=0.3)
axes[0,0].tick_params(axis='x', rotation=45)

# 2. Seasonal analysis
df['Month'] = df['Dates'].dt.month
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Reindex onto all 12 calendar months so the bar heights always line up with
# month_names. A month that never occurs in the data becomes NaN (no bar drawn)
# instead of making bar() fail on mismatched lengths.
monthly_avg = df.groupby('Month')['Prices'].mean().reindex(range(1, 13))

axes[0,1].bar(month_names, monthly_avg.values, color='lightblue', edgecolor='navy')
axes[0,1].set_title('Average Prices by Month (Seasonal Pattern)', fontweight='bold')
axes[0,1].set_xlabel('Month')
axes[0,1].set_ylabel('Average Price ($)')
axes[0,1].grid(True, alpha=0.3)

# 3. Price distribution
axes[1,0].hist(df['Prices'], bins=15, color='green', alpha=0.7, edgecolor='black')
axes[1,0].set_title('Price Distribution', fontweight='bold')
axes[1,0].set_xlabel('Price ($)')
axes[1,0].set_ylabel('Frequency')
axes[1,0].grid(True, alpha=0.3)

# 4. Yearly trends
df['Year'] = df['Dates'].dt.year
yearly_avg = df.groupby('Year')['Prices'].mean()
axes[1,1].plot(yearly_avg.index, yearly_avg.values, marker='s', linewidth=2, color='red')
# Years are integers; pin the ticks to them so the axis does not show
# fractional values such as 2021.5.
axes[1,1].set_xticks(yearly_avg.index)
axes[1,1].set_title('Yearly Average Prices', fontweight='bold')
axes[1,1].set_xlabel('Year')
axes[1,1].set_ylabel('Average Price ($)')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Build the model inputs: the single feature is the number of whole days
# elapsed since the earliest date in the data; the target is the price.
df['Days'] = (df['Dates'] - df['Dates'].min()).dt.days

X = df['Days'].to_numpy()[:, np.newaxis]  # feature matrix, one column
y = df['Prices'].to_numpy()               # target vector

print(
    "Data Prepared for Machine Learning:",
    f"Features shape: {X.shape}",
    f"Target shape: {y.shape}",
    f"Date range in days: {df['Days'].min()} to {df['Days'].max()}",
    sep="\n",
)

In [None]:
# Fit a cubic trend: expand the day counter into degree-3 polynomial terms,
# then run an ordinary linear regression on the expanded features.
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit_transform(X)

model = LinearRegression().fit(X_poly, y)

# Score the fit on the same rows it was trained on (in-sample metrics).
y_pred = model.predict(X_poly)
mae = mean_absolute_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(
    "Model Training Results:",
    f"Mean Absolute Error: ${mae:.2f}",
    f"R-squared Score: {r2:.4f}",
    sep="\n",
)

# Pick the first band whose threshold R-squared exceeds; otherwise fall back
# to the "needs improvement" message.
performance_bands = (
    (0.8, "Excellent model performance!"),
    (0.6, " Good model performance!"),
)
verdict = next(
    (message for threshold, message in performance_bands if r2 > threshold),
    "Model may need improvement",
)
print(verdict)

In [None]:
def predict_gas_price(input_date):
    """
    Predict natural gas price for any given date.

    The date is converted to the number of days since the earliest date in
    ``df['Dates']``, expanded with the fitted ``poly`` transformer and passed
    to the fitted ``model``. All three are notebook-level globals that must
    already exist when this function is called.

    Parameters
    ----------
    input_date : str or datetime-like
        A date string (e.g. 'YYYY-MM-DD') or any date object that
        ``pd.to_datetime`` understands (``pd.Timestamp``, ``datetime``,
        ``date``, ``np.datetime64``). Plain numbers are rejected.

    Returns
    -------
    float or str
        The predicted price, floored at 0, as a Python float. If anything
        goes wrong (unparseable date, missing model, ...) the function does
        not raise; it returns the string ``"Error: <message>"`` instead, so
        callers must check ``isinstance(result, str)`` before formatting.
    """
    try:
        # pd.to_datetime would read a bare number as an epoch offset; refuse
        # it so a stray int/float is reported instead of silently predicted.
        if isinstance(input_date, (int, float)):
            raise TypeError("input_date must be a date string or date object, not a number")

        # Normalise every supported input type to a pandas Timestamp.
        input_date = pd.to_datetime(input_date)
        if pd.isna(input_date):
            raise ValueError("could not interpret the input as a date")

        start_date = df['Dates'].min()
        days_from_start = (input_date - start_date).days

        # Shape (1, 1): one sample with one feature, as the transformer expects.
        days_array = np.array([[days_from_start]])
        days_poly = poly.transform(days_array)

        predicted_price = model.predict(days_poly)[0]
        # Ensure no negative prices, and always hand back a plain float.
        return float(max(0, predicted_price))

    except Exception as e:
        return f"Error: {e}"

# Test the function on a few sample dates.
test_dates = ['2024-12-25', '2025-06-15', '2025-12-31']
print("Sample Predictions:")
for date in test_dates:
    price = predict_gas_price(date)
    # predict_gas_price reports failures as an "Error: ..." string, which
    # cannot be formatted with :.2f; print it as-is in that case.
    if isinstance(price, str):
        print(f"   {date}: {price}")
    else:
        print(f"   {date}: ${price:.2f}")

In [None]:
# Generate 1-year future forecast
last_date = df['Dates'].max()
# Step in calendar months from the last observed date, so index 6 is exactly
# six months ahead and index 12 exactly one year ahead (30-day steps would
# land on 180 and 360 days). Index 0 is the last observed date itself.
future_dates = [last_date + pd.DateOffset(months=i) for i in range(13)]
future_prices = [predict_gas_price(date) for date in future_dates]

# predict_gas_price returns an "Error: ..." string instead of raising; stop
# here with a clear message rather than failing later inside plotting code.
failed_predictions = [p for p in future_prices if isinstance(p, str)]
if failed_predictions:
    raise RuntimeError(f"Forecast could not be generated. {failed_predictions[0]}")

# Plot historical data and future predictions
plt.figure(figsize=(14, 7))
plt.plot(df['Dates'], df['Prices'], 'bo-', label='Historical Prices', linewidth=2, markersize=4)
plt.plot(future_dates, future_prices, 'ro--', label='Future Predictions', linewidth=2, markersize=5)
plt.title('Natural Gas Prices: Historical Data + 1-Year Forecast', fontsize=14, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# "Current" means the price at the most recent date, which is only the last
# row if the file happens to be sorted chronologically; look it up by date.
current_price = df.loc[df['Dates'].idxmax(), 'Prices']

print(" Forecast Summary:")
print(f"Current price: ${current_price:.2f}")
print(f"6-month forecast: ${future_prices[6]:.2f}")
print(f"1-year forecast: ${future_prices[12]:.2f}")

In [None]:
# Interactive prediction tool.
# This cell keeps prompting until the user types 'quit' (or stdin is closed /
# the kernel is interrupted), so it blocks any cells that come after it.
print("INTERACTIVE PRICE PREDICTOR")
print("=" * 40)

# The historical mean does not change between prompts; compute it once.
avg_price = df['Prices'].mean()

while True:
    print("\nEnter a date to predict natural gas price (YYYY-MM-DD)")
    print("Or type 'quit' to exit")

    try:
        user_input = input("\n Date: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input available or the user interrupted the prompt:
        # leave the loop cleanly instead of ending the cell with a traceback.
        print(" Thank you for using the predictor!")
        break

    if user_input.lower() == 'quit':
        print(" Thank you for using the predictor!")
        break

    if not user_input:
        # Nothing was typed; ask again rather than sending it to the model.
        print(" Invalid input. Please use YYYY-MM-DD format")
        continue

    try:
        price = predict_gas_price(user_input)
    except Exception:
        print(" Invalid input. Please use YYYY-MM-DD format")
        continue

    if isinstance(price, str):  # Error message returned by predict_gas_price
        print(f" {price}")
        continue

    print(f"Predicted Price: ${price:.2f}")
    print(f"Historical Average: ${avg_price:.2f}")

    # Provide insights: compare against a +/-10% band around the average.
    if price > avg_price * 1.1:
        print(" Above average - consider this in trading strategy")
    elif price < avg_price * 0.9:
        print(" Below average - potential buying opportunity")
    else:
        print("Around historical average")