# Task 1: Natural Gas Price Analysis & Extrapolation

## Setup & Data Loading

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from datetime import datetime


In [None]:

# Read the price CSV, parse the 'Date' column into datetimes, and put the
# rows in chronological order with a fresh 0..n-1 index.
df = (
    pd.read_csv("/mnt/data/Nat_Gas.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .reset_index(drop=True)
)
# Bare expression: previews the first rows when run as a notebook cell.
df.head()


## Exploratory Data Analysis

In [None]:

# Line chart of price against date, drawn through an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Date'], df['Price'])
ax.set(title="Monthly Natural Gas Prices", xlabel="Date", ylabel="Price")
ax.grid(True)
plt.show()


In [None]:

# Tag every row with its calendar month number, then average the price
# within each month to expose the seasonal pattern.
df['Month'] = df['Date'].dt.month
monthly_avg = df['Price'].groupby(df['Month']).mean()

# Bar chart of the per-month averages on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
monthly_avg.plot.bar(ax=ax)
ax.set(
    title="Average Natural Gas Price by Month",
    xlabel="Month",
    ylabel="Average Price",
)
ax.grid(True)
plt.show()


## Feature Engineering & Model Training

In [None]:

# Features: a linear trend term (row position in the date-sorted frame) plus
# a sine/cosine pair that encodes the month on a 12-step cycle, so that
# December and January end up adjacent.
month_angle = 2 * np.pi * df['Month'] / 12
df['TimeIndex'] = np.arange(len(df))
df['Month_sin'] = np.sin(month_angle)
df['Month_cos'] = np.cos(month_angle)

feature_cols = ['TimeIndex', 'Month_sin', 'Month_cos']
X = df[feature_cols]
y = df['Price']

# Ordinary least squares on trend + seasonality.
model = LinearRegression()
model.fit(X, y)


## Price Estimation Function

In [None]:

def estimate_gas_price(input_date):
    """Return the model's price estimate for the month of *input_date*.

    The date is reduced to the same three features the model was fitted on:
    the number of whole calendar months since the earliest date in ``df``
    (the trend term) and the sine/cosine encoding of the month number.
    The day of the month does not affect the result.

    Relies on the file-level ``df`` (for the earliest date) and the fitted
    ``model``.

    Args:
        input_date: Anything ``pd.to_datetime`` accepts as a single date,
            e.g. an ISO string or a ``Timestamp``. Dates outside the
            training range are extrapolated along the fitted trend.

    Returns:
        The predicted price as a plain Python ``float``.
    """
    input_date = pd.to_datetime(input_date)
    first_date = df['Date'].min()

    # Months elapsed since the first observation; equals the training
    # TimeIndex as long as the history has one row per consecutive month.
    time_index = (
        (input_date.year - first_date.year) * 12
        + (input_date.month - first_date.month)
    )

    month_angle = 2 * np.pi * input_date.month / 12
    X_input = pd.DataFrame(
        [[time_index, np.sin(month_angle), np.cos(month_angle)]],
        columns=['TimeIndex', 'Month_sin', 'Month_cos'],
    )

    # predict() returns a length-1 array. Take the element explicitly:
    # calling float() on an array with ndim > 0 is deprecated in NumPy 1.25+.
    return float(model.predict(X_input)[0])


## Example Predictions

In [None]:

# Example: estimated price for 31 January 2022.
estimate_gas_price("2022-01-31")


In [None]:

# Example: estimated price for 30 September 2025.
estimate_gas_price("2025-09-30")


## 1-Year Forecast Visualization

In [None]:

# Forecast horizon: 13 month-end dates, starting at the first month-end on or
# after the last observed date, i.e. that anchor month plus 12 more.
# A MonthEnd offset object is used instead of the 'M' alias because pandas
# 2.2+ deprecates 'M' (renamed 'ME'); the offset object yields the same
# dates on both old and new pandas versions.
last_date = df['Date'].max()
future_dates = pd.date_range(
    start=last_date, periods=13, freq=pd.offsets.MonthEnd()
)
future_prices = [estimate_gas_price(d) for d in future_dates]

# Overlay the dashed model forecast on the observed history.
plt.figure(figsize=(10, 5))
plt.plot(df['Date'], df['Price'], label="Historical")
plt.plot(future_dates, future_prices, linestyle='--', label="Forecast (1 Year)")
plt.title("Natural Gas Price Forecast")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid(True)
plt.show()
