In [2]:
import pandas as pd

# Load the cleaned dataset, turn the Date column (stored as text in the CSV)
# back into datetimes, and order the rows chronologically. The features built
# in later cells operate on row order, so the sort has to happen first.
df = (
    pd.read_csv("../data/gold_price_cleaned.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
)

df.head()


Unnamed: 0,Year,Month,Gold_Price_INR_per_10g,Date
0,2019,1,34190,2019-01-01
1,2019,2,34700,2019-02-01
2,2019,3,35220,2019-03-01
3,2019,4,35750,2019-04-01
4,2019,5,36080,2019-05-01


In [3]:
# One-step lag: every row receives the price of the row before it.
# The first row has no predecessor, so its lag value is NaN.
df = df.assign(
    Price_Lag_1=lambda frame: frame['Gold_Price_INR_per_10g'].shift(periods=1)
)

# Preview the lag next to the price it was derived from
df.loc[:, ['Date', 'Gold_Price_INR_per_10g', 'Price_Lag_1']].head()


Unnamed: 0,Date,Gold_Price_INR_per_10g,Price_Lag_1
0,2019-01-01,34190,
1,2019-02-01,34700,34190.0
2,2019-03-01,35220,34700.0
3,2019-04-01,35750,35220.0
4,2019-05-01,36080,35750.0


In [None]:
# Moving averages over the trailing 3 and 6 rows (current row included) to
# smooth out short-term fluctuations. A window that is not yet full yields
# NaN, so the first 2 and 5 rows respectively stay empty.
df = df.assign(
    Rolling_3_Month=lambda frame: frame['Gold_Price_INR_per_10g'].rolling(window=3).mean(),
    Rolling_6_Month=lambda frame: frame['Gold_Price_INR_per_10g'].rolling(window=6).mean(),
)

df.loc[:, ['Date', 'Gold_Price_INR_per_10g', 'Rolling_3_Month', 'Rolling_6_Month']].head(8)



Unnamed: 0,Date,Gold_Price_INR_per_10g,Rolling_3_Month,Rolling_6_Month
0,2019-01-01,34190,,
1,2019-02-01,34700,,
2,2019-03-01,35220,34703.333333,
3,2019-04-01,35750,35223.333333,
4,2019-05-01,36080,35683.333333,
5,2019-06-01,36420,36083.333333,35393.333333
6,2019-07-01,36830,36443.333333,35833.333333
7,2019-08-01,37210,36820.0,36251.666667


In [None]:
# Absolute month-over-month movement: the current price minus the price of
# the previous row (NaN for the first row, which has nothing to compare with).
df = df.assign(
    Monthly_Change=lambda frame: frame['Gold_Price_INR_per_10g'].diff(periods=1)
)

df.loc[:, ['Date', 'Monthly_Change']].head()



Unnamed: 0,Date,Monthly_Change
0,2019-01-01,
1,2019-02-01,510.0
2,2019-03-01,520.0
3,2019-04-01,530.0
4,2019-05-01,330.0


In [None]:
# Relative month-over-month movement, scaled from a fraction to a percentage
df = df.assign(
    Monthly_Percent_Change=lambda frame: frame['Gold_Price_INR_per_10g'].pct_change().mul(100)
)


In [7]:
# Show how many NaNs each column holds before anything is dropped. A bare
# expression is rendered only when it is the last statement of a cell, so the
# counts have to be displayed explicitly or they are silently discarded.
display(df.isnull().sum())

# The lag, rolling-window and change features are undefined for the first
# rows of the series; discard those incomplete rows.
df = df.dropna()
df.head()

Unnamed: 0,Year,Month,Gold_Price_INR_per_10g,Date,Price_Lag_1,Rolling_3_Month,Rolling_6_Month,Monthly_Change,Monthly_Percent_Change
5,2019,6,36420,2019-06-01,36080.0,36083.333333,35393.333333,340.0,0.94235
6,2019,7,36830,2019-07-01,36420.0,36443.333333,35833.333333,410.0,1.125755
7,2019,8,37210,2019-08-01,36830.0,36820.0,36251.666667,380.0,1.031768
8,2019,9,36970,2019-09-01,37210.0,37003.333333,36543.333333,-240.0,-0.644988
9,2019,10,37450,2019-10-01,36970.0,37210.0,36826.666667,480.0,1.29835


In [None]:
# Create target variable: next month's price, i.e. the price one row ahead.
# The final row has no following month, so shift(-1) leaves its target as NaN.
# This cell runs after the earlier dropna(), so that row would otherwise reach
# X, y and the saved CSV with a missing label; drop it here.
df = (
    df.assign(Target_Price=lambda frame: frame['Gold_Price_INR_per_10g'].shift(-1))
    .dropna(subset=['Target_Price'])
)

df[['Date', 'Gold_Price_INR_per_10g', 'Target_Price']].head()

Unnamed: 0,Date,Gold_Price_INR_per_10g,Target_Price
5,2019-06-01,36420,36830.0
6,2019-07-01,36830,37210.0
7,2019-08-01,37210,36970.0
8,2019-09-01,36970,37450.0
9,2019-10-01,37450,37890.0


In [9]:
# Feature matrix: the current price, the engineered price features and the
# calendar columns.
X = df.loc[:, [
    'Gold_Price_INR_per_10g',
    'Price_Lag_1',
    'Rolling_3_Month',
    'Rolling_6_Month',
    'Monthly_Change',
    'Monthly_Percent_Change',
    'Month',
    'Year',
]]

# Label vector: the following month's price
y = df.loc[:, 'Target_Price']


In [10]:
# Only a cell's final expression is rendered automatically, so the feature
# preview is displayed explicitly; the target preview remains the cell's value.
display(X.head())
y.head()


5    36830.0
6    37210.0
7    36970.0
8    37450.0
9    37890.0
Name: Target_Price, dtype: float64

In [13]:
# Persist the feature-engineered frame for the modelling step; index=False
# keeps the row index out of the written file.
df.to_csv(path_or_buf="../data/gold_price_feature_engineered.csv", index=False)


### Feature Engineering Summary

- Created a one-month lag feature (`Price_Lag_1`) to capture the effect of the previous month's price
- Created 3-month and 6-month rolling averages to smooth price trends
- Added month-over-month absolute change (momentum) and percentage change features
- Defined next-month gold price as prediction target
- Removed the initial rows whose lag, rolling-average, or change features are missing because there is not enough price history
- Saved feature-engineered dataset for modeling
