In [4]:
import pandas as pd
import numpy as np

# Load the oil price history from a CSV expected in the working directory.
# The displayed frame shows two columns: `date` (timestamp text with a UTC
# offset) and `close` (closing price).
df = pd.read_csv('oil_price.csv')
df

Unnamed: 0,date,close
0,2000-08-23 00:00:00-04:00,32.049999
1,2000-08-24 00:00:00-04:00,31.629999
2,2000-08-25 00:00:00-04:00,32.049999
3,2000-08-28 00:00:00-04:00,32.869999
4,2000-08-29 00:00:00-04:00,32.720001
...,...,...
5837,2023-11-22 00:00:00-05:00,77.099998
5838,2023-11-24 00:00:00-05:00,75.540001
5839,2023-11-27 00:00:00-05:00,74.860001
5840,2023-11-28 00:00:00-05:00,76.410004


In [5]:
# Parse the textual timestamps into timezone-aware datetimes. The raw values
# carry mixed offsets (-04:00 / -05:00), so utc=True normalises them all to UTC
# and yields a single datetime64[ns, UTC] column.
df = df.assign(date=pd.to_datetime(df['date'], utc=True))
df

Unnamed: 0,date,close
0,2000-08-23 04:00:00+00:00,32.049999
1,2000-08-24 04:00:00+00:00,31.629999
2,2000-08-25 04:00:00+00:00,32.049999
3,2000-08-28 04:00:00+00:00,32.869999
4,2000-08-29 04:00:00+00:00,32.720001
...,...,...
5837,2023-11-22 05:00:00+00:00,77.099998
5838,2023-11-24 05:00:00+00:00,75.540001
5839,2023-11-27 05:00:00+00:00,74.860001
5840,2023-11-28 05:00:00+00:00,76.410004


In [6]:
# Break the (UTC) timestamp into integer calendar components that the feature
# encoding below is built from.
df = df.assign(
    year=df['date'].dt.year,
    month=df['date'].dt.month,
    day=df['date'].dt.day,
)

# Sanity check: the largest day-of-month seen in the data.
df['day'].max()

31

In [9]:
# Map each calendar component onto the unit circle as a (cos, sin) pair:
#   angle = 2 * pi * value / divisor
# Divisors:
#   year  -> 2024  fixed scaling constant. A year number does not repeat, so
#                  this pair is a smooth rescaling of the year rather than a
#                  true seasonal cycle.
#   month -> 12    months per year, so December wraps around next to January.
#   day   -> 31    longest possible month; shorter months stop short of a full
#                  turn.
# The forecast frame must be encoded with exactly the same divisors.
for part, period in (('year', 2024), ('month', 12), ('day', 31)):
    angle = 2 * np.pi * df[part] / period
    df[f'cos_{part}'] = np.cos(angle)
    df[f'sin_{part}'] = np.sin(angle)

In [79]:
# Build the design matrix / target and hold out 20% of the rows for evaluation.
from sklearn.model_selection import train_test_split

FEATURE_COLUMNS = [
    'cos_year', 'sin_year',
    'cos_month', 'sin_month',
    'cos_day', 'sin_day',
]

X = df[FEATURE_COLUMNS]
y = df['close']

# NOTE(review): the rows are time-ordered and shuffle=True scatters test days
# between training days, so the hold-out error probably understates the error
# of a genuine forward-looking forecast. Consider a chronological split (or
# sklearn's TimeSeriesSplit) for evaluation, refitting on all rows afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [80]:
import lightgbm as lgb

# Hyperparameters handed to the LightGBM regressor as keyword arguments.
params = dict(
    objective='regression',
    metric='mse',            # Mean Squared Error
    boosting_type='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
    seed=42,
)

# Instantiate the (not yet fitted) LightGBM regressor.
reg = lgb.LGBMRegressor(**params)

In [81]:
# Inspect the training features. The index is out of order because the split
# above was made with shuffle=True.
X_train

Unnamed: 0,cos_year,sin_year,cos_month,sin_month,cos_day,sin_day
1647,0.998608,-0.052749,6.123234e-17,1.000000e+00,0.979530,-0.201299
1669,0.998608,-0.052749,-8.660254e-01,5.000000e-01,0.918958,0.394356
3159,0.999417,-0.034141,-5.000000e-01,8.660254e-01,0.820763,0.571268
4577,0.999827,-0.018625,8.660254e-01,-5.000000e-01,-0.758758,-0.651372
4221,0.999764,-0.021729,-1.000000e+00,1.224647e-16,-0.250653,-0.968077
...,...,...,...,...,...,...
3772,0.999610,-0.027935,-1.836970e-16,-1.000000e+00,-0.050649,0.998717
5191,0.999957,-0.009313,-5.000000e-01,8.660254e-01,0.979530,-0.201299
5226,0.999957,-0.009313,-1.000000e+00,1.224647e-16,-0.440394,-0.897805
5390,0.999981,-0.006209,5.000000e-01,8.660254e-01,-0.612106,0.790776


In [82]:
# Fit the regressor on the training split.
# `verbose` is intentionally not passed to fit(): LightGBM deprecated that
# keyword in 3.3 and removed it in 4.0, where passing it raises TypeError.
# It only controlled evaluation-set logging, and no eval_set is supplied here,
# so dropping it does not change training. To tune log output, set `verbosity`
# on the estimator instead.
reg.fit(X_train, y_train)





In [83]:
# Predict closing prices for the held-out rows; the result is a NumPy array
# with one value per row of X_test.
y_pred = reg.predict(X_test)
y_pred

array([57.53903779, 27.37833185, 27.37833185, ..., 39.81588122,
       52.90084867, 88.84280371])

In [84]:
from sklearn.metrics import mean_squared_error

# Hold-out error of the fitted regressor.
# RMSE is derived from the MSE instead of calling
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so the square root keeps this cell
# working on every version (and avoids computing the error twice).
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % rmse)
 

MSE: 13.69
RMSE: 3.70


In [87]:
# dill is imported under the name `pickle`; it is used here through the same
# dump()/HIGHEST_PROTOCOL interface as the standard-library module.
import dill as pickle

# Serialise the fitted regressor into the working directory.
with open('predictive.pkl', mode='wb') as model_file:
    pickle.dump(
        reg,
        model_file,
        protocol=pickle.HIGHEST_PROTOCOL,
        recurse=True,
    )

In [72]:
# Forecast horizon: from today at midnight through 60 days later, inclusive,
# with one row per calendar day (61 rows). The resulting frame has a single
# column labelled 0 that holds the dates.
# NOTE(review): freq='D' also generates weekend dates, whereas the training
# sample shown above skips some calendar days - confirm this is intended.
start_date = pd.to_datetime('today').normalize()
end_date = start_date + pd.Timedelta(days=60)

date_range = pd.date_range(start_date, end_date, freq='D')
df_next = pd.DataFrame(date_range)


In [73]:
# Inspect the forecast dates (single column labelled 0).
df_next

Unnamed: 0,0
0,2023-11-29
1,2023-11-30
2,2023-12-01
3,2023-12-02
4,2023-12-03
...,...
56,2024-01-24
57,2024-01-25
58,2024-01-26
59,2024-01-27


In [74]:
# Derive the same calendar components and (cos, sin) encodings for the forecast
# dates as were built for the training frame. The divisors (2024, 12, 31) must
# stay identical to the training cell, otherwise the model would receive
# features on a different scale from the one it was fitted on.
forecast_dates = df_next[0].dt
df_next['year'] = forecast_dates.year
df_next['month'] = forecast_dates.month
df_next['day'] = forecast_dates.day

for part, period in (('year', 2024), ('month', 12), ('day', 31)):
    angle = 2 * np.pi * df_next[part] / period
    df_next[f'cos_{part}'] = np.cos(angle)
    df_next[f'sin_{part}'] = np.sin(angle)

In [75]:
# Inspect the forecast frame, which now also carries the year/month/day parts
# and their cos/sin encodings.
df_next

Unnamed: 0,0,year,month,day,cos_year,sin_year,cos_month,sin_month,cos_day,sin_day
0,2023-11-29,2023,11,29,0.999995,-3.104336e-03,0.866025,-5.000000e-01,0.918958,-0.394356
1,2023-11-30,2023,11,30,0.999995,-3.104336e-03,0.866025,-5.000000e-01,0.979530,-0.201299
2,2023-12-01,2023,12,1,0.999995,-3.104336e-03,1.000000,-2.449294e-16,0.979530,0.201299
3,2023-12-02,2023,12,2,0.999995,-3.104336e-03,1.000000,-2.449294e-16,0.918958,0.394356
4,2023-12-03,2023,12,3,0.999995,-3.104336e-03,1.000000,-2.449294e-16,0.820763,0.571268
...,...,...,...,...,...,...,...,...,...,...
56,2024-01-24,2024,1,24,1.000000,-2.449294e-16,0.866025,5.000000e-01,0.151428,-0.988468
57,2024-01-25,2024,1,25,1.000000,-2.449294e-16,0.866025,5.000000e-01,0.347305,-0.937752
58,2024-01-26,2024,1,26,1.000000,-2.449294e-16,0.866025,5.000000e-01,0.528964,-0.848644
59,2024-01-27,2024,1,27,1.000000,-2.449294e-16,0.866025,5.000000e-01,0.688967,-0.724793


In [76]:
# Predict closing prices for the forecast dates. Selecting by X_train.columns
# feeds the model exactly the training features, in the training order.
y_pred_next = reg.predict(df_next.loc[:, X_train.columns])
y_pred_next

array([77.81408956, 77.81408956, 75.49235473, 75.49235473, 75.49235473,
       75.49235473, 75.49235473, 75.49235473, 75.45431467, 75.45431467,
       75.45431467, 75.45431467, 75.42949864, 75.42949864, 75.42949864,
       75.42949864, 75.42949864, 75.46143175, 75.46143175, 75.46143175,
       75.46143175, 75.46143175, 75.46880629, 75.46880629, 75.46880629,
       75.46880629, 75.49863073, 75.49863073, 75.49863073, 75.49863073,
       75.52428784, 75.52428784, 75.52428784, 77.02114188, 77.02114188,
       76.71111934, 76.71111934, 76.71111934, 76.71111934, 76.77926232,
       76.77926232, 76.77926232, 76.77926232, 76.84507821, 76.84507821,
       77.15510075, 77.15510075, 77.15510075, 77.18703386, 77.2631979 ,
       77.2631979 , 77.2631979 , 77.2631979 , 77.27057244, 77.27057244,
       77.27057244, 77.27057244, 77.30039689, 77.30039689, 77.30039689,
       77.30039689])