In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit, train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Input workbook, the target column to forecast, and how many lagged copies
# of the target to add as features (Lag1 .. Lag<n_lags>).
excel_file = "salesdaily-data.xlsx"
column_name = "R06"
n_lags = 1

# Load the sheet and derive a calendar-month feature from the timestamp.
df = pd.read_excel(excel_file)
df['datetime'] = pd.to_datetime(df['datetime'])
# The lag features below are built with shift(), which works on row order,
# so make sure the rows are chronological first (stable: ties keep file order).
df = df.sort_values('datetime', kind='stable')
df['Month'] = df['datetime'].dt.month

# Take an explicit copy: adding columns to a bare column selection of `df`
# triggers pandas' SettingWithCopyWarning.
meddata = df[[column_name, 'Month']].copy()

for i in range(1, n_lags + 1):
    meddata[f'Lag{i}'] = meddata[column_name].shift(i)

# The first `n_lags` rows have no history and therefore NaN lags; drop them.
meddata = meddata.dropna()
print(meddata.tail())

# Feature matrix (everything except the target) and target vector.
x = meddata.drop(columns=column_name)
y = meddata[column_name]




In [None]:


# Candidate values for each tunable XGBoost hyperparameter.
# The three row/column sampling ratios all draw from the same four options,
# so they are generated together instead of being spelled out three times.
param_grid = {
    'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
    'n_estimators': [30, 50, 100, 150, 200, 300],
    'max_depth': [2, 3, 5, 7, 9, 11],
    **{
        ratio_param: [0.7, 0.8, 0.9, 1]
        for ratio_param in ('subsample', 'colsample_bytree', 'colsample_bylevel')
    },
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5],
}

# Hold out the last 20% of rows as the test set. shuffle=False keeps the row
# order, so (for chronologically ordered rows) the model is scored on data
# that comes after everything it was trained on; a shuffled split would let
# a test row's lag feature be a value the model already saw as a training
# target.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

# Base regressor. Seeded because the search space samples subsample /
# colsample ratios below 1, which makes tree construction stochastic.
xg_reg = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)

# Forward-chaining cross-validation: every validation fold lies after the
# rows used to fit that fold, unlike plain K-fold.
cv_splitter = TimeSeriesSplit(n_splits=10)

# Randomized search: 100 sampled configurations scored by (negated) MSE.
random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_grid,
    n_iter=100,
    scoring='neg_mean_squared_error',
    cv=cv_splitter,
    verbose=1,
    random_state=42,
    n_jobs=-1,
)

# Fit the model
random_search.fit(x_train, y_train)

# Get the best parameters
best_params = random_search.best_params_
print(f'Best parameters found: {best_params}')

# Evaluate the refitted best estimator on the held-out test rows.
best_model = random_search.best_estimator_
y_pred = best_model.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred``, in percent.

    Observations whose true value is exactly zero are excluded, because the
    percentage error is undefined for them.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, one per observation. Both inputs are flattened to
        1-D, so a column vector of shape ``(n, 1)`` is accepted.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero
        observations, or NaN when there are none.
    """
    # Flatten both inputs: a (n, 1) prediction array would otherwise broadcast
    # against a 1-D y_true into an (n, n) matrix and give a wrong average.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    non_zero_indices = y_true != 0
    y_true, y_pred = y_true[non_zero_indices], y_pred[non_zero_indices]

    # Nothing left to average: return NaN explicitly rather than letting
    # np.mean emit a "Mean of empty slice" RuntimeWarning.
    if y_true.size == 0:
        return float('nan')

    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Percentage error of the tuned model on the held-out rows.
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'Test MAPE: {mape}')

# Overlay actual and predicted values for the first 60 test rows.
# The x positions are simply row positions within y_test / y_pred.
# NOTE(review): the x-axis label says weeks, but the workbook is named
# "salesdaily-data.xlsx" — confirm the unit of one row.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test.values[:60], label='Actual Sales')
ax.plot(y_pred[:60], label='Predicted Sales')
ax.set_title('Predictions vs Actuals for drug R06')
ax.set_xlabel('Weeks')
ax.set_ylabel('Value')
ax.legend()
plt.show()
