In [None]:
import pandas as pd
import lightgbm as lgb

In [None]:
# Read the validation, training and test splits (in that order) from CSV
# files resolved relative to the current working directory.
cv, train, test = map(pd.read_csv, ("val.csv", "train.csv", "test.csv"))

In [None]:
# LightGBM training set: every column of `train` except 'id', 'sales', 'date'
# and 'd' is a feature; 'sales' is the label.
# NOTE(review): `columns` is not assigned in any visible cell -- confirm it is
# defined before this cell runs, otherwise a fresh kernel raises NameError.
data = lgb.Dataset(
    train.drop(columns=['id', 'sales', 'date', 'd']),
    label=train['sales'],
    categorical_feature=columns,
    # Keep the raw data attached to the Dataset after it is constructed.
    free_raw_data=False,
)

In [None]:
# LightGBM validation set, built from `cv` the same way the training set is
# built from `train`: drop 'id', 'sales', 'date' and 'd', use 'sales' as label.
# NOTE(review): `columns` is not assigned in any visible cell -- confirm it is
# defined before this cell runs, otherwise a fresh kernel raises NameError.
data_cv = lgb.Dataset(
    cv.drop(columns=['id', 'sales', 'date', 'd']),
    label=cv['sales'],
    categorical_feature=columns,
    # Keep the raw data attached to the Dataset after it is constructed.
    free_raw_data=False,
)

In [None]:
import itertools

# Exhaustive grid search over a handful of LightGBM hyper-parameters.
# Each candidate is trained on `data` with early stopping and scored by its
# best RMSE on `data_cv`; the lowest score wins.

# Settings shared by every candidate configuration.
base_params = {
    "objective": "tweedie",
    "metric": "rmse",
    "tweedie_variance_power": 1.1,
    "force_row_wise": True,
    "nthread": 8,
    "verbosity": -1,  # Keep it quiet
    "max_bin": 127,
    "bin_construct_sample_cnt": 20000000,
    "boost_from_average": True,
    "n_estimators": 6000,  # Upper bound on rounds; early stopping ends runs sooner
}

# Values to try per hyper-parameter; single-element lists are held constant
# to keep the search space small.
param_grid = {
    'learning_rate': [0.05, 0.01],
    'num_leaves': [31, 128, 255],
    'min_data_in_leaf': [50, 200],
    'lambda_l2': [0.1, 10.0],
    'feature_fraction': [0.8],
    'bagging_fraction': [0.8],
    'bagging_freq': [1]
}

# Cartesian product of the grid: one dict per candidate configuration.
keys = tuple(param_grid)
values = tuple(param_grid.values())
combinations = [dict(zip(keys, combo)) for combo in itertools.product(*values)]

# Running record of the best candidate seen so far.
best_score, best_params, best_iteration = float('inf'), {}, 0

for i, config in enumerate(combinations):
    print(f"\n--- Running Config {i+1}/{len(combinations)} ---")
    print(config)

    # Candidate values override the shared settings on key collisions.
    current_params = dict(base_params)
    current_params.update(config)

    model = lgb.train(
        current_params,
        data,
        # The training set is listed first, so the validation set is reported
        # under the default name 'valid_1'.
        valid_sets=[data, data_cv],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50, verbose=False),
            lgb.log_evaluation(period=0),  # period=0 keeps per-round logs off
        ],
    )

    score = model.best_score['valid_1']['rmse']

    print(f"--> Result: RMSE {score:.5f} (at iter {model.best_iteration})")

    # Written as `not (a < b)` so a NaN score is skipped, exactly as a plain
    # `if score < best_score:` would skip it.
    if not score < best_score:
        continue

    print(f"*** NEW BEST MODEL FOUND! (Previous: {best_score:.5f}) ***")
    best_score, best_params, best_iteration = (
        score,
        current_params,
        model.best_iteration,
    )

# Final report of the winning configuration.
print("\n" + "="*50, "GRID SEARCH COMPLETE", "="*50, sep="\n")
print(f"Best Validation RMSE: {best_score}")
print(f"Best Iteration: {best_iteration}")
print("Best Parameters:")
for k, v in best_params.items():
    print(f"  {k}: {v}")

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# Score the grid-search winner on the test split and report its RMSE.
test_df = test

y_test = test_df['sales']
X_test = test_df.drop(['id', 'sales', 'date', 'd'], axis=1)

# `best_iteration` is an integer (the winning run's best boosting round), not
# a model, so it has no `.predict`. The search loop does not keep the winning
# booster, and the leftover `model` is just the last configuration tried, so
# refit a booster from the winning parameters instead.
if not best_params or best_iteration < 1:
    raise RuntimeError(
        "Grid search produced no usable configuration "
        "(best_params is empty or best_iteration < 1)."
    )

# `best_params` already carries the round count under "n_estimators";
# replace it with the round at which the winning run scored best.
final_params = {**best_params, "n_estimators": best_iteration}
final_model = lgb.train(final_params, data)

preds = final_model.predict(X_test)

# Clip any negative predictions to zero.
preds = np.maximum(preds, 0)

mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)

print("==================================================")
print(f"FINAL TEST RMSE: {rmse:.5f}")
print("==================================================")