## LGBM Model


In [4]:
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

In [None]:

# Hyper-parameter grid explored by the cross-validated search below.
# The LightGBM-native names used here (min_data_in_leaf, lambda_l1, lambda_l2,
# min_gain_to_split, bagging_fraction, bagging_freq, feature_fraction) are
# forwarded to LightGBM as extra keyword parameters of the sklearn wrapper.
param_grid = {
    'n_estimators': [500, 600, 700],
    'learning_rate': [0.08],
    # NOTE(review): a tree limited to max_depth=5 can hold at most 2**5 = 32
    # leaves, so num_leaves=43 is presumably never reached - confirm intent.
    'num_leaves': [43],
    'max_depth': [5],
    'min_data_in_leaf': [19],
    'lambda_l1': [0.1, 0.2],
    'lambda_l2': [0.1, 0.2],
    'min_gain_to_split': [0.1, 0.2],
    'bagging_fraction': [0.9, 0.85],
    'bagging_freq': [3, 5],
    # `colsample_bytree` is an alias of `feature_fraction`. When both are
    # given, LightGBM keeps `feature_fraction` and ignores the alias with a
    # warning, so the former `colsample_bytree: [0.7]` entry never took
    # effect. Only the effective setting is listed; change this value to
    # actually subsample columns.
    'feature_fraction': [1],
}

model = lgb.LGBMRegressor(n_jobs=-1, objective='regression')

# 5-fold CV; the scorer is the negated RMSLE, so values closer to 0 are better.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_root_mean_squared_log_error')

# X_train / y_train are expected to be defined by an earlier cell.
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)


# Estimator refit on the full training data with the best parameter set.
best_model = grid_search.best_estimator_

Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 500, 'num_leaves': 31}
RMSLE: 0.152062916732084

param_grid = {
    'n_estimators': [500],
    'learning_rate': [0.1],
    'num_leaves': [31],
    'max_depth': [5],
    'min_data_in_leaf': [19],
    'lambda_l1': [0.1],
    'lambda_l2': [0.1],
    'min_gain_to_split': [0.1],
    'bagging_fraction': [0.9],
    'bagging_freq': [3],
    'feature_fraction': [0.9],
    'max_bin': [255],
    "colsample_bytree": [0.7],
}
RMSLE: 0.15186426150275126

Best Parameters: {'bagging_fraction': 0.9, 'bagging_freq': 3, 'colsample_bytree': 0.7, 'feature_fraction': 0.9, 'lambda_l1': 0.1, 'lambda_l2': 0.1, 'learning_rate': 0.1, 'max_bin': 255, 'max_depth': 5, 'min_data_in_leaf': 19, 'min_gain_to_split': 0.1, 'n_estimators': 500, 'num_leaves': 31}


## Manual Vote Score (Bug TB Fixed)

In [None]:
def _majority_or_mean(column):
    """Return the single most frequent value in *column*, else its rounded mean."""
    values, counts = np.unique(column, return_counts=True)
    top = counts.max()
    # A winner needs at least two agreeing models and must not be tied.
    if top > 1 and np.count_nonzero(counts == top) == 1:
        return int(values[counts.argmax()])
    return int(np.round(column.mean()))


def vote_score(modelslist, y_val, X_val):
    """Score a vote ensemble of regressors with RMSLE.

    Each model's predictions are rounded to integers. For every sample the
    ensemble output is the value predicted by the largest group of agreeing
    models; when no value is predicted by at least two models, or the largest
    groups are tied, the rounded mean of all predictions is used instead.
    For exactly three models this is "any two agree, else rounded mean".

    Negative ensemble outputs are clipped to 0 before the log transform,
    because ``log1p`` is undefined at or below -1 and would otherwise turn
    the score into NaN.

    Args:
        modelslist: Non-empty sequence of fitted models exposing
            ``predict(X_val)``.
        y_val: True target values, one per row of ``X_val``.
        X_val: Feature matrix handed to every model's ``predict``.

    Returns:
        The root mean squared logarithmic error as a float (also printed).

    Raises:
        ValueError: If ``modelslist`` is empty, there are no samples, the
            number of predictions differs from ``len(y_val)``, or ``y_val``
            contains values whose ``log1p`` is not finite.
    """
    if len(modelslist) == 0:
        raise ValueError("modelslist must contain at least one model")

    # Shape (n_models, n_samples): one row of rounded predictions per model.
    rounded = np.stack([
        np.round(np.asarray(model.predict(X_val))).astype(int).ravel()
        for model in modelslist
    ])

    votes = np.array(
        [_majority_or_mean(column) for column in rounded.T], dtype=int
    )
    # RMSLE is only defined for predictions greater than -1.
    votes = np.maximum(votes, 0)

    y_true = np.asarray(y_val, dtype=float).ravel()
    if y_true.shape != votes.shape:
        raise ValueError(
            f"y_val has {y_true.size} values but the models produced "
            f"{votes.size} predictions"
        )
    if votes.size == 0:
        raise ValueError("cannot compute RMSLE without samples")

    # Silence NumPy warnings here; invalid targets are reported just below.
    with np.errstate(divide="ignore", invalid="ignore"):
        y_test_log = np.log1p(y_true)
    y_pred_log = np.log1p(votes)

    print("Sum of NaN values in y_test_log:", np.isnan(y_test_log).sum())
    print("Sum of NaN values in y_pred_log:", np.isnan(y_pred_log).sum())

    if not np.isfinite(y_test_log).all():
        raise ValueError(
            "y_val must be finite and greater than -1 to compute RMSLE"
        )

    # Computed with NumPy: the `squared=False` flag of mean_squared_error was
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmsle = float(np.sqrt(np.mean((y_test_log - y_pred_log) ** 2)))
    print("RMSLE:", rmsle)
    return rmsle