In [None]:
#importing necessary libraries
import inspect

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder
from xgboost import XGBRegressor

In [None]:
# Load the three competition tables in one pass: the training rows (which carry
# a "K-Folds" column used further down), the test rows, and the sample
# submission that is later filled in and written back out.
df_train, df_test, df_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/30days-folds/training_folds_1.csv",
        "../input/30-days-of-ml/test.csv",
        "../input/30-days-of-ml/sample_submission.csv",
    )
)

In [None]:
# Columns that are never used as model inputs.
useless_features = ["id", "target", "K-Folds"]

# Everything else in the training frame is a model input; the original column
# order is preserved.
useful_features = list(
    filter(lambda name: name not in useless_features, df_train.columns)
)

# Inputs whose name contains 'cat' are the ones that get ordinal-encoded before
# training.
object_cols = list(filter(lambda name: 'cat' in name, useful_features))

# Keep only the model inputs in the test frame, in the same order as training.
df_test = df_test.loc[:, useful_features]

In [None]:
# Keyword arguments handed to XGBRegressor for every fold.
#
# GPU alternative (disabled) -- add these entries to train on a GPU instead:
#     tree_method='gpu_hist', gpu_id=0, predictor='gpu_predictor'
xgb_params = dict(
    random_state=1,

    # CPU execution settings
    n_jobs=4,
    booster='gbtree',
    n_estimators=10000,

    # tuned hyper-parameters
    learning_rate=0.03628302216953097,
    reg_lambda=0.0008746338866473539,
    reg_alpha=23.13181079976304,
    subsample=0.7875490025178415,
    colsample_bytree=0.11807135201147481,
    max_depth=3,
)

In [None]:
# Cross-validated training: for every fold, fit one XGBoost model on the other
# folds, score it on the held-out fold, and predict the test set.
N_FOLDS = 8                  # the loop visits fold labels 0 .. N_FOLDS - 1
EARLY_STOPPING_ROUNDS = 300  # stop once the validation metric stalls this long

# XGBoost moved `early_stopping_rounds` from fit() to the estimator
# constructor: older releases only honour it in fit(), newer ones reject it
# there. Detect which form the installed version supports so the loop runs on
# both.
if "early_stopping_rounds" in inspect.signature(XGBRegressor.fit).parameters:
    model_kwargs = dict(xgb_params)
    fit_kwargs = {"early_stopping_rounds": EARLY_STOPPING_ROUNDS}
else:
    model_kwargs = {**xgb_params, "early_stopping_rounds": EARLY_STOPPING_ROUNDS}
    fit_kwargs = {}

# One entry per fold: test-set predictions and validation RMSE.
final_predictions = []
rmse_scores = []

for K in range(N_FOLDS):
    # Rows labelled K are held out for validation; all others are trained on.
    train_fold = df_train[df_train["K-Folds"] != K].reset_index(drop=True)
    valid_fold = df_train[df_train["K-Folds"] == K].reset_index(drop=True)

    y_train = train_fold["target"]
    y_valid = valid_fold["target"]

    # Explicit copies: the encoded columns are assigned back into these frames,
    # which would otherwise risk a SettingWithCopyWarning on a column subset.
    X_train = train_fold[useful_features].copy()
    X_valid = valid_fold[useful_features].copy()
    X_test = df_test.copy()

    # Ordinal encoding, fitted on the training part of the fold only.
    # NOTE(review): with default settings the encoder raises if the validation
    # or test rows contain a category absent from this training fold.
    OE = OrdinalEncoder()
    X_train[object_cols] = OE.fit_transform(X_train[object_cols])
    X_valid[object_cols] = OE.transform(X_valid[object_cols])
    X_test[object_cols] = OE.transform(X_test[object_cols])

    # Model training, monitored on the held-out fold.
    model = XGBRegressor(**model_kwargs)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        verbose=1000,
        **fit_kwargs,
    )

    # Predictions for scoring (validation) and for the final blend (test).
    validation_predictions = model.predict(X_valid)
    test_predictions = model.predict(X_test)
    final_predictions.append(test_predictions)

    # RMSE as sqrt(MSE): the `squared=False` shortcut of mean_squared_error is
    # deprecated/removed in newer scikit-learn, while this form works everywhere.
    rmse = np.sqrt(mean_squared_error(y_valid, validation_predictions))
    rmse_scores.append(rmse)
    print(K, rmse)

# Mean and spread of the per-fold validation scores.
print(np.mean(rmse_scores), np.std(rmse_scores))

In [None]:
# Blend the fold models: lay the per-fold test predictions side by side (one
# column per fold) and average across the columns for each test row.
stacked_predictions = np.column_stack(final_predictions)
prediction = stacked_predictions.mean(axis=1)

In [None]:
# Store the blended predictions in the submission frame and write it to disk.
# Bracket assignment is used on purpose: attribute-style assignment
# (`df.target = ...`) would only set a Python attribute, not a column, if the
# frame had no "target" column yet.
df_submission["target"] = prediction
df_submission.to_csv("submission.csv", index=False)