In [None]:
# Familiar imports
import numpy as np
import pandas as pd

# For ordinal encoding categorical variables, splitting data
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

# For training the regression models, scoring them and tuning hyperparameters
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import optuna

In [None]:
# Training rows; the `kfold` column read here is what drives the
# cross-validation split further down.
df = pd.read_csv(filepath_or_buffer="../input/30days-folds/train_folds.csv")

# Rows to predict on, loaded with the default integer index.
df_test = pd.read_csv(filepath_or_buffer="../input/30-days-of-ml/test.csv")

# Submission template; its `target` column is overwritten with the averaged
# predictions before the file is written out.
sample_submission = pd.read_csv(
    filepath_or_buffer="../input/30-days-of-ml/sample_submission.csv"
)

In [None]:
# Columns that must not be fed to the model: the row id, the label and the
# fold assignment. NOTE(review): "cat4" is excluded as well -- the reason is
# not visible in this notebook; confirm it is intentional.
NON_FEATURE_COLUMNS = ("id", "target", "kfold", "cat4")

useful_features = [
    name for name in df.columns if name not in NON_FEATURE_COLUMNS
]

# Every feature whose name contains "cat" is treated as categorical and is
# ordinal-encoded inside the fold loop.
object_cols = [name for name in useful_features if "cat" in name]

# Keep only the model features in the test frame, in training column order.
df_test = df_test[useful_features]

In [None]:
# Cross-validated training: for every fold, fit an XGBoost regressor on the
# other folds, score it on the held-out fold and predict the test set.
# `final_predictions` ends up holding one test-prediction array per fold.
N_FOLDS = 5

final_predictions = []
fold_scores = []  # validation RMSE of each fold, summarised after the loop
for fold in range(N_FOLDS):
    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)
    xtest = df_test.copy()

    ytrain = xtrain.target
    yvalid = xvalid.target

    # Take explicit copies: the encoded columns are written back into these
    # frames below, and assigning into the result of a column selection
    # triggers pandas' SettingWithCopyWarning.
    xtrain = xtrain[useful_features].copy()
    xvalid = xvalid[useful_features].copy()

    print(fold, "encoding")
    # The encoder is fitted on the training part only and then applied
    # unchanged to the validation and test frames.
    ordinal_encoder = OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])
    xtest[object_cols] = ordinal_encoder.transform(xtest[object_cols])

    print(fold, "training")
    # NOTE(review): `gpu_hist` / `gpu_id` / `predictor` and passing
    # `early_stopping_rounds` to fit() are presumably tied to the XGBoost
    # version this notebook was written for -- confirm before upgrading.
    model = XGBRegressor(n_estimators=10000, learning_rate=0.05,
                         random_state=fold, tree_method='gpu_hist',
                         gpu_id=0, predictor='gpu_predictor')
    # The held-out fold doubles as the early-stopping set, so the RMSE
    # reported below is measured on data that also chose the stopping point.
    model.fit(xtrain, ytrain,
              early_stopping_rounds=5,
              eval_set=[(xvalid, yvalid)],
              verbose=False)

    preds_valid = model.predict(xvalid)
    preds_test = model.predict(xtest)
    final_predictions.append(preds_test)

    # RMSE as sqrt(MSE): same value as `squared=False`, but does not rely on
    # that keyword, which newer scikit-learn releases no longer accept.
    rmse = np.sqrt(mean_squared_error(yvalid, preds_valid))
    fold_scores.append(rmse)
    print(fold, rmse)

print("mean RMSE", np.mean(fold_scores), "std", np.std(fold_scores))

In [None]:
# Put the per-fold test predictions side by side (one column per fold) and
# average across the columns to get a single prediction per test row.
# The stack is built once and reused.
fold_predictions = np.column_stack(final_predictions)
preds = fold_predictions.mean(axis=1)

In [None]:
# Write the averaged predictions into the submission template and save it.
# Item assignment is used because attribute assignment only updates a column
# that already exists; otherwise it would silently set a plain Python
# attribute and the CSV would be written without the predictions.
sample_submission["target"] = preds
sample_submission.to_csv('submission.csv', index=False)