In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
%load_ext cudf.pandas
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import cuml

# Setting Matplotlib defaults
plt.style.use('seaborn-v0_8')
# `autolayout=True` applies tight-layout to every figure when it is drawn.
# A bare `plt.tight_layout()` here would only create (and display) an empty
# figure, because no figure exists yet at configuration time.
plt.rc('figure', figsize=(8,5), dpi=150, autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=15, titlepad=10)
plt.rc('animation', html='html5')
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file under the Kaggle input directory and print its
# full path, one per line.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

import warnings
# Silence all warnings for the remainder of the session.
warnings.simplefilter('ignore')

# Allow up to 500 columns to be shown when displaying wide frames.
pd.set_option('display.max_columns', 500)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/playground-series-s5e5/sample_submission.csv
/kaggle/input/playground-series-s5e5/train.csv
/kaggle/input/playground-series-s5e5/test.csv
/kaggle/input/calories-burnt-prediction/calories.csv
/kaggle/input/xgb-cat-ensemble-model/test_xgb.pkl
/kaggle/input/xgb-cat-ensemble-model/__results__.html
/kaggle/input/xgb-cat-ensemble-model/submission.csv
/kaggle/input/xgb-cat-ensemble-model/__notebook__.ipynb
/kaggle/input/xgb-cat-ensemble-model/__output__.json
/kaggle/input/xgb-cat-ensemble-model/oof_xgb.pkl
/kaggle/input/xgb-cat-ensemble-model/custom.css
/kaggle/input/may-calorie-expenditure-oof-preds/test_preds.pkl
/kaggle/input/may-calorie-expenditure-oof-preds/submission.csv
/kaggle/input/may-calorie-expenditure-oof-preds/oof_preds.pkl
/kaggle/input/may-calorie-expenditure-oof-preds/catboost_info/test_error.tsv
/kaggle/input/may-calorie-expenditure-oof-preds/catboost_info/learn_error.tsv
/kaggle/input/may-calorie-expenditure-oof-preds/catboost_info/catboost_training.json
/kagg

<Figure size 1200x750 with 0 Axes>

In [2]:
# Competition train/test splits, indexed by their `id` column.
train = pd.read_csv('/kaggle/input/playground-series-s5e5/train.csv', index_col='id')
test = pd.read_csv('/kaggle/input/playground-series-s5e5/test.csv', index_col='id')

# Additional dataset, indexed by `User_ID`; its `Gender` column is renamed
# to `Sex`.
org = (
    pd.read_csv('/kaggle/input/calories-burnt-prediction/calories.csv', index_col='User_ID')
    .rename(columns={'Gender': 'Sex'})
)

In [3]:
# Stack the extra rows in `org` underneath `train` and rebuild a fresh 0..n-1 index.
# NOTE: this rebinds `train`, so re-running the cell appends `org` again.
train = pd.concat(objs=[train, org], axis=0, ignore_index=True)

In [4]:
# Work on a copy so `train` keeps its `Calories` column.
X = train.copy()
# Remove the target from the feature frame and move it to log1p scale.
y = np.log1p(X.pop('Calories'))

# Independent copy of the test features.
X_test = test.copy()


In [5]:
# Locations of the pre-computed prediction pickles.
# NOTE(review): joblib/pickle files execute code on load; only load artifacts
# from a trusted source.
oof_path = "/kaggle/input/may-calorie-expenditure-oof-preds/oof_preds.pkl"
test_path = "/kaggle/input/may-calorie-expenditure-oof-preds/test_preds.pkl"

oof_preds = joblib.load(oof_path)
test_preds = joblib.load(test_path)

In [6]:
from cuml.metrics import mean_squared_log_error
from scipy.optimize import differential_evolution

In [7]:
def de_objective(w, X, y):
    """Score a weighted blend of the columns of ``X`` against ``y`` with RMSLE.

    The raw weights are made non-negative with ``abs`` and rescaled to sum to
    one before blending. Both ``y`` and the blended prediction are passed
    through ``np.expm1`` before scoring, i.e. they are treated as log1p-scale
    values.

    Parameters
    ----------
    w : array-like
        Raw (unnormalized) blend weights, one per column of ``X``.
    X : object exposing ``.dot``
        Per-model predictions; ``X.dot(w)`` must yield the blended prediction.
    y : array-like
        Targets on the same scale as the predictions in ``X``.

    Returns
    -------
    float
        The root mean squared log error of the blend, or ``inf`` when the
        weights cannot be normalized (all zeros or non-finite sum).
    """
    # Coerce to float so integer/list inputs can be divided safely.
    w = np.abs(np.asarray(w, dtype=float))
    total = w.sum()
    # A zero or non-finite total would turn the weights into NaN/inf and feed
    # garbage to the metric; report the worst possible score instead so the
    # optimizer simply discards this candidate.
    if not (np.isfinite(total) and total > 0):
        return np.inf
    w = w / total
    preds = X.dot(w)
    return np.sqrt(mean_squared_log_error(np.expm1(y), np.expm1(preds)))

In [8]:
# One (0, 1) search interval per column of the OOF prediction matrix.
n_models = oof_preds.shape[1]
bounds = [(0.0, 1.0) for _ in range(n_models)]

# Optimizer settings, kept together so they are easy to tune.
de_settings = dict(
    maxiter=2500,
    popsize=35,
    tol=1e-6,
    strategy='randtobest1exp',
    seed=100,
)

# The extra positional arguments (OOF predictions and target) are forwarded
# to `de_objective` through `args`.
result = differential_evolution(
    de_objective,
    bounds,
    args=(oof_preds, y),
    **de_settings,
)

# Apply the same abs + sum-to-one normalization the objective uses.
raw_w = np.abs(result.x)
best_w = raw_w / raw_w.sum()

print(f'DE Best RMSLE: {result.fun:.6f}')

DE Best RMSLE: 0.058782


In [9]:
# Blend the per-model test predictions using the normalized DE weights.
# NOTE(review): `de_objective` applies np.expm1 to the blended OOF predictions,
# but no inverse transform is applied here — confirm that test_preds.pkl is
# already on the Calories scale.
final_preds = test_preds.dot(best_w)

In [10]:
# Start from the sample submission and overwrite its `Calories` column with
# the blended predictions.
sub = (
    pd.read_csv('/kaggle/input/playground-series-s5e5/sample_submission.csv')
    .assign(Calories=final_preds)
)

# Write the file without the index column.
sub.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")

# Preview the first rows.
print(sub.head(5))

Your submission was successfully saved!
       id    Calories
0  750000   27.231732
1  750001  107.832052
2  750002   87.587374
3  750003  125.647149
4  750004   75.848286
