In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import catboost as cb

### Reading Data

In [1]:
# Load the train and test CSVs; in both files the first column becomes the row index.
df, test_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../input/diamonds-ds-ft-2109/diamonds_train.csv',
        '../input/diamonds-ds-ft-2109/diamonds_test.csv',
    )
)

### Extra features

In [1]:
# Name of the column the model is trained to predict.
target = 'price'

# Integer codes for the three grade columns; each grade is numbered by its
# position in the sequence below (first entry -> 0).
color_map = {grade: code for code, grade in enumerate('DEFGHIJ')}
clarity_map = {
    grade: code
    for code, grade in enumerate(['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF'])
}
cut_map = {
    grade: code
    for code, grade in enumerate(['Fair', 'Good', 'Very Good', 'Premium', 'Ideal'])
}

# Apply the same derived columns to both the training and the test frame.
for frame in (df, test_df):
    # A grade missing from a mapping becomes NaN, which makes the int32 cast raise.
    frame['color_map'] = frame['color'].map(color_map).astype('int32')
    frame['clarity_map'] = frame['clarity'].map(clarity_map).astype('int32')
    frame['cut_map'] = frame['cut'].map(cut_map).astype('int32')
    # Ratio of the x column to the y column.
    frame['shape'] = frame['x'] / frame['y']

### Training CatBoost models with cross-validation

In [1]:
# Cross-validation setup: fold count, model input columns, and the inputs
# that are passed to CatBoost as categorical features.
n_folds = 6
features = [
    'carat', 'clarity', 'cut', 'depth', 'table',
    'color_map', 'cut_map', 'clarity_map', 'shape',
]
cat_features = ['cut', 'clarity']

# Training pool: selected feature columns plus the target column as the label.
dataset = cb.Pool(df[features], df[target], cat_features=cat_features)

params = dict(objective='RMSE', learning_rate=0.05)

# return_models=True is requested so the fitted per-fold models are available;
# the prediction cell reads them from index 1 of this result.
cb_models = cb.cv(
    dataset,
    params=params,
    fold_count=n_folds,
    iterations=5000,
    early_stopping_rounds=50,
    stratified=False,
    verbose_eval=100,
    return_models=True,
)

### Prediction

In [1]:
# Average the fold models' predictions: every model adds 1/n_folds of its output.
test_inputs = test_df[features]
cb_res = np.zeros(len(test_df))
for cb_model in cb_models[1]:
    cb_res += cb_model.predict(test_inputs) / n_folds

# Expose the row index as an explicit 'id' column and attach the averaged prediction.
test_df['id'] = test_df.index
test_df['price'] = cb_res

# Write only the two submission columns, without the frame's index.
submission_columns = ['id', 'price']
test_df[submission_columns].to_csv('submission.csv', index=False)