In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from scipy.sparse import coo_matrix, hstack
from catboost import CatBoostClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder


def _read_by_building(csv_path):
    """Read one competition CSV, using its `building_id` column as the index."""
    return pd.read_csv(csv_path, index_col='building_id')


# Training features, test features and training labels, all keyed by building_id.
train_df = _read_by_building('/kaggle/input/richters-predictor-modeling-earthquake-damage/train_values.csv')
test_df = _read_by_building('/kaggle/input/richters-predictor-modeling-earthquake-damage/test_values.csv')
target_df = _read_by_building('/kaggle/input/richters-predictor-modeling-earthquake-damage/train_labels.csv')


In [None]:
# Number of training rows; the training rows come first in the stacked frame below.
idx = len(train_df)

# Stack train on top of test and discard three columns from the combined copy.
# train_df and test_df themselves are not modified by this step.
# NOTE(review): the modelling cells visible in this notebook read train_df/test_df
# directly, not data_df — confirm whether this combined frame is still needed.
dropped_columns = ["legal_ownership_status", "count_families", "has_secondary_use_rental"]
data_df = pd.concat([train_df, test_df], sort=False).drop(columns=dropped_columns)



In [None]:
# Model inputs are the train/test frames as loaded; the label vector is the
# `damage_grade` column taken as a plain array.
# NOTE(review): `.values` drops the building_id index, so labels are matched to the
# rows of X_train by position — confirm both files share the same row order.
X_train, X_test = train_df, test_df
y_train = target_df['damage_grade'].values

# Hold out 15% of the training rows for validation; the seed is fixed at 42.
split_parts = train_test_split(X_train, y_train, test_size=0.15, random_state=42)
X_train_split, X_valid_split, y_train_split, y_valid_split = split_parts


In [None]:
# Columns passed to CatBoost as categorical features.
cat_feature_names = [
    'geo_level_1_id', 'geo_level_2_id', 'geo_level_3_id',
    'land_surface_condition', 'foundation_type', 'roof_type',
    'ground_floor_type', 'other_floor_type', 'position',
    'plan_configuration', 'legal_ownership_status',
]

cat_cls = CatBoostClassifier(iterations=4000,
                             loss_function='MultiClass',
                             eval_metric='Accuracy',
                             task_type="GPU",
                             depth=9,
                             one_hot_max_size=20,
                             cat_features=cat_feature_names,
                             leaf_estimation_iterations=15)

# Step 1: fit on the training split and score on the held-out split.
cat_cls.fit(X_train_split, y_train_split)
y_valid_pred = cat_cls.predict(X_valid_split)
# Keep and print the score explicitly: a bare expression in the middle of a cell
# is not displayed by Jupyter, so the metric would otherwise be lost.
valid_f1 = f1_score(y_valid_split, y_valid_pred, average='micro')
print(f"Validation micro-F1: {valid_f1:.4f}")

# Step 2: fit again on all training rows, predict the test set and write the
# submission file indexed by the test building ids.
cat_cls.fit(X_train, y_train)
y_pred = cat_cls.predict(X_test)
predicted_df = pd.DataFrame(y_pred.astype(np.int8), index=test_df.index, columns=['damage_grade'])
predicted_df.to_csv('baseline_new.csv')