# In [None]:
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder

# Read the training and test sets from CSV files in the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('traindata.csv', 'testdata.csv')
)
# Keep the test IDs aside: the ID column is dropped from the test frame later,
# but the IDs are still needed for the results file.
# NOTE: `id` shadows the builtin of the same name; the name is kept because
# later code refers to it.
id = test_data["ID"]

# Fitted encoders, keyed by column name, so the same encoding can be reused
# on other data sets.
labelencoders = {}


def labelenc(data):
    """Label-encode every object-dtype column of ``data`` in place.

    A fresh ``LabelEncoder`` is fitted on each object-dtype column, the
    column is overwritten with the resulting integer codes, and the fitted
    encoder is stored in the module-level ``labelencoders`` dict under the
    column's name. Columns of any other dtype are left untouched.

    Args:
        data: DataFrame to encode; it is modified in place.

    Returns:
        None.
    """
    for column_name in data.columns:
        if data[column_name].dtype != 'object':
            continue
        encoder = LabelEncoder()
        encoded_values = encoder.fit_transform(data[column_name])
        data[column_name] = encoded_values
        labelencoders[column_name] = encoder

# Fit one encoder per object-dtype training column and encode the training
# frame in place; the fitted encoders end up in `labelencoders`.
labelenc(train_data)

# Encode the test frame with the encoders fitted on the training data.
# Labels that were not seen during training, and missing values, are encoded
# as -1.
for col in test_data.columns:
    if test_data[col].dtype != 'object':
        continue
    # Raises KeyError if this column had no encoder fitted on the training set.
    labelencoder = labelencoders[col]
    # LabelEncoder codes are the positions of the labels in `classes_`, so a
    # label -> code table built once per column replaces a per-row
    # `transform` call and a per-row linear scan of `classes_`.
    code_by_label = {
        label: code
        for code, label in enumerate(labelencoder.classes_)
        if not pd.isna(label)
    }
    # `map` leaves NaN wherever the label is absent from the table; those
    # become -1 and the column is cast back to integers.
    test_data[col] = (
        test_data[col].map(code_by_label).fillna(-1).astype('int64')
    )


# Split the training frame into the target column and the feature columns.
y_train = train_data['Class']
X_train = train_data.drop(columns='Class')

# Drop the ID column from both feature sets so it is not used as a feature.
for frame in (X_train, test_data):
    frame.drop(columns="ID", inplace=True)

# In both feature sets, turn +/-infinity into NaN and then fill every NaN
# with 0.
for frame in (X_train, test_data):
    frame.replace([np.inf, -np.inf], np.nan, inplace=True)
    frame.fillna(0, inplace=True)

# Oversample the training set with SMOTE (fixed seed for repeatability).
# NOTE(review): SMOTE builds synthetic rows by interpolating between
# neighbours, so label-encoded categorical columns may receive fractional
# codes -- confirm this is acceptable, or consider SMOTENC.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X=X_train, y=y_train)

# Train the gradient-boosting classifier on the resampled data.
catboost_classifier = CatBoostClassifier(
    iterations=109,
    depth=6,
    learning_rate=0.009,
)
catboost_classifier.fit(X=X_train_resampled, y=y_train_resampled)

# Column 1 of predict_proba for every test row (presumably the probability
# of the positive class -- confirm against the label encoding of 'Class').
y_pred_prob = catboost_classifier.predict_proba(test_data)[:, 1]

# A row is labelled 1 when its probability is strictly above the cut-off,
# otherwise 0.
threshold = 0.3
y_pred = np.where(y_pred_prob > threshold, 1, 0)

# Pair each saved test ID with its predicted label and write the result to
# disk without the index column.
results = pd.DataFrame({'ID': id})
results['Class'] = y_pred

results.to_csv('results2.csv', index=False)