In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from catboost import Pool,CatBoostClassifier
from sklearn.metrics import roc_auc_score as AUC
from pycaret.classification import *

In [2]:
train = pd.read_csv('train.csv', index_col=0)
bulk = pd.read_csv('machine failure.csv')
bulk = bulk.drop(columns=['UDI'])
train_df = pd.concat([train, bulk], axis=0)
test_df = pd.read_csv("test.csv", index_col=0)

In [3]:
train_df.head()

Unnamed: 0,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,L50096,L,300.6,309.6,1596,36.1,140,0,0,0,0,0,0
1,M20343,M,302.6,312.1,1759,29.1,200,0,0,0,0,0,0
2,L49454,L,299.3,308.5,1805,26.5,25,0,0,0,0,0,0
3,L53355,L,301.0,310.9,1524,44.3,197,0,0,0,0,0,0
4,M24050,M,298.0,309.0,1641,35.4,34,0,0,0,0,0,0


In [4]:
X = train_df.drop(columns=['Machine failure'])
y = train_df['Machine failure']

In [5]:
feature_names = ['Product ID','Type',
                 'Air temperature [K]','Process temperature [K]',
                 'Rotational speed [rpm]','Torque [Nm]',
                 'Tool wear [min]','TWF','HDF',
                 'PWF','OSF','RNF']

train_pool = Pool(X.to_numpy(), y.to_numpy(), 
                  feature_names=feature_names, 
                  cat_features=['Product ID', 'Type'])

In [6]:
model = CatBoostClassifier()
model.fit(train_pool, eval_set=train_pool, verbose=False, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostClassifier at 0x7f984c168be0>

In [10]:
train_auc = AUC(train["Machine failure"], model.predict(train.drop(columns=["Machine failure"])))
print(f"Train Score: {train_auc}")

Train Score: 0.9161043054090814


In [12]:
test_pool = Pool(test_df.to_numpy(), 
                 feature_names=feature_names, 
                 cat_features=['Product ID', 'Type'])
pred = model.predict_proba(test_pool)[:, 1]
pred

array([0.00106075, 0.00208216, 0.00034873, ..., 0.00051742, 0.00164063,
       0.00080096])

In [14]:
sub = pd.read_csv('sample_submission.csv')
sub['Machine failure'] = pred
sub.to_csv('submission_cat_3.csv', index=False)