In [None]:
import numpy as np
import pandas as pd
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [None]:
# Read train.csv into a DataFrame and preview its first rows.
data = pd.read_csv('train.csv')
data.head()

Unnamed: 0,id,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,0,42,technician,married,secondary,no,7,no,no,cellular,25,aug,117,3,-1,0,unknown,0
1,1,38,blue-collar,married,secondary,no,514,no,no,unknown,18,jun,185,1,-1,0,unknown,0
2,2,36,blue-collar,married,secondary,no,602,yes,no,unknown,14,may,111,2,-1,0,unknown,0
3,3,27,student,single,secondary,no,34,yes,no,unknown,28,may,10,2,-1,0,unknown,0
4,4,26,technician,married,secondary,no,889,yes,no,cellular,3,feb,902,1,-1,0,unknown,1


In [None]:
# Count how many rows carry each distinct value of the 'job' column.
data['job'].value_counts()

Unnamed: 0_level_0,count
job,Unnamed: 1_level_1
management,175541
blue-collar,170498
technician,138107
admin.,81492
services,64209
retired,35185
self-employed,19020
entrepreneur,17718
unemployed,17634
housemaid,15912


In [None]:
# Feature matrix: every column except the row identifier ('id') and the
# target ('y'). Dropping by name rather than by position keeps the selection
# correct even if the column order of the CSV changes.
X = data.drop(columns=['id', 'y'])
X.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,42,technician,married,secondary,no,7,no,no,cellular,25,aug,117,3,-1,0,unknown
1,38,blue-collar,married,secondary,no,514,no,no,unknown,18,jun,185,1,-1,0,unknown
2,36,blue-collar,married,secondary,no,602,yes,no,unknown,14,may,111,2,-1,0,unknown
3,27,student,single,secondary,no,34,yes,no,unknown,28,may,10,2,-1,0,unknown
4,26,technician,married,secondary,no,889,yes,no,cellular,3,feb,902,1,-1,0,unknown


In [None]:
# Target vector: the 'y' column of the training data.
y = data['y']

In [None]:
# Numeric branch: select number-typed columns, fill missing values with the
# column mean, then standardize.
num_cols = make_column_selector(dtype_include='number')
num_pipes = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: select object-typed columns, fill missing values with
# the most frequent value, then one-hot encode into a dense array. Categories
# not seen during fitting are ignored rather than raising an error.
cat_cols = make_column_selector(dtype_include='object')
cat_pipes = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('oneh', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Combine both branches; each pipeline only sees the columns its selector picks.
ctx = ColumnTransformer(transformers=[
    ('num', num_pipes, num_cols),
    ('cat', cat_pipes, cat_cols),
])

In [None]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the preprocessing on the training portion only, then apply the fitted
# transformer to the held-out portion.
X_train = ctx.fit_transform(X_train)
X_test = ctx.transform(X_test)

In [None]:
# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# masks and batch shuffling are repeatable after a kernel restart. Some GPU
# kernels may still introduce small run-to-run differences.
set_random_seed(42)

# Stop training once val_loss has not improved for 5 epochs in a row, and
# restore the weights from the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Two hidden blocks (Dense -> BatchNormalization -> ReLU), with dropout after
# the first one, followed by a single sigmoid output unit.
model = Sequential([
    Dense(units=256),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.3),
    Dense(units=128),
    BatchNormalization(),
    Activation('relu'),
    Dense(units=1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
# Train for up to 100 epochs in mini-batches of 32. The held-out split is
# passed as validation data, which is what the early-stopping callback monitors.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)

Epoch 1/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 4ms/step - accuracy: 0.9141 - loss: 0.1926 - val_accuracy: 0.9280 - val_loss: 0.1639
Epoch 2/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 4ms/step - accuracy: 0.9244 - loss: 0.1713 - val_accuracy: 0.9284 - val_loss: 0.1639
Epoch 3/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 4ms/step - accuracy: 0.9258 - loss: 0.1686 - val_accuracy: 0.9292 - val_loss: 0.1614
Epoch 4/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 4ms/step - accuracy: 0.9271 - loss: 0.1654 - val_accuracy: 0.9301 - val_loss: 0.1598
Epoch 5/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4ms/step - accuracy: 0.9275 - loss: 0.1642 - val_accuracy: 0.9304 - val_loss: 0.1596
Epoch 6/100
[1m18750/18750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4ms/step - accuracy: 0.9277 - loss: 0.1643 - val_accuracy: 0.9297 - val_loss:

<keras.src.callbacks.history.History at 0x7a335062e610>

In [11]:
# Score the rows of test.csv with the already-fitted preprocessing and model.
data_test = pd.read_csv('test.csv')

# Use a dedicated name for these features: reusing X_test would overwrite the
# validation features, so re-running the training cell afterwards would pair
# them with y_test from a different dataset. The 'id' column is dropped by
# name so the selection does not depend on column position.
X_submit = ctx.transform(data_test.drop(columns=['id']))
y_pred = model.predict(X_submit)

[1m7813/7813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step


In [12]:
# Show the array of predictions produced by model.predict.
print(y_pred)

[[2.2201929e-03]
 [1.6313402e-01]
 [1.7063339e-04]
 ...
 [7.8833348e-01]
 [1.8739090e-03]
 [1.8820475e-01]]


In [13]:
# Pair each test id with its prediction and write them as a two-column CSV.
# comments='' stops NumPy from prefixing the 'id,y' header line with '# '.
submission = np.column_stack((data_test['id'], y_pred))
np.savetxt(
    'output.csv',
    submission,
    fmt=['%d', '%.7f'],
    delimiter=',',
    header='id,y',
    comments='',
)
