In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle
import warnings

warnings.filterwarnings('ignore')

# Load the raw dataset from the CSV file in the working directory.
data = pd.read_csv('Churn_Modelling.csv')

# Drop the columns that are not used as model features.
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Encode 'Gender' as integer labels. The fitted encoder is kept so the same
# mapping can be reused later.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

# One-hot encode 'Geography'. handle_unknown='ignore' makes categories not
# seen during fitting encode as an all-zero row instead of raising.
onehot_encoder_geo = OneHotEncoder(handle_unknown='ignore')
geo_encoded = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    # Reuse the source index: pd.concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex would silently misalign rows (and introduce NaNs) if
    # `data` ever carried a non-default index.
    index=data.index,
)

# Replace the raw 'Geography' column with its one-hot encoded columns.
data = pd.concat([data.drop('Geography', axis=1), geo_encoded_df], axis=1)

# Separate the features (X) from the target column 'Exited' (y).
X = data.drop('Exited', axis=1)
y = data['Exited']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted encoders and the scaler, one pickle file per object.
for _artifact_path, _artifact in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
    ('scaler.pkl', scaler),
):
    with open(_artifact_path, 'wb') as file:
        pickle.dump(_artifact, file)

# ==============================
# 🔥 CUSTOM KERAS MODEL WRAPPER
# ==============================

from sklearn.base import BaseEstimator, ClassifierMixin

class KerasModelWrapper(BaseEstimator, ClassifierMixin):
    """Scikit-learn style wrapper around a small Keras binary classifier.

    The network is a stack of ``layers`` dense hidden layers with ``neurons``
    units each, followed by a single sigmoid output unit. It is compiled with
    the Adam optimizer and binary cross-entropy loss.

    Parameters
    ----------
    neurons : number of units in every hidden layer.
    layers : number of hidden layers (at least one is always created).
    activation : activation used by the hidden layers.
    epochs : number of training epochs passed to Keras.
    batch_size : mini-batch size passed to Keras.
    """

    def __init__(self, neurons=32, layers=1, activation='relu', epochs=50, batch_size=10):
        self.neurons = neurons
        self.layers = layers
        self.activation = activation
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None  # Underlying Keras model; created by fit().

    def build_model(self, input_dim=None):
        """Create and compile the Keras network.

        Parameters
        ----------
        input_dim : number of input features. When ``None`` no input shape is
            declared and Keras infers it from the first batch it sees.

        Returns
        -------
        The compiled ``Sequential`` model.
        """
        # The input width comes from the caller rather than from a
        # module-level variable, so the estimator works on any dataset.
        first_layer_kwargs = {}
        if input_dim is not None:
            first_layer_kwargs['input_shape'] = (input_dim,)

        model = Sequential()
        model.add(Dense(self.neurons, activation=self.activation, **first_layer_kwargs))

        # The first hidden layer is added above; add the remaining ones.
        for _ in range(self.layers - 1):
            model.add(Dense(self.neurons, activation=self.activation))

        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def fit(self, X, y):
        """Build a fresh network and train it on ``X`` / ``y``.

        Returns
        -------
        self, as required by the scikit-learn estimator API.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()

        # Fitted attributes expected from scikit-learn classifiers; recent
        # scikit-learn releases read `classes_` when scoring a classifier.
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        self.model = self.build_model(input_dim=X.shape[1])
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def predict(self, X):
        """Return 0/1 class predictions for ``X`` using a 0.5 probability threshold.

        Raises
        ------
        NotFittedError : if called before ``fit``. This exception subclasses
            both ``ValueError`` and ``AttributeError``.
        """
        if self.model is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This KerasModelWrapper instance is not fitted yet; call fit() first."
            )
        # verbose=0 keeps Keras progress bars out of the output, matching fit().
        probabilities = self.model.predict(np.asarray(X), verbose=0)
        return (probabilities > 0.5).astype(int).flatten()

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``."""
        # Compare plain arrays so the result does not depend on y's container
        # type (list, ndarray or pandas Series).
        return np.mean(self.predict(X) == np.asarray(y).ravel())

# ==============================
# 🔥 GRID SEARCH CV
# ==============================

# Hyper-parameter values to try; every combination is evaluated.
param_grid = dict(
    neurons=[16, 32, 64],
    layers=[1, 2],
    activation=['relu', 'tanh'],
    epochs=[50, 100],
    batch_size=[10, 20],
)

# Base estimator handed to the grid search.
model = KerasModelWrapper()

# n_jobs=1 is used to avoid errors from Keras not supporting multiprocessing.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=1,
)
grid_result = grid.fit(X_train, y_train)

# Report the best cross-validated score and the parameters that produced it.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best accuracy: %f using %s" % best_summary)


ImportError: generic_type: type "InterpreterWrapper" is already registered!