In [None]:
"import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Embedding, Dense, Concatenate, Dropout, 
    BatchNormalization, Flatten
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss

class CTRDeepNN:
    """
    Feed-forward network for click-through-rate (CTR) prediction.

    Every categorical feature gets its own integer input and embedding table;
    all numerical features share one batch-normalised input. The pieces are
    concatenated and passed through a stack of Dense -> BatchNormalization ->
    Dropout blocks ending in a single sigmoid unit.
    """

    def __init__(self, categorical_features_info, numerical_features_count):
        """
        Initialize CTR Deep Neural Network

        Args:
            categorical_features_info: dict with feature_name: vocab_size
            numerical_features_count: int, number of numerical features
        """
        self.categorical_features_info = categorical_features_info
        self.numerical_features_count = numerical_features_count
        # Populated by build_model(); train/predict/evaluate need it.
        self.model = None

    def _require_model(self):
        """Return the built Keras model, or raise if build_model() was not called."""
        if self.model is None:
            raise RuntimeError(
                "Model has not been built yet; call build_model() first."
            )
        return self.model

    def build_model(self, embedding_dim=50, hidden_layers=(512, 256, 128),
                    dropout_rate=0.3, l2_reg=1e-5):
        """
        Build and compile the deep neural network model

        Args:
            embedding_dim: upper bound on the embedding width of each
                categorical feature
            hidden_layers: sequence of hidden layer sizes
            dropout_rate: dropout rate for regularization
            l2_reg: L2 regularization factor (embeddings and dense kernels)

        Returns:
            The compiled Keras model (also stored on self.model).

        Raises:
            ValueError: if neither categorical nor numerical features are
                configured, since there would be nothing to feed the network.
        """
        has_numerical = self.numerical_features_count > 0
        if not self.categorical_features_info and not has_numerical:
            raise ValueError(
                "At least one categorical or numerical feature is required "
                "to build the model."
            )

        inputs = []
        feature_tensors = []

        # 1. One input + embedding per categorical feature
        for feature_name, vocab_size in self.categorical_features_info.items():
            input_layer = Input(shape=(1,), name=f'input_{feature_name}')
            inputs.append(input_layer)

            # Small vocabularies get a narrower embedding:
            # min(embedding_dim, vocab_size // 2), but never below 1.
            emb_dim = min(embedding_dim, max(vocab_size // 2, 1))

            embedding_layer = Embedding(
                input_dim=vocab_size,
                output_dim=emb_dim,
                embeddings_regularizer=tf.keras.regularizers.l2(l2_reg),
                name=f'embedding_{feature_name}'
            )(input_layer)

            # Flatten the embedding output so it can be concatenated
            # with the other feature tensors.
            feature_tensors.append(Flatten()(embedding_layer))

        # 2. Single shared input for all numerical features
        if has_numerical:
            numerical_input = Input(shape=(self.numerical_features_count,),
                                    name='numerical_features')
            inputs.append(numerical_input)

            # Batch normalization is applied to the raw numerical input.
            numerical_bn = BatchNormalization(name='numerical_bn')(numerical_input)
            feature_tensors.append(numerical_bn)

        # 3. Concatenate all features (Concatenate needs at least two tensors)
        if len(feature_tensors) > 1:
            x = Concatenate(name='feature_concat')(feature_tensors)
        else:
            x = feature_tensors[0]

        # 4. Hidden stack: Dense -> BatchNormalization -> Dropout
        for i, hidden_size in enumerate(hidden_layers, start=1):
            x = Dense(
                hidden_size,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(l2_reg),
                name=f'dense_{i}'
            )(x)
            x = BatchNormalization(name=f'bn_{i}')(x)
            x = Dropout(dropout_rate, name=f'dropout_{i}')(x)

        # 5. Output layer for binary classification (CTR prediction)
        output = Dense(1, activation='sigmoid', name='ctr_output')(x)

        # 6. Create and compile model
        self.model = Model(inputs=inputs, outputs=output, name='CTR_DeepNN')

        self.model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
        )

        return self.model

    def prepare_input_data(self, df, categorical_columns, numerical_columns,
                           target_column=None):
        """
        Prepare input data for the model

        Args:
            df: pandas DataFrame
            categorical_columns: list of categorical column names
            numerical_columns: list of numerical column names
            target_column: name of target column, or None for unlabeled data

        Returns:
            (X, y) where X maps the model's input-layer names to arrays and
            y is the target array (None when target_column is None).
        """
        # Keys match the Input layer names created in build_model();
        # each categorical column becomes its own (n_rows, 1) input.
        X = {
            f'input_{col}': df[col].values.reshape(-1, 1)
            for col in categorical_columns
        }

        # Numerical features (combined as single input)
        if numerical_columns:
            X['numerical_features'] = df[numerical_columns].values

        y = df[target_column].values if target_column is not None else None

        return X, y

    def train(self, X_train, y_train, X_val=None, y_val=None,
              batch_size=512, epochs=100, verbose=1):
        """
        Train the model

        Uses early stopping (patience 10, best weights restored) and halves
        the learning rate on plateau (patience 5, floor 1e-7). Both callbacks
        monitor 'val_loss' when validation data is given, otherwise 'loss'.

        Args:
            X_train: training inputs
            y_train: training targets
            X_val: optional validation inputs
            y_val: validation targets; required when X_val is given
            batch_size: mini-batch size
            epochs: maximum number of epochs
            verbose: Keras verbosity level

        Returns:
            The History object returned by Model.fit.

        Raises:
            RuntimeError: if build_model() has not been called.
            ValueError: if X_val is given without y_val.
        """
        model = self._require_model()

        has_validation = X_val is not None
        if has_validation and y_val is None:
            raise ValueError("y_val must be provided when X_val is given.")

        monitor = 'val_loss' if has_validation else 'loss'
        callbacks = [
            EarlyStopping(
                monitor=monitor,
                patience=10,
                restore_best_weights=True
            ),
            ReduceLROnPlateau(
                monitor=monitor,
                factor=0.5,
                patience=5,
                min_lr=1e-7
            )
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val) if has_validation else None,
            batch_size=batch_size,
            epochs=epochs,
            callbacks=callbacks,
            verbose=verbose
        )

        return history

    def predict(self, X):
        """
        Make predictions

        Returns:
            The output of Model.predict for X.

        Raises:
            RuntimeError: if build_model() has not been called.
        """
        return self._require_model().predict(X)

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model

        Prints and returns ROC AUC and log loss on the given data.

        Returns:
            (auc_score, logloss)
        """
        # The model has a single output unit, so predictions come back as a
        # column; flatten to 1-D before handing them to the sklearn metrics.
        y_pred = np.ravel(self.predict(X_test))

        # Calculate metrics
        auc_score = roc_auc_score(y_test, y_pred)
        logloss = log_loss(y_test, y_pred)

        print(f"Test AUC: {auc_score:.4f}")
        print(f"Test LogLoss: {logloss:.4f}")

        return auc_score, logloss

# Example usage with your iPinYou dataset
def main(df=None, target_column='click'):
    """
    Example of how to use the CTR model with your preprocessed data

    Builds the example architecture and prints its summary. When a
    DataFrame is supplied, it is additionally split into train / validation
    / test parts, and the model is trained and evaluated on them.

    Args:
        df: optional preprocessed pandas DataFrame, e.g.
            pd.read_csv('your_preprocessed_ipinyou_data.csv'). It must
            contain every column named in the feature configuration below
            plus target_column. When None, only the model is built.
        target_column: name of the target column in df
    """

    # Define your feature information based on your preprocessing
    # Example feature configuration:
    categorical_features_info = {
        'weekday': 7,        # 7 days of week
        'hour': 24,          # 24 hours
        'useragent': 1000,   # vocab size after label encoding
        'region': 500,       # vocab size after label encoding
        'city': 1000,        # vocab size after label encoding
        'adexchange': 10,    # number of ad exchanges
        'domain': 5000,      # vocab size after label encoding
        'slotid': 2000,      # vocab size after label encoding
        'slotwidth': 50,     # number of unique slot widths
        'slotheight': 50,    # number of unique slot heights
        'slotvisibility': 5, # visibility categories
        'slotformat': 10,    # format categories
    }

    # Numerical features (from one-hot encoding or continuous features)
    numerical_columns = ['bid_price', 'payprice']  # Add your numerical columns
    numerical_features_count = len(numerical_columns)

    # Initialize the model
    ctr_model = CTRDeepNN(
        categorical_features_info=categorical_features_info,
        numerical_features_count=numerical_features_count
    )

    # Build the model architecture
    model = ctr_model.build_model(
        embedding_dim=50,
        hidden_layers=[512, 256, 128, 64],  # 4 hidden layers
        dropout_rate=0.3,
        l2_reg=1e-5
    )

    # Print model summary
    model.summary()

    # Without data there is nothing to train on; stop after the summary.
    if df is None:
        return

    categorical_columns = list(categorical_features_info)

    # Split at the DataFrame level: train_test_split cannot split the dict of
    # per-feature arrays produced by prepare_input_data, and splitting rows
    # first keeps every feature and the target aligned.
    train_df, test_df = train_test_split(
        df, test_size=0.2, random_state=42, stratify=df[target_column]
    )

    # Further split training data for validation
    train_df, val_df = train_test_split(
        train_df, test_size=0.2, random_state=42,
        stratify=train_df[target_column]
    )

    X_train, y_train = ctr_model.prepare_input_data(
        train_df, categorical_columns, numerical_columns, target_column
    )
    X_val, y_val = ctr_model.prepare_input_data(
        val_df, categorical_columns, numerical_columns, target_column
    )
    X_test, y_test = ctr_model.prepare_input_data(
        test_df, categorical_columns, numerical_columns, target_column
    )

    # Train the model
    ctr_model.train(
        X_train, y_train,
        X_val, y_val,
        batch_size=512,
        epochs=100
    )

    # Evaluate the model
    ctr_model.evaluate(X_test, y_test)

# Run the example only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()"