# In [None]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout, Embedding, Conv1D, MaxPooling1D, LSTM, concatenate
from keras.layers import Input, Lambda
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
# Legacy wrapper location matching the `build_fn=` API used below.
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler

# Define a custom transformer for normalizing the numerical features
class Normalizer(BaseEstimator, TransformerMixin):
    """Turn per-category counts into fractions of a row total.

    The last input column is used as the denominator (``total_votes`` in
    this script). Every other column is divided by it row-wise, and the
    denominator column itself is dropped from the output.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer keeps no state."""
        return self

    def transform(self, X, y=None):
        """Return the row-wise fractions as a 2-D float array.

        Parameters
        ----------
        X : DataFrame or 2-D array-like
            Count columns followed by one total column (last position).
        y : ignored

        Returns
        -------
        numpy.ndarray
            One column fewer than ``X``. Rows whose total is exactly 0 yield
            0.0 in every column instead of NaN/inf, so downstream scaling
            and training are not poisoned by a division by zero.
        """
        frame = pd.DataFrame(X)  # also accepts a plain ndarray
        counts = np.asarray(frame.iloc[:, :-1], dtype=float)
        totals = np.asarray(frame.iloc[:, -1], dtype=float).reshape(-1, 1)

        fractions = np.zeros_like(counts)
        # Only divide where the total is non-zero; other cells keep the 0.0
        # they were initialised with. NaN totals still propagate as NaN.
        np.divide(counts, totals, out=fractions, where=totals != 0)
        return fractions

# Define a custom transformer for tokenizing and padding the text features
class TextPreprocessor(BaseEstimator, TransformerMixin):
    """Tokenize texts with a Keras ``Tokenizer`` and pad them to a fixed length.

    Parameters
    ----------
    max_words : int, default=10000
        Passed to ``Tokenizer(num_words=...)``.
    max_len : int, default=100
        Passed to ``pad_sequences(maxlen=...)``.

    Attributes
    ----------
    tokenizer : keras.preprocessing.text.Tokenizer
        The tokenizer used by ``transform``; rebuilt on every ``fit``.
    """

    def __init__(self, max_words=10000, max_len=100):
        # Every constructor argument is stored under its own name so that
        # BaseEstimator.get_params() / sklearn.base.clone() can read it back.
        # Without ``self.max_words`` a clone cannot be rebuilt faithfully.
        self.max_words = max_words
        self.max_len = max_len
        # Kept so ``.tokenizer`` is available before fit, as it was originally.
        self.tokenizer = Tokenizer(num_words=max_words)

    def fit(self, X, y=None):
        """Learn the vocabulary from the iterable of texts ``X``; return ``self``."""
        # Start from a fresh tokenizer: a refit must not accumulate counts
        # from a previous fit, and a ``max_words`` changed through
        # set_params() has to take effect.
        self.tokenizer = Tokenizer(num_words=self.max_words)
        self.tokenizer.fit_on_texts(X)
        return self

    def transform(self, X, y=None):
        """Return the texts in ``X`` as padded integer sequences of width ``max_len``."""
        sequences = self.tokenizer.texts_to_sequences(X)
        return pad_sequences(sequences, maxlen=self.max_len)

# Define the CNN-LSTM model architecture
def create_model(num_words, max_len, num_numeric=0, num_classes=4):
    """Build and compile the CNN-LSTM classifier.

    Parameters
    ----------
    num_words : int
        Vocabulary size of the embedding layer.
    max_len : int
        Length of each padded token sequence.
    num_numeric : int, default=0
        Number of numeric feature columns placed *before* the token columns
        in the input matrix. With 0 the model is the plain text-only
        Sequential network and expects input of width ``max_len``. With a
        positive value the model expects width ``num_numeric + max_len``,
        runs only the token columns through the Embedding/Conv/LSTM branch,
        and concatenates the numeric columns with the LSTM output before the
        final Dense layer.
    num_classes : int, default=4
        Number of units in the softmax output layer.

    Returns
    -------
    A compiled Keras model (categorical cross-entropy loss, Adam optimizer).
    """
    if num_numeric <= 0:
        model = Sequential()
        model.add(Embedding(num_words, 128, input_length=max_len))
        model.add(Conv1D(64, 5, activation='relu'))
        model.add(MaxPooling1D(pool_size=4))
        model.add(LSTM(128))
        model.add(Dense(num_classes, activation='softmax'))
    else:
        # One flat matrix comes in (that is all a scikit-learn Pipeline can
        # hand over), so split it back into its two parts inside the graph.
        features = Input(shape=(num_numeric + max_len,))
        numeric = Lambda(lambda t: t[:, :num_numeric],
                         output_shape=(num_numeric,))(features)
        tokens = Lambda(lambda t: t[:, num_numeric:],
                        output_shape=(max_len,))(features)

        # NOTE(review): token ids arrive as floats because they share a matrix
        # with the scaled numeric features; this relies on the Embedding layer
        # casting its input back to integers -- confirm on the Keras version
        # in use.
        text = Embedding(num_words, 128)(tokens)
        text = Conv1D(64, 5, activation='relu')(text)
        text = MaxPooling1D(pool_size=4)(text)
        text = LSTM(128)(text)

        merged = concatenate([text, numeric])
        outputs = Dense(num_classes, activation='softmax')(merged)
        model = Model(inputs=features, outputs=outputs)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Column names and hyper-parameters shared by several steps below; defining
# them once keeps the tokenizer, the model and the column selection in sync.
NUMERIC_COLUMNS = ['votes none', 'votes mild', 'votes moderate', 'votes severe', 'total_votes']
TEXT_COLUMN = 'text'
TARGET_COLUMN = 'aspect_rating'
MAX_WORDS = 10000
MAX_LEN = 100
# Normalizer divides by the last numeric column and drops it, so one column
# fewer reaches the model.
NUM_NUMERIC_FEATURES = len(NUMERIC_COLUMNS) - 1

# Load the dataset
df = pd.read_csv('your_dataset.csv')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df[NUMERIC_COLUMNS + [TEXT_COLUMN]],
    df[TARGET_COLUMN],
    test_size=0.2,
    random_state=42,
)

# Define the preprocessing steps for the numerical features
num_transformer = Pipeline(steps=[
    ('normalizer', Normalizer()),
    ('scaler', StandardScaler())
])

# Define the preprocessing steps for the text features
text_transformer = Pipeline(steps=[
    ('text_preprocessor', TextPreprocessor(max_words=MAX_WORDS, max_len=MAX_LEN))
])

# Combine the preprocessing steps using a ColumnTransformer.
# Outputs are concatenated in the order listed here: numeric columns first,
# then the padded token ids. create_model(num_numeric=...) relies on that order.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, NUMERIC_COLUMNS),
        ('text', text_transformer, TEXT_COLUMN)
    ])

# Define the pipeline with the CNN-LSTM model.
# num_numeric tells the model how many leading columns are numeric features;
# without it the network would expect MAX_LEN columns but receive
# NUM_NUMERIC_FEATURES + MAX_LEN and would treat the scaled floats as token ids.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', KerasClassifier(
        build_fn=create_model,
        num_words=MAX_WORDS,
        max_len=MAX_LEN,
        num_numeric=NUM_NUMERIC_FEATURES,
        epochs=10,
        batch_size=32,
    ))
])

# Fit the pipeline to the training data
