Load the dataset from a CSV file.

In [None]:
import pandas as pd
# Read the raw dataset from the CSV file in the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer='yahoo_answers.csv')

Shuffle and split the dataset into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed seed makes the
# split reproducible across kernel restarts.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

Fit a tokenizer on the training texts to build the word-index vocabulary.

In [None]:
from keras.preprocessing.text import Tokenizer
# Cap the vocabulary so every token index the tokenizer emits is a valid row
# of the Embedding layer, which is built with input_dim=10000. Without a cap,
# texts_to_sequences can emit indices >= 10000, which are out of range for
# that embedding. With num_words set, only the most frequent words
# (indices 1 .. num_words - 1) are kept; index 0 stays free for padding.
MAX_VOCAB_SIZE = 10000  # keep in sync with Embedding(input_dim=...)
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
# Build the word index from the training texts only, so no vocabulary
# information leaks in from the test split.
tokenizer.fit_on_texts(train_data['text'])

Visualize the distribution of labels in the training dataset.

In [None]:
import matplotlib.pyplot as plt
# Labels are categorical, so plot one bar per class (count of training rows)
# rather than a binned histogram, and label the figure so it can be read
# on its own when the notebook is skimmed.
fig, ax = plt.subplots()
train_data['label'].value_counts().sort_index().plot.bar(ax=ax)
ax.set(
    title='Label distribution (training set)',
    xlabel='Label',
    ylabel='Number of examples',
)
plt.show()

Encode the training labels as integer class indices.

In [None]:
from sklearn.preprocessing import LabelEncoder
# Learn the label -> integer mapping from the training labels, then apply it.
# `le` is kept so the same mapping (and le.classes_) can be reused later.
le = LabelEncoder().fit(train_data['label'])
y_train = le.transform(train_data['label'])

Prepare and define the model architecture for training.

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
# Sequence classifier assembled in one expression:
#   1. Embedding: token ids in [0, 10000) -> 128-dimensional vectors
#   2. LSTM with 64 units summarising the sequence
#   3. Dense softmax with one output unit per encoded label class
model = Sequential([
    Embedding(input_dim=10000, output_dim=128),
    LSTM(64),
    Dense(len(le.classes_), activation='softmax'),
])

Train the model and evaluate on the test dataset.

In [None]:
from keras.preprocessing.sequence import pad_sequences

# X_train, X_test and y_test were never built anywhere in the notebook, so the
# fit call failed with NameError on a fresh kernel. Build them here from the
# fitted tokenizer and label encoder.

# Every text is padded/truncated to this many tokens so both splits share one
# input shape. The value is a tunable choice, not derived from the data.
MAX_SEQUENCE_LENGTH = 200

# Token ids must be valid rows of the Embedding layer (the model's first layer).
vocab_size = model.layers[0].input_dim


def _encode_texts(texts):
    """Turn raw texts into a padded 2-D integer array of token ids.

    Token ids that fall outside the embedding's range are dropped, so the
    result is always safe to feed to the model regardless of how large the
    tokenizer's vocabulary is.
    """
    sequences = tokenizer.texts_to_sequences(texts)
    in_vocab = [[idx for idx in seq if idx < vocab_size] for seq in sequences]
    return pad_sequences(in_vocab, maxlen=MAX_SEQUENCE_LENGTH)


X_train = _encode_texts(train_data['text'])
X_test = _encode_texts(test_data['text'])
# Reuse the encoder fitted on the training labels so class indices match y_train.
y_test = le.transform(test_data['label'])

# Sparse categorical cross-entropy matches the integer-encoded labels.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# The test split is passed as validation data, so it is evaluated after each epoch.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Save the trained model to a file.

In [None]:
import joblib
# Persist the network with Keras' own serializer instead of joblib/pickle:
# model.save stores architecture, weights and optimizer state in a format
# that keras.models.load_model can read back. (HDF5 output requires h5py.)
model.save('model.h5')
# The fitted tokenizer and label encoder are needed to prepare inputs and
# decode predictions when the model is reloaded; these are ordinary
# Python / scikit-learn objects, which joblib is suited for.
joblib.dump({'tokenizer': tokenizer, 'label_encoder': le}, 'preprocessing.joblib')