In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
import datetime
import pickle



In [None]:
def load_data(x_path='X_reduced.pkl', y_path='y.pkl'):
    """Unpickle the feature matrix and the target matrix from disk.

    Args:
        x_path: Path of the pickle file holding the features.
        y_path: Path of the pickle file holding the targets.

    Returns:
        A ``(X, y)`` tuple with the two unpickled objects, in that order.
    """
    loaded = []
    # Read the features first, then the targets; each file is closed
    # before the next one is opened.
    for path in (x_path, y_path):
        with open(path, 'rb') as handle:
            loaded.append(pickle.load(handle))
    X, y = loaded
    return X, y


def load_model_objects(vectorizer_path='vectorizer.pkl', svd_path='svd.pkl',
                       top_tags_path='top_tags.pkl'):
    """Unpickle the preprocessing artifacts needed at inference time.

    The paths are parameters (mirroring ``load_data``) so the artifacts can
    live outside the current working directory; the defaults reproduce the
    original hard-coded file names.

    Args:
        vectorizer_path: Pickle file holding the text vectorizer.
        svd_path: Pickle file holding the SVD reducer.
        top_tags_path: Pickle file holding the tag collection.

    Returns:
        A ``(vectorizer, svd, top_tags)`` tuple of the unpickled objects.
    """
    artifacts = []
    for path in (vectorizer_path, svd_path, top_tags_path):
        with open(path, 'rb') as handle:
            artifacts.append(pickle.load(handle))
    vectorizer, svd, top_tags = artifacts
    return vectorizer, svd, top_tags


# `top_tags` must be loaded before it is displayed; otherwise this cell
# raises NameError on a fresh kernel (Restart & Run All).
top_tags = load_model_objects()[-1]
print(top_tags)

In [None]:

def split_data(X, y, test_size=0.2, random_state=42):
    """Split features and targets into train and test partitions.

    Thin wrapper around ``train_test_split`` with a fixed default seed.

    Args:
        X: Features to split.
        y: Targets to split, aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        Whatever ``train_test_split(X, y, ...)`` returns; callers in this
        notebook unpack it as ``X_train, X_test, y_train, y_test``.
    """
    partitions = train_test_split(
        X,
        y,
        random_state=random_state,
        test_size=test_size,
    )
    return partitions


In [None]:
def build_model(input_shape, output_shape):
    """Build and compile a small fully-connected Keras network.

    Architecture: input -> Dense(128, relu) -> Dropout(0.5)
    -> Dense(64, relu) -> Dropout(0.5) -> Dense(output_shape, sigmoid).

    Args:
        input_shape: Number of input features (used as ``shape=(input_shape,)``).
        output_shape: Number of units in the sigmoid output layer.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        Adam optimizer, ``accuracy`` metric).
    """
    layer_stack = [
        Input(shape=(input_shape,)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(output_shape, activation='sigmoid'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [None]:
def train_model(model, X_train, y_train, X_val, y_val, log_dir="logs/fit",
                epochs=10, batch_size=32, patience=5,
                checkpoint_path='best_model.keras'):
    """Fit ``model`` with TensorBoard logging, early stopping and checkpointing.

    Args:
        model: Compiled Keras model to train.
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features, passed to ``fit`` as ``validation_data``.
        y_val: Validation targets, passed to ``fit`` as ``validation_data``.
        log_dir: Base directory for TensorBoard logs; a timestamped
            sub-directory is created per call.
        epochs: Maximum number of training epochs (default 10).
        batch_size: Mini-batch size (default 32).
        patience: Epochs without ``val_loss`` improvement before early
            stopping triggers (default 5).
        checkpoint_path: Where the best model (lowest ``val_loss``) is saved.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # One sub-directory per run, named after the start time, so successive
    # runs do not overwrite each other's logs.
    run_log_dir = log_dir + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    tensorboard_callback = TensorBoard(log_dir=run_log_dir, histogram_freq=1)
    early_stopping_callback = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True,
    )
    checkpoint_callback = ModelCheckpoint(
        checkpoint_path,
        save_best_only=True,
        monitor='val_loss',
        mode='min',
    )

    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[tensorboard_callback, early_stopping_callback, checkpoint_callback]
    )
    return history

In [None]:

def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on the given data and print the two metrics.

    Args:
        model: Object exposing ``evaluate(X, y)`` that yields exactly two
            values (loss, accuracy).
        X_test: Evaluation features.
        y_test: Evaluation targets.

    Returns:
        A ``(loss, accuracy)`` tuple.
    """
    metrics = model.evaluate(X_test, y_test)
    loss, accuracy = metrics
    print(f'Loss: {loss}, Accuracy: {accuracy}')
    return loss, accuracy

def plot_training_history(history):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``
            (as returned by ``train_model``).
    """
    # (subplot index, train key, train label, val key, val label, y label, title)
    panels = (
        (1, 'loss', 'Train Loss', 'val_loss', 'Validation Loss',
         'Loss', 'Loss over epochs'),
        (2, 'accuracy', 'Train Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Accuracy', 'Accuracy over epochs'),
    )

    plt.figure(figsize=(12, 4))
    for position, train_key, train_label, val_key, val_label, y_label, title in panels:
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()
        plt.title(title)

    plt.show()

In [None]:
X, y = load_data()
vectorizer, svd, top_tags = load_model_objects()

# Hold out the test set first, then carve a validation set out of the
# remaining training data. Early stopping and checkpoint selection are driven
# by validation loss, so validating on the test set would leak it into model
# selection and make the final test score optimistic.
X_train_full, X_test, y_train_full, y_test = split_data(X, y)
X_train, X_val, y_train, y_val = split_data(X_train_full, y_train_full)

model = build_model(X_train.shape[1], y_train.shape[1])

history = train_model(model, X_train, y_train, X_val, y_val)

# The test set is only used here, for the final unbiased evaluation.
evaluate_model(model, X_test, y_test)

plot_training_history(history)

In [None]:

def transform_text_to_bow(text, vectorizer, svd):
    """Turn one raw text into the reduced feature representation.

    Args:
        text: The text to encode; it is wrapped in a one-element list before
            being handed to the vectorizer.
        vectorizer: Object exposing ``transform(list_of_texts)``.
        svd: Object exposing ``transform(matrix)``, applied to the
            vectorizer output.

    Returns:
        The result of ``svd.transform`` on the vectorized text.
    """
    return svd.transform(vectorizer.transform([text]))

# Minimum predicted probability for a tag to be suggested.
TAG_PROBABILITY_THRESHOLD = 0.1

new_question_text = "help me please i didn't success to install"
new_question_vector = transform_text_to_bow(new_question_text, vectorizer, svd)

# Run inference once and reuse the probabilities for both the thresholded
# suggestions and the raw display (previously `predict` was called twice on
# the same input).
predicted_tags_probabilities = model.predict(new_question_vector)

# Binary indicator per tag: 1 where the probability exceeds the threshold.
predicted_tags = (predicted_tags_probabilities > TAG_PROBABILITY_THRESHOLD).astype(int)

# Only one question was submitted, so row 0 holds its flags.
predicted_tag_names = [
    top_tags[i] for i, flag in enumerate(predicted_tags[0]) if flag == 1
]

print(top_tags)

print("Tags suggérés :", predicted_tag_names)

print("Probabilités prédites :", predicted_tags_probabilities)
