In [21]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
import tkinter as tk
from tkinter import *

# load the data (the first spreadsheet column is used as the row index)
df = pd.read_excel("C:/Users/satk8/Desktop/dataset.xlsx", index_col=0)

# shared text-preprocessing resources
# Stop words are kept in a set: preprocess() tests every token for membership,
# and a set lookup is O(1) whereas scanning the original list is O(n).
stop_words = set(stopwords.words('english'))
stemmer = SnowballStemmer('english')
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalise a piece of text for vectorisation.

    The text is lower-cased and tokenised with NLTK, English stop words are
    dropped, and each remaining token is stemmed and then lemmatised.

    Args:
        text: The raw text. Missing values (``None`` or NaN, as produced for
            empty spreadsheet cells) yield an empty string; any other
            non-string value is converted with ``str`` first.

    Returns:
        The processed tokens joined by single spaces.
    """
    # Empty cells arrive as float NaN (NaN is the only float unequal to itself).
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    # Single pass: filter stop words, then stem and lemmatise each survivor.
    return ' '.join(
        lemmatizer.lemmatize(stemmer.stem(token))
        for token in nltk.word_tokenize(text.lower())
        if token not in stop_words
    )
# run both free-text columns through the shared cleaning function, in place
df['Search Query'] = df['Search Query'].map(preprocess)
df['Clicked Result'] = df['Clicked Result'].map(preprocess)

# encode gender as a binary target: 1 for 'female' (any letter case), 0 otherwise
df['Gender'] = df['Gender'].map(lambda gender: int(gender.lower() == 'female'))

# turn the cleaned queries into TF-IDF features (sparse matrix converted to a
# dense array) and take the encoded gender column as the target
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Search Query']).toarray()
y = df['Gender'].to_numpy()

# hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# define the neural network architecture: three ReLU hidden layers (128/64/32),
# each followed by 20% dropout, and a single sigmoid unit for the binary target
model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# define early stopping
# With patience=30 training continues for 30 epochs after the best val_loss, so
# the last-epoch weights can be far past the optimum. restore_best_weights=True
# rolls the model back to the best epoch when training stops.
early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

# train the model
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is tuned on the same data used for evaluation.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)

# define a function to make predictions on user input
def predict_gender(search_query):
    """Predict the gender label for a single raw search query.

    The query is cleaned with ``preprocess``, vectorised with the fitted
    TF-IDF ``vectorizer`` and scored by the trained ``model``.

    Args:
        search_query: The raw query text.

    Returns:
        ``"female"`` when the model output is above 0.5, otherwise ``"male"``.
    """
    features = vectorizer.transform([preprocess(search_query)]).toarray()
    probability = model.predict(features)[0][0]
    return "female" if probability > 0.5 else "male"

# define the GUI function
def create_gui():
    """Build and run the Tkinter window for interactive gender prediction.

    The window holds a "Predict gender" button, a query entry, a label for the
    prediction and a label listing the clicked results recorded for the same
    query in ``df``. The call blocks in the Tk event loop until the window is
    closed.
    """
    # create the main window
    root = tk.Tk()
    root.title("Gender Prediction")

    # define the labels and entry widgets
    search_query_label = tk.Label(root, text="Enter a search query:")
    search_query_entry = tk.Entry(root, width=50)
    prediction_label = tk.Label(root, text="")
    similar_words_label = tk.Label(root, text="")

    def predict(event=None):
        """Score the query in the entry box and refresh both result labels.

        Args:
            event: The Tk event passed when triggered by a key binding;
                ``None`` when invoked from the button. It is not used.
        """
        # get the search query entered by the user
        search_query = search_query_entry.get()

        # clean and vectorise it exactly as the training data was
        preprocessed_search_query = preprocess(search_query)
        vectorized_search_query = vectorizer.transform([preprocessed_search_query]).toarray()

        # model output in [0, 1]; values above 0.5 are read as 'female'
        prediction = float(model.predict(vectorized_search_query)[0][0])

        # report the predicted label with the probability of that label
        if prediction > 0.5:
            label, confidence = "female", prediction
        else:
            label, confidence = "male", 1 - prediction
        prediction_label.config(
            text="The model predicted that the user who searched for '{}' is {} ({}%).".format(
                search_query, label, round(confidence * 100, 3)
            )
        )

        # df['Search Query'] holds preprocessed text, so the lookup must use
        # the preprocessed query; the raw input would almost never match.
        matches = df['Search Query'] == preprocessed_search_query
        similar_words = df.loc[matches, 'Clicked Result'].values

        # display the similar words
        similar_words_label.config(text="Similar words: {}".format(', '.join(similar_words)))

    # create a predict button
    predict_button = tk.Button(root, text="Predict gender", command=predict)
    predict_button.pack()

    # create a search query label and entry
    search_query_label.pack()
    search_query_entry.pack()

    # create a prediction label
    prediction_label.pack()

    # create a similar words label
    similar_words_label.pack()

    # bind the enter key to the predict function; Tk passes an event object to
    # bound callbacks, which is why predict() accepts an optional argument
    search_query_entry.bind("<Return>", predict)

    # run the tkinter event loop
    root.mainloop()

if __name__ == "__main__":
    create_gui()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
