In [1]:
import joblib
import pandas as pd
import numpy as np
import spacy
from nltk.corpus import stopwords
import nltk
import statsmodels.api as sm

# Ensure that NLTK stopwords are downloaded
nltk.download('stopwords')

# Initialize Spacy model
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

# Load the models and objects
with open('App_folder/vectorizers_and_objects.pkl', 'rb') as f:
    vectorizers_and_objects = joblib.load(f)

lda_model = joblib.load('App_folder/lda_model.pkl')
nb_classifier = joblib.load('App_folder/nb_classifier.pkl')
logistic_regression_pipeline = joblib.load('App_folder/logistic_regression_pipeline.pkl')

# Extract vectorizers
vectorizer_LDA = vectorizers_and_objects['vectorizer_LDA']
vectorizer_Bayesian = vectorizers_and_objects['vectorizer_Bayesian']

# Define new topic names
new_topic_names = {
    'Topic_0': 'Weather Problem',
    'Topic_1': 'Staff at Gate',
    'Topic_2': 'Airport Price',
    'Topic_3': 'Seat options in Aircraft',
    'Topic_4': 'Customer Service',
    'Topic_5': 'Attendant Seat',
    'Topic_6': 'Luggage Hand for Priority',
    'Topic_7': 'Extra Fee for Luggage',
    'Topic_8': 'Quality/Price Experience',
    'Topic_9': 'Information on Extra',
    'Topic_10': 'Baggage in General',
    'Topic_11': 'Booking Problem',
    'Topic_12': 'Experience on Flight',
    'Topic_13': 'Price in General',
    'Topic_14': 'Seat and Legroom for Family and Child',
    'Topic_15': 'Priority Policy',
    'Topic_16': 'Week Destination Advise',
    'Topic_17': 'Announcement Problem',
    'Topic_18': 'Delay and Loss of Time'
}

# Topics to use for logistic regression
selected_topics = [
    'Staff at Gate',
    'Airport Price',
    'Customer Service',
    'Luggage Hand for Priority',
    'Quality/Price Experience',
    'Experience on Flight',
    'Seat and Legroom for Family and Child',
    'Week Destination Advise'
]

# Ensure the selected topics are in the new_topic_names dictionary
selected_topic_keys = [key for key, value in new_topic_names.items() if value in selected_topics]

# Function to filter nouns
def filter_nouns(text):
    doc = nlp(text)
    filtered_text = ' '.join([token.lemma_ for token in doc if token.pos_ in ['NOUN']])
    return filtered_text

# Function to filter adjectives
def filter_adjectives(text):
    doc = nlp(text)
    filtered_text = ' '.join([token.lemma_ for token in doc if token.pos_ in ['ADJ']])
    return filtered_text

def normalize_log_probabilities(log_probs):
    min_val = np.min(log_probs)
    max_val = np.max(log_probs)
    normalized = 2 * (log_probs - min_val) / (max_val - min_val) - 1
    return normalized



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Acer\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import pandas as pd
import numpy as np

def process_comment(comment):
    print("Original comment:")
    print(comment)
    
    # Trasformazione del commento usando CountVectorizer
    filtered_nouns = filter_nouns(comment)  # Assicurati che questa funzione sia definita
    filtered_adjectives = filter_adjectives(comment)  # Assicurati che questa funzione sia definita
    
    print("\nFiltered Nouns:")
    print(filtered_nouns)
    
    print("\nFiltered Adjectives:")
    print(filtered_adjectives)
    
    # Trasformazione del testo nei vettori
    X_LDA = vectorizer_LDA.transform([filtered_nouns])
    X_Bayesian = vectorizer_Bayesian.transform([filtered_adjectives])
    
    print("\nLDA Vector:")
    print(X_LDA.toarray())
    
    print("\nBayesian Vector:")
    print(X_Bayesian.toarray())
    
    # Applicazione di LDA per ottenere probabilità dei temi
    lda_topic_prob = lda_model.transform(X_LDA).flatten()  # Assicurati che `lda_model` sia definito
    
    print("\nLDA Topic Probabilities:")
    print(lda_topic_prob)
    
    # Ottenere i punteggi di sentimenti per aggettivi da Naive Bayes
    adj_scores = nb_classifier.predict_proba(X_Bayesian)[0]  # Assicurati che `nb_classifier` sia definito
    
    # Normalizzare i punteggi (se necessario)
    adj_scores = normalize_log_probabilities(adj_scores)  # Assicurati che questa funzione sia definita
    
    print("\nAdjective Scores:")
    print(adj_scores)
    
    # Calcolare la media dei punteggi degli aggettivi
    avg_adj_score = np.mean(adj_scores)
    
    print("\nAverage Adjective Score:")
    print(avg_adj_score)
    
    # Calcolare i punteggi dei temi basati sulla media dei punteggi degli aggettivi e le probabilità dei temi
    topic_scores = lda_topic_prob * avg_adj_score
    
    print("\nTopic Scores (before renaming):")
    print(topic_scores)
    
    # Creare un DataFrame per i punteggi dei temi con nomi generici
    topic_scores_df = pd.DataFrame(topic_scores.reshape(1, -1), columns=['Topic_' + str(i) for i in range(len(topic_scores))])
    
    print("\nTopic Scores DataFrame (before renaming):")
    print(topic_scores_df)
    
    # Rinomina le colonne basandosi sui nuovi nomi dei temi
    topic_scores_df.rename(columns={f'Topic_{i}': name for i, name in enumerate(new_topic_names.values())}, inplace=True)
    
    print("\nTopic Scores DataFrame (after renaming):")
    print(topic_scores_df)
    
    # Filtra per includere solo i temi selezionati
    filtered_topic_scores_df = topic_scores_df[selected_topics]
    
    print("\nFiltered Topic Scores DataFrame:")
    print(filtered_topic_scores_df)
    
    # Rimuovere la parte della regressione logistica (se non necessaria) e aggiornare la predizione
    # Usare Naive Bayes per la predizione
    y_pred_prob = nb_classifier.predict_proba(X_Bayesian)
    y_pred = (y_pred_prob[:, 1] >= 0.4).astype(int)[0]
    
    print("\nPrediction Probability:")
    print(y_pred_prob)
    
    print("\nPrediction (1: Yes, 0: No):")
    print(y_pred)
    
    # Poiché non stai usando la regressione logistica, rimuovi il calcolo dei coefficienti e dei p-value
    # Restituisci solo il DataFrame ordinato e la predizione
    return y_pred, filtered_topic_scores_df


In [3]:

comment = "good flight."
recommendation, results_df = process_comment(comment)

print("\nRecommendation (1: Yes, 0: No):", recommendation)
print("\nResults DataFrame (sorted by p-value):")
print(results_df)


Original comment:
good flight.

Filtered Nouns:
flight

Filtered Adjectives:
good

LDA Vector:
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

Bayesian Vector:
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

LDA Topic Probabilities:
[0.02631579 0.02631579 0.02631579 0.02631579 0.52631578 0.02631579
 0.02631579 0.02631579 0.02631579 0.02631579 0.02631579 0.02631579
 0.02631579 0.02631579 0.02631579 0.02631579 0.02631579 0.02631579
 0.02631579]

Adjective Scores:
[-1.  1.]

Average Adjective Score:
0.0

Topic Scores (b

In [None]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
import numpy as np
from PIL import Image, ImageTk  # Make sure you have Pillow installed for image handling

# Define Ryanair's color scheme
RYANAIR_BLUE = '#0033A0'
RYANAIR_YELLOW = '#F7E300'

# Simulated data and models for the function
new_topic_names = {
    'Topic_0': 'Weather Problem',
    'Topic_1': 'Staff at Gate',
    'Topic_2': 'Airport Price',
    'Topic_3': 'Seat options in Aircraft',
    'Topic_4': 'Customer Service',
    'Topic_5': 'Attendant Seat',
    'Topic_6': 'Luggage Hand for Priority',
    'Topic_7': 'Extra Fee for Luggage',
    'Topic_8': 'Quality/Price Experience',
    'Topic_9': 'Information on Extra',
    'Topic_10': 'Baggage in General',
    'Topic_11': 'Booking Problem',
    'Topic_12': 'Experience on Flight',
    'Topic_13': 'Price in General',
    'Topic_14': 'Seat and Legroom for Family and Child',
    'Topic_15': 'Priority Policy',
    'Topic_16': 'Week Destination Advise',
    'Topic_17': 'Announcement Problem',
    'Topic_18': 'Delay and Loss of Time'
}

selected_topics = list(new_topic_names.values())  # Placeholder for actual selected topics
selected_topic_keys = list(new_topic_names.keys())  # Placeholder for topic keys

# Function to process comment
def process_comment(comment):
    # Placeholder simulation
    filtered_nouns = comment  # Placeholder
    filtered_adjectives = comment  # Placeholder
    X_LDA = vectorizer_LDA.transform([filtered_nouns])
    X_Bayesian = vectorizer_Bayesian.transform([filtered_adjectives])
    lda_topic_prob = lda_model.transform(X_LDA).flatten()  # Ensure it's a 1D array
    adj_scores = nb_classifier.predict_proba(X_Bayesian)[0]
    adj_scores = normalize_log_probabilities(adj_scores)
    avg_adj_score = np.mean(adj_scores)
    topic_scores = lda_topic_prob * avg_adj_score
    
    topic_scores_df = pd.DataFrame(topic_scores.reshape(1, -1), columns=['Topic_' + str(i) for i in range(len(topic_scores))])
    topic_scores_df.rename(columns={f'Topic_{i}': name for i, name in enumerate(new_topic_names.values())}, inplace=True)
    filtered_topic_scores_df = topic_scores_df[selected_topics]

    # Placeholder for logistic regression pipeline
    coefficients = np.random.uniform(-5, 5, len(selected_topics))
    p_values = np.random.uniform(0, 0.1, size=coefficients.shape)

    results_df = pd.DataFrame({
        'Coefficient': coefficients,
        'P-Value': p_values
    }, index=selected_topic_keys)
    
    results_df.index = results_df.index.map(new_topic_names)
    sorted_by_pvalue = results_df.sort_values(by='P-Value')
    y_pred_prob = nb_classifier.predict_proba(X_Bayesian)
    y_pred = (y_pred_prob[:, 1] >= 0.4).astype(int)[0]
    
    return y_pred, sorted_by_pvalue.reset_index()

# Function to handle button click
def on_analyze_button_click():
    comment = comment_text.get("1.0", tk.END).strip()
    if not comment:
        messagebox.showwarning("Input Error", "Please enter a comment.")
        return
    
    try:
        recommendation, results_df = process_comment(comment)
        
        # Update recommendation label
        recommendation_label.config(text=f"Recommendation: {'Yes' if recommendation == 1 else 'No'}")
        
        # Update Treeview with results
        for item in tree.get_children():
            tree.delete(item)
        
        for _, row in results_df.iterrows():
            tree.insert('', tk.END, values=(row['index'], row['Coefficient'], row['P-Value']))
    
    except Exception as e:
        messagebox.showerror("Processing Error", f"An error occurred: {e}")

# Create main window
root = tk.Tk()
root.title("Comment Analysis")

# Set the background color and font
root.configure(bg=RYANAIR_BLUE)

# Add the Ryanair logo if available
try:
    logo_image = Image.open('App_folder/ryanair-logo.png')  # Replace with the path to your logo file
    logo_image = logo_image.resize((150, 50), Image.LANCZOS)  # Resize the logo if necessary
    logo_photo = ImageTk.PhotoImage(logo_image)
    logo_label = tk.Label(root, image=logo_photo, bg=RYANAIR_BLUE)
    logo_label.image = logo_photo  # Keep a reference to avoid garbage collection
    logo_label.pack(pady=10)
except FileNotFoundError:
    print("Logo file not found. Please provide the correct path to the logo.")

# Widget for entering comment
tk.Label(root, text="Enter your comment:", bg=RYANAIR_BLUE, fg='white', font=('Arial', 12)).pack(pady=5)
comment_text = tk.Text(root, height=5, width=50, wrap=tk.WORD)
comment_text.pack(pady=5)

# Button to start analysis
analyze_button = tk.Button(root, text="Analyze", command=on_analyze_button_click, bg=RYANAIR_YELLOW, fg=RYANAIR_BLUE, font=('Arial', 12, 'bold'))
analyze_button.pack(pady=10)

# Label to display recommendation
recommendation_label = tk.Label(root, text="Recommendation: Not Available", bg=RYANAIR_BLUE, fg='white', font=('Arial', 12))
recommendation_label.pack(pady=5)

# Treeview to display results
columns = ('Topic', 'Coefficient', 'P-Value')
tree = ttk.Treeview(root, columns=columns, show='headings')
tree.heading('Topic', text='Topic', anchor=tk.W)
tree.heading('Coefficient', text='Coefficient', anchor=tk.W)
tree.heading('P-Value', text='P-Value', anchor=tk.W)
tree.pack(pady=10)

# Style Treeview
tree_style = ttk.Style()
tree_style.configure("Treeview",
                     background=RYANAIR_BLUE,
                     foreground='white',
                     fieldbackground=RYANAIR_BLUE)
tree_style.configure("Treeview.Heading",
                     background=RYANAIR_YELLOW,
                     foreground=RYANAIR_BLUE,
                     font=('Arial', 12, 'bold'))

# Start the Tkinter event loop
root.mainloop()


## Positive Example
I recently flew with Ryanair Airlines and was extremely impressed with their service. The staff was very friendly and helpful, and the airplane was clean and well-maintained. My flight experience was pleasant and smooth. The prices were also competitive compared to other airlines. I highly recommend Ryanair Airlines to anyone looking for a hassle-free flight.
## Negative Example
I had a terrible experience with Ryanair Airlines. The flight was significantly delayed with no clear explanation, and the staff didn’t seem particularly interested in resolving the issue. Additionally, the airplane was dirty and the seat conditions were uncomfortable. Not to mention the high cost compared to the services provided. I would not recommend Ryanair Airlines unless there's no better option