***Model Traning Code***

In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import re
import time
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack, csr_matrix
import nltk
import string
from nltk.sentiment import SentimentIntensityAnalyzer  # Changed from TextBlob to VADER

# # Load NLTK resources
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('vader_lexicon')  # Download VADER lexicon

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

# Load data
twitter_data = pd.read_csv('dataset_1 (1).csv', encoding='ISO-8859-1')

# Cleaning function
def clean_tweet(tweet):
    tweet = re.sub(r'<.*?>', '', tweet)  # Remove HTML tags
    tweet = re.sub(r'http\S+|www\S+|https\S+', '', tweet, flags=re.MULTILINE)  # Remove URLs
    tweet = re.sub(r'@\w+', '', tweet)  # Remove mentions
    tweet = re.sub(r'#\w+', '', tweet)  # Remove hashtags
    tweet = tweet.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    tweet = tweet.lower()  # Convert to lowercase
    tweet = re.sub(r'\d+', '', tweet)  # Remove digits
    tweet = re.sub(r'\s+', ' ', tweet).strip()  # Remove extra spaces
    return tweet

# Apply cleaning
twitter_data['cleaned_text'] = twitter_data['text'].apply(clean_tweet)

# Tokenization
def tokenize_tweet(tweet):
    return word_tokenize(tweet)

twitter_data['tokenized_text'] = twitter_data['cleaned_text'].apply(tokenize_tweet)

# Stop words
stop_words = set(stopwords.words('english'))
negation_words = {"not", "no", "never", "n't"}  # Keep negation words

# Remove stop words but keep negation words
def remove_stopwords(tokens):
    return [word for word in tokens if word not in stop_words or word in negation_words]

twitter_data['no_stopwords_text'] = twitter_data['tokenized_text'].apply(remove_stopwords)

# *New Sentiment Analysis Using VADER*
def get_sentiment(tweet):
    sentiment_score = sia.polarity_scores(tweet)  # Get sentiment scores
    return sentiment_score['compound']  # Return overall polarity (-1 to +1)

# Apply VADER Sentiment Analysis
twitter_data['polarity'] = twitter_data['cleaned_text'].apply(get_sentiment)

# Print processed results
print(twitter_data[['text', 'cleaned_text', 'no_stopwords_text', 'polarity']].head())

# Stemming function
port_stem = PorterStemmer()
def stemming(content):
    if not isinstance(content, str):
        content = str(content)
    stemmed_content = re.sub('[^a-zA-Z]', ' ', content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [port_stem.stem(word) for word in stemmed_content if word not in stop_words]
    return ' '.join(stemmed_content)

# Batch processing for stemming
def batch_process_stemming(data, batch_size=10000):
    stemmed_contents = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        stemmed_batch = batch.apply(stemming)
        stemmed_contents.extend(stemmed_batch)
    return stemmed_contents

# Apply stemming
twitter_data['stemmed_content'] = batch_process_stemming(twitter_data['no_stopwords_text'].apply(lambda x: ' '.join(x)))

# Prepare feature and target variables
X = twitter_data['stemmed_content'].values
Y = twitter_data['target'].values

# Split the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, stratify=Y, random_state=2)

# Check if 'polarity' column exists
if 'polarity' in twitter_data.columns:
    numeric_features = twitter_data[['polarity']] # Replace or add more features if applicable
    
    # Split numeric features based on train-test split
    numeric_train, numeric_test = train_test_split(numeric_features, test_size=0.3, stratify=Y, random_state=2)

    # Convert to NumPy arrays
    numeric_train_array = numeric_train.values
    numeric_test_array = numeric_test.values

    # Scale numeric features
    scaler = StandardScaler()
    zscore_train = scaler.fit_transform(numeric_train_array)
    zscore_test = scaler.transform(numeric_test_array)

    # Convert to sparse matrices
    zscore_train_sparse = csr_matrix(zscore_train)
    zscore_test_sparse = csr_matrix(zscore_test)

    # Time for TF-IDF vectorization
    start_time = time.time()
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)
    vectorization_time = time.time() - start_time
    print(f"TF-IDF Vectorization Time: {vectorization_time:.4f} seconds")

    # Combine features
    combined_train_sparse = hstack([X_train_tfidf, zscore_train_sparse])
    combined_test_sparse = hstack([X_test_tfidf, zscore_test_sparse])

    # Initialize and fit the SVM model
    svm_model = SVC(kernel='linear')
    svm_model.fit(combined_train_sparse, Y_train)

    # Predictions for test data
    X_test_prediction = svm_model.predict(combined_test_sparse)

    # Evaluation metrics for test data
    test_precision = precision_score(Y_test, X_test_prediction, average='weighted')
    test_recall = recall_score(Y_test, X_test_prediction, average='weighted')
    test_f1 = f1_score(Y_test, X_test_prediction, average='weighted')

    print(f'Test Data - Precision: {test_precision:.4f}')
    print(f'Test Data - Recall: {test_recall:.4f}')
    print(f'Test Data - F1 Score: {test_f1:.4f}')

    # Accuracy score on testing data
    test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
    print('Accuracy score of the test data:', test_data_accuracy)

    # Predictions for training data
    X_train_prediction = svm_model.predict(combined_train_sparse)     # with blancing factor

    # Evaluation metrics for training data
    train_precision = precision_score(Y_train, X_train_prediction, average='weighted')
    train_recall = recall_score(Y_train, X_train_prediction, average='weighted')
    train_f1 = f1_score(Y_train, X_train_prediction, average='weighted')

    print(f'Training Data - Precision: {train_precision:.4f}')
    print(f'Training Data - Recall: {train_recall:.4f}')
    print(f'Training Data - F1 Score: {train_f1:.4f}')

    # Accuracy score on training data
    training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
    print('Accuracy score of the training data:', training_data_accuracy)
else:
    print("Error: 'polarity' column not found in the dataset.")

# End measuring time and calculate the elapsed time
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time:.2f} seconds")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                                text  \
0  @crnixon i'd be interested in speaking, but no...   
1  @rachelhart @rachelhart You really like changi...   
2  Off to Ciara's swimming lesson later, looking ...   
3   pain errrg! stupid cold and stupid people con...   
4  @ddlovato CONGRATULATIONS DEMI  . YOU'RE THE B...   

                                        cleaned_text  \
0  id be interested in speaking but no idea what ...   
1  you really like changing you profile pic dont you   
2  off to ciaras swimming lesson later looking fo...   
3  pain errrg stupid cold and stupid people confu...   
4  congratulations demi youre the best i love you lt   

                                   no_stopwords_text  polarity  
0  [id, interested, speaking, no, idea, id, talk,...    0.3182  
1       [really, like, changing, profile, pic, dont]    0.4201  
2  [ciaras, swimming, lesson, later, looking, for...    0.0000  
3  [pain, errrg, stupid, cold, stupid, people, co...   -0.9001  
4

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import re
import time
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack, csr_matrix
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import string
from xgboost import XGBClassifier

# Load NLTK resources
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('vader_lexicon')

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

# Load data
twitter_data = pd.read_csv('dataset_1 (1).csv', encoding='ISO-8859-1')

# Cleaning function
def clean_tweet(tweet):
    tweet = re.sub(r'<.*?>', '', tweet)
    tweet = re.sub(r'http\S+|www\S+|https\S+', '', tweet)
    tweet = re.sub(r'@\w+', '', tweet)
    tweet = re.sub(r'#\w+', '', tweet)
    tweet = tweet.translate(str.maketrans('', '', string.punctuation))
    tweet = tweet.lower()
    tweet = re.sub(r'\d+', '', tweet)
    tweet = re.sub(r'\s+', ' ', tweet).strip()
    return tweet

# Apply cleaning
twitter_data['cleaned_text'] = twitter_data['text'].apply(clean_tweet)

# Tokenization
twitter_data['tokenized_text'] = twitter_data['cleaned_text'].apply(word_tokenize)

# Stop words
stop_words = set(stopwords.words('english'))
negation_words = {"not", "no", "never", "n't"}

# Remove stopwords (keep negation words)
def remove_stopwords(tokens):
    return [word for word in tokens if word not in stop_words or word in negation_words]

twitter_data['no_stopwords_text'] = twitter_data['tokenized_text'].apply(remove_stopwords)

# Sentiment analysis using VADER
def get_sentiment(tweet):
    sentiment_score = sia.polarity_scores(tweet)
    return sentiment_score['compound']

twitter_data['polarity'] = twitter_data['cleaned_text'].apply(get_sentiment)

# Stemming
port_stem = PorterStemmer()
def stemming(content):
    if not isinstance(content, str):
        content = str(content)
    stemmed_content = re.sub('[^a-zA-Z]', ' ', content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [port_stem.stem(word) for word in stemmed_content if word not in stop_words]
    return ' '.join(stemmed_content)

# Batch stemming
def batch_process_stemming(data, batch_size=10000):
    stemmed_contents = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        stemmed_batch = batch.apply(stemming)
        stemmed_contents.extend(stemmed_batch)
    return stemmed_contents

# Apply stemming
twitter_data['stemmed_content'] = batch_process_stemming(twitter_data['no_stopwords_text'].apply(lambda x: ' '.join(x)))

# Prepare features and target
X = twitter_data['stemmed_content'].values
Y = twitter_data['target'].values

# Split dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, stratify=Y, random_state=2)

# Feature: polarity
numeric_features = twitter_data[['polarity']]
numeric_train, numeric_test = train_test_split(numeric_features, test_size=0.3, stratify=Y, random_state=2)

# Scale numeric features
scaler = StandardScaler()
zscore_train = scaler.fit_transform(numeric_train.values)
zscore_test = scaler.transform(numeric_test.values)

# Convert to sparse matrix
zscore_train_sparse = csr_matrix(zscore_train)
zscore_test_sparse = csr_matrix(zscore_test)

# TF-IDF Vectorization
start_time = time.time()
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)
vectorization_time = time.time() - start_time
print(f"TF-IDF Vectorization Time: {vectorization_time:.4f} seconds")

# Combine TF-IDF + polarity
combined_train_sparse = hstack([X_train_tfidf, zscore_train_sparse])
combined_test_sparse = hstack([X_test_tfidf, zscore_test_sparse])

# XGBoost Model
xgb_model = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    use_label_encoder=False,
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Train XGBoost
xgb_model.fit(combined_train_sparse, Y_train)

# Predictions - Test
X_test_prediction = xgb_model.predict(combined_test_sparse)
test_precision = precision_score(Y_test, X_test_prediction, average='weighted')
test_recall = recall_score(Y_test, X_test_prediction, average='weighted')
test_f1 = f1_score(Y_test, X_test_prediction, average='weighted')
test_accuracy = accuracy_score(Y_test, X_test_prediction)

print(f'XGBoost Test Data - Precision: {test_precision:.4f}')
print(f'XGBoost Test Data - Recall: {test_recall:.4f}')
print(f'XGBoost Test Data - F1 Score: {test_f1:.4f}')
print(f'XGBoost Test Data - Accuracy: {test_accuracy:.4f}')

# Predictions - Train
X_train_prediction = xgb_model.predict(combined_train_sparse)
train_precision = precision_score(Y_train, X_train_prediction, average='weighted')
train_recall = recall_score(Y_train, X_train_prediction, average='weighted')
train_f1 = f1_score(Y_train, X_train_prediction, average='weighted')
train_accuracy = accuracy_score(Y_train, X_train_prediction)

print(f'XGBoost Train Data - Precision: {train_precision:.4f}')
print(f'XGBoost Train Data - Recall: {train_recall:.4f}')
print(f'XGBoost Train Data - F1 Score: {train_f1:.4f}')
print(f'XGBoost Train Data - Accuracy: {train_accuracy:.4f}')

# Total elapsed time
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time:.2f} seconds")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


TF-IDF Vectorization Time: 1.1503 seconds


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Test Data - Precision: 0.7550
XGBoost Test Data - Recall: 0.7515
XGBoost Test Data - F1 Score: 0.7505
XGBoost Test Data - Accuracy: 0.7515
XGBoost Train Data - Precision: 0.7723
XGBoost Train Data - Recall: 0.7687
XGBoost Train Data - F1 Score: 0.7678
XGBoost Train Data - Accuracy: 0.7687
Elapsed time: 32.27 seconds


In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import re
import time
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack, csr_matrix
import nltk

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

# Load data
twitter_data = pd.read_csv('dataset_1 (1).csv', encoding='ISO-8859-1')

# Cleaning function
def clean_tweet(tweet):
    tweet = re.sub(r'<.*?>', '', tweet)
    tweet = re.sub(r'http\S+|www\S+|https\S+', '', tweet, flags=re.MULTILINE)
    tweet = re.sub(r'@\w+', '', tweet)
    tweet = re.sub(r'#\w+', '', tweet)
    tweet = tweet.translate(str.maketrans('', '', string.punctuation))
    tweet = tweet.lower()
    tweet = re.sub(r'\d+', '', tweet)
    tweet = re.sub(r'\s+', ' ', tweet).strip()
    return tweet

twitter_data['cleaned_text'] = twitter_data['text'].apply(clean_tweet)

# Tokenization and Stopword Removal
stop_words = set(stopwords.words('english'))
negation_words = {"not", "no", "never", "n't"}

def preprocess_tokens(tweet):
    tokens = word_tokenize(tweet)
    return [word for word in tokens if word not in stop_words or word in negation_words]

twitter_data['tokens'] = twitter_data['cleaned_text'].apply(preprocess_tokens)

# Stemming
port_stem = PorterStemmer()
def stemming(tokens):
    return ' '.join([port_stem.stem(word) for word in tokens])

twitter_data['stemmed_content'] = twitter_data['tokens'].apply(stemming)

# VADER polarity
twitter_data['polarity'] = twitter_data['cleaned_text'].apply(lambda x: sia.polarity_scores(x)['compound'])

# Additional Features
twitter_data['text_length'] = twitter_data['text'].apply(lambda x: len(x))
twitter_data['exclamations'] = twitter_data['text'].apply(lambda x: x.count('!'))
twitter_data['all_caps'] = twitter_data['text'].apply(lambda x: sum(1 for w in x.split() if w.isupper()))

# Features and Labels
X_text = twitter_data['stemmed_content']
Y = twitter_data['target']

# Train-test split
X_train_text, X_test_text, y_train, y_test, train_df, test_df = train_test_split(
    X_text, Y, twitter_data, test_size=0.3, stratify=Y, random_state=2
)

# TF-IDF Vectorizer with unigrams and bigrams
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=10000)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)

# Numerical Features
num_features = ['polarity', 'text_length', 'exclamations', 'all_caps']
scaler = StandardScaler()
X_train_num = scaler.fit_transform(train_df[num_features])
X_test_num = scaler.transform(test_df[num_features])

X_train_combined = hstack([X_train_tfidf, csr_matrix(X_train_num)])
X_test_combined = hstack([X_test_tfidf, csr_matrix(X_test_num)])

# Train SVM with hyperparameter tuning
params = {'C': [0.01, 0.1, 1, 10], 'class_weight': [None, 'balanced']}
grid = GridSearchCV(LinearSVC(), params, scoring='f1_weighted', cv=5)
grid.fit(X_train_combined, y_train)

# Best model
best_model = grid.best_estimator_
y_pred_test = best_model.predict(X_test_combined)
y_pred_train = best_model.predict(X_train_combined)

# Evaluation
print("Best SVM Parameters:", grid.best_params_)
print("\n--- Test Data Metrics ---")
print("Accuracy:", accuracy_score(y_test, y_pred_test))
print("Precision:", precision_score(y_test, y_pred_test, average='weighted'))
print("Recall:", recall_score(y_test, y_pred_test, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred_test, average='weighted'))

print("\n--- Train Data Metrics ---")
print("Accuracy:", accuracy_score(y_train, y_pred_train))
print("Precision:", precision_score(y_train, y_pred_train, average='weighted'))
print("Recall:", recall_score(y_train, y_pred_train, average='weighted'))
print("F1 Score:", f1_score(y_train, y_pred_train, average='weighted'))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Best SVM Parameters: {'C': 0.1, 'class_weight': 'balanced'}

--- Test Data Metrics ---
Accuracy: 0.7811666666666667
Precision: 0.7816842332443094
Recall: 0.7811666666666667
F1 Score: 0.7810455675157685

--- Train Data Metrics ---
Accuracy: 0.8156571428571429
Precision: 0.8160192817760887
Recall: 0.8156571428571429
F1 Score: 0.8155917887643632


***Model Saving in .pkl format***

In [3]:
import joblib

# Save the trained SVM model
joblib.dump(svm_model, 'sentiment_svm_model.pkl')

# Save the TF-IDF vectorizer
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

# Save the StandardScaler
joblib.dump(scaler, 'scaler.pkl')

print("Model, vectorizer, and scaler saved successfully!")


Model, vectorizer, and scaler saved successfully!


***Using the SVM model in the Tool    (Part 1 )***

In [4]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import joblib
import numpy as np
import pandas as pd
from scipy.sparse import hstack, csr_matrix
from nltk.sentiment import SentimentIntensityAnalyzer
import threading
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image, ImageTk
import re
import nltk
import seaborn as sns
from nltk.corpus import stopwords

# Download stopwords
nltk.download("stopwords", quiet=True)
stop_words = set(stopwords.words("english"))

# Load the saved SVM model, vectorizer, and scaler
svm_model = joblib.load("sentiment_svm_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
scaler = joblib.load("scaler.pkl")

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Analyzes sentiment of a single text string."""
    sentence_polarity = sia.polarity_scores(text)["compound"]
    text_tfidf = vectorizer.transform([text])
    polarity_array = np.array([[sentence_polarity]])
    scaled_polarity = scaler.transform(polarity_array)
    scaled_polarity_sparse = csr_matrix(scaled_polarity)
    combined_features = hstack([text_tfidf, scaled_polarity_sparse])

    if combined_features.shape[1] != svm_model.n_features_in_:
        return None, None, f"Feature mismatch: Model expects {svm_model.n_features_in_} features, but got {combined_features.shape[1]}."

    prediction = svm_model.predict(combined_features)[0]

    if sentence_polarity > 0.1:
        sentiment = "Positive"
    elif -0.1 <= sentence_polarity <= 0.1:
        sentiment = "Neutral"
    else:
        sentiment = "Negative"

    return sentiment, sentence_polarity, None  # No error

def clean_tweet(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    text = re.sub(r"\@\w+|\#", "", text)
    text = re.sub(r"[^\w\s]", "", text)
    text = " ".join([word for word in text.split() if word not in stop_words])
    return text

categories = {
    "Technology": ["tech", "software", "AI", "data", "computer", "cloud", "robotics", "internet"],
    "Entertainment": ["movie", "music", "show", "film", "concert", "actor", "director"],
    "Politics": ["election", "government", "president", "policy", "vote", "law", "senate"],
    "Sports": ["football", "basketball", "cricket", "match", "goal", "tennis", "athlete"],
    "General Fun": ["fun", "joke", "meme", "party", "game", "laugh", "entertainment"],
    "News": ["breaking", "news", "report", "update", "headline", "journalist"]
}

def classify_tweet(text):
    for category, keywords in categories.items():
        if any(word in text for word in keywords):
            return category
    return "Other"

def analyze_dataset_thread(filepath):
    """Analyzes sentiment and classifies tweets of a dataset in a separate thread and updates the UI in the same window."""
    try:
        if filepath.endswith('.csv'):
            df = pd.read_csv(filepath)
        elif filepath.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(filepath)
        elif filepath.endswith('.txt'):
            df = pd.read_csv(filepath, sep='\t')

        text_column = None
        for col in df.columns:
            if "text" in col.lower() or "review" in col.lower() or "comment" in col.lower() or "content" in col.lower() or "message" in col.lower() or "description" in col.lower():
                text_column = col
                break

        if text_column is None:
            messagebox.showerror("Error", "Could not find a suitable text column for sentiment analysis.")
            return

        total_rows = len(df)
        results = []
        sentiment_counts = {"Positive": 0, "Neutral": 0, "Negative": 0}
        category_counts = {}

        df["cleaned_tweet"] = df[text_column].apply(clean_tweet)
        df["category"] = df["cleaned_tweet"].apply(classify_tweet)

        for category in df["category"].unique():
            category_counts[category] = len(df[df["category"] == category])

        for index, text in enumerate(df[text_column]):
            if isinstance(text, str):
                sentiment, polarity, error = analyze_sentiment(text)
                if error:
                    messagebox.showerror("Error", error)
                    return
                results.append((index + 1, text, sentiment, polarity, df.loc[index, "category"]))
                sentiment_counts[sentiment] += 1
            else:
                results.append((index + 1, text, "N/A", "N/A", df.loc[index, "category"]))

            progress_var.set((index + 1) / total_rows * 100)
            progress_label.config(text=f"Processing: {int(progress_var.get())}%")
            root.update_idletasks()

        for row in tree.get_children():
            tree.delete(row)

        for index, (serial, text, sentiment, polarity, category) in enumerate(results):
            tag = "oddrow" if index % 2 == 0 else "evenrow"
            tree.insert("", tk.END, values=(serial, text, sentiment, polarity, category), tags=(tag,))

        progress_label.config(text="Processing Completed.")

        global pie_chart_img, bar_chart_img, analyzed_df
        pie_chart_img = create_pie_chart_image(sentiment_counts)
        bar_chart_img = create_bar_chart_image(category_counts)

        analyzed_df = pd.DataFrame(results, columns=["Serial", "Text", "Sentiment", "Polarity", "Category"])

    except FileNotFoundError:
        messagebox.showerror("Error", "File not found.")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred: {e}")

def create_bar_chart_image(category_counts):
    plt.figure(figsize=(6, 4))
    sns.barplot(x=list(category_counts.keys()), y=list(category_counts.values()), palette="Set2")
    plt.xlabel("Categories")
    plt.ylabel("Tweet Count")
    plt.title("Tweet Category Distribution")
    plt.xticks(rotation=30, fontsize=8)
    plt.yticks(fontsize=8)
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    img = Image.open(buf)
    img_tk = ImageTk.PhotoImage(img)
    return img_tk

def create_pie_chart_image(sentiment_counts):
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(sentiment_counts.values(), labels=sentiment_counts.keys(), autopct='%1.1f%%', startangle=90)
    ax.axis('equal')
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    img_tk = ImageTk.PhotoImage(img)
    return img_tk  

def analyze_dataset():
    filepath = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx;*.xls"), ("Text files", "*.txt")])
    if not filepath:
        return
    thread = threading.Thread(target=analyze_dataset_thread, args=(filepath,))
    thread.start()

def analyze_sentence():
    text = text_entry.get("1.0", tk.END).strip()
    if not text:
        messagebox.showerror("Error", "Please enter a sentence.")
        return
    sentiment, polarity, error = analyze_sentiment(text)
    if error:
        messagebox.showerror("Error", error)
        return
    sentiment_label.config(text=f"Sentiment: {sentiment}")
    polarity_label.config(text=f"Polarity Score:{polarity:.4f}")

def show_charts():
    if 'pie_chart_img' not in globals() or 'bar_chart_img' not in globals():
        messagebox.showerror("Error", "Please analyze a dataset first.")
        return

    charts_window = tk.Toplevel(root)
    charts_window.title("Charts")

    # Create a frame to hold the charts side-by-side
    charts_frame = tk.Frame(charts_window)
    charts_frame.pack(pady=10)

    pie_label = tk.Label(charts_frame, image=pie_chart_img)
    pie_label.pack(side=tk.LEFT, padx=10)

    bar_label = tk.Label(charts_frame, image=bar_chart_img)
    bar_label.pack(side=tk.LEFT, padx=10)

def download_table():
    global analyzed_df
    if 'analyzed_df' not in globals() or analyzed_df is None:
        messagebox.showerror("Error", "Please analyze a dataset first.")
        return

    filepath = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
    if not filepath:
        return

    try:
        analyzed_df.to_csv(filepath, index=False)
        messagebox.showinfo("Success", "Table downloaded successfully.")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during download: {e}")

# Create GUI
root = tk.Tk()
root.title("Sentiment Analysis and Category Classification")
root.state('zoomed')
root.config(bg="#f0f8ff")

title_label = tk.Label(root, text="Sentiment Analysis and Category Classification", font=("Helvetica", 18, "bold"), bg="#f0f8ff", fg="#4b8b3b")
title_label.pack(pady=10)

text_entry = tk.Text(root, height=5, width=90, font=("Helvetica", 12))
text_entry.pack(pady=10)

analyze_sentence_button = tk.Button(root, text="Analyze Sentence", command=analyze_sentence, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
analyze_sentence_button.pack(pady=5)

sentiment_label = tk.Label(root, text="Sentiment: ", font=("Helvetica", 12, "bold"), bg="#f0f8ff")
sentiment_label.pack()

polarity_label = tk.Label(root, text="Polarity Score: ", font=("Helvetica", 12, "bold"), bg="#f0f8ff")
polarity_label.pack()

# Create a frame to hold the buttons horizontally
button_frame = tk.Frame(root, bg="#f0f8ff")
button_frame.pack(pady=10)

analyze_dataset_button = tk.Button(button_frame, text="Analyze Dataset", command=analyze_dataset, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
analyze_dataset_button.pack(side=tk.LEFT, padx=5)

show_charts_button = tk.Button(button_frame, text="Show Charts", command=show_charts, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
show_charts_button.pack(side=tk.LEFT, padx=5)

download_table_button = tk.Button(button_frame, text="Download Table", command=download_table, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
download_table_button.pack(side=tk.LEFT, padx=5)

progress_frame = tk.Frame(root, bg="#f0f8ff")
progress_frame.pack(pady=10, fill=tk.BOTH, expand=True)

progress_var = tk.DoubleVar()
progress_bar = ttk.Progressbar(progress_frame, variable=progress_var, maximum=100)
progress_bar.pack(pady=3, fill=tk.X, padx=20)

progress_label = tk.Label(progress_frame, text="Processing: 0%", bg="#f0f8ff", font=("Helvetica", 12))
progress_label.pack(pady=5)

style = ttk.Style()
style.configure("Treeview", rowheight=25, font=("Helvetica", 12))
style.configure("Treeview.Heading", font=("Helvetica", 12, "bold"))
style.layout("Treeview", [('Treeview.treearea', {'sticky': 'nswe'})])
style.map("Treeview", background=[("selected", "#4b8b3b")])

tree = ttk.Treeview(progress_frame, columns=("Serial", "Text", "Sentiment", "Polarity", "Category"), show="headings", height=10, style="Treeview")
tree.heading("Serial", text="Serial")
tree.heading("Text", text="Text")
tree.heading("Sentiment", text="Sentiment")
tree.heading("Polarity", text="Polarity")
tree.heading("Category", text="Category")

tree.column("Serial", width=50, anchor="center")
tree.column("Text", width=200, anchor="center")
tree.column("Sentiment", width=100, anchor="center")
tree.column("Polarity", width=80, anchor="center")
tree.column("Category", width=120, anchor="center")

tree["show"] = "headings"
tree.tag_configure("oddrow", background="#f9f9f9")
tree.tag_configure("evenrow", background="#e6e6e6")
tree.pack(fill=tk.BOTH, expand=True, side=tk.LEFT)

root.mainloop()

# Part 2

In [3]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import joblib
import numpy as np
import pandas as pd
from scipy.sparse import hstack, csr_matrix
from nltk.sentiment import SentimentIntensityAnalyzer
import threading
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image, ImageTk
import re
import nltk
import seaborn as sns
from nltk.corpus import stopwords
from PIL import Image, ImageTk  # Import PIL for image handling

# Download stopwords
nltk.download("stopwords", quiet=True)
stop_words = set(stopwords.words("english"))

# Load the saved SVM model, vectorizer, and scaler
svm_model = joblib.load("sentiment_svm_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
scaler = joblib.load("scaler.pkl")

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Analyzes sentiment of a single text string."""
    sentence_polarity = sia.polarity_scores(text)["compound"]
    text_tfidf = vectorizer.transform([text])
    polarity_array = np.array([[sentence_polarity]])
    scaled_polarity = scaler.transform(polarity_array)
    scaled_polarity_sparse = csr_matrix(scaled_polarity)
    combined_features = hstack([text_tfidf, scaled_polarity_sparse])

    if combined_features.shape[1] != svm_model.n_features_in_:
        return None, None, f"Feature mismatch: Model expects {svm_model.n_features_in_} features, but got {combined_features.shape[1]}."

    prediction = svm_model.predict(combined_features)[0]

    if sentence_polarity > 0.1:
        sentiment = "Positive"
    elif -0.1 <= sentence_polarity <= 0.1:
        sentiment = "Neutral"
    else:
        sentiment = "Negative"

    return sentiment, sentence_polarity, None  # No error

def clean_tweet(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    text = re.sub(r"\@\w+|\#", "", text)
    text = re.sub(r"[^\w\s]", "", text)
    text = " ".join([word for word in text.split() if word not in stop_words])
    return text

categories = {
    "Technology": ["tech", "software", "AI", "data", "computer", "cloud", "robotics", "internet"],
    "Entertainment": ["movie", "music", "show", "film", "concert", "actor", "director"],
    "Politics": ["election", "government", "president", "policy", "vote", "law", "senate"],
    "Sports": ["football", "basketball", "cricket", "match", "goal", "tennis", "athlete"],
    "General Fun": ["fun", "joke", "meme", "party", "game", "laugh", "entertainment"],
    "News": ["breaking", "news", "report", "update", "headline", "journalist"],
    "Healthcare": ["health", "medical", "doctor", "hospital", "patient", "disease", "treatment", "vaccine", "covid", "pandemic", "symptom", "diagnosis", "pharmacy", "medicine", "virus", "wellness"],
    "Education": ["school", "college", "university", "student", "teacher", "learn", "study", "education"],
    "Finance": ["stock", "market", "money", "bank", "finance", "investment", "economy"],
    "Travel": ["travel", "trip", "vacation", "tour", "flight", "hotel", "destination"],
    "Food": ["food", "recipe", "restaurant", "cuisine", "eat", "cook", "dish"],
    "Fashion": ["fashion", "style", "clothing", "design", "trend", "apparel"],
    "Environment": ["climate", "environment", "pollution", "sustainability", "eco", "green"],
    "Business": ["business", "company", "startup", "entrepreneur", "market", "sales", "product", "innovation"],
    "Art": ["art", "painting", "sculpture", "artist", "exhibition", "gallery", "creative"],
    "Books": ["book", "reading", "author", "literature", "novel", "poetry", "library"],
    "Gaming": ["game", "gaming", "videogame", "console", "player", "esports", "virtual"]
}

def classify_tweet(text):
    for category, keywords in categories.items():
        if any(word in text for word in keywords):
            return category
    return "Other"

def analyze_dataset_thread(filepath):
    """Analyzes sentiment and classifies tweets of a dataset in a separate thread and updates the UI in the same window."""
    try:
        if filepath.endswith('.csv'):
            df = pd.read_csv(filepath)
        elif filepath.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(filepath)
        elif filepath.endswith('.txt'):
            df = pd.read_csv(filepath, sep='\t')

        text_column = None
        for col in df.columns:
            if "text" in col.lower() or "review" in col.lower() or "comment" in col.lower() or "content" in col.lower() or "message" in col.lower() or "description" in col.lower():
                text_column = col
                break

        if text_column is None:
            messagebox.showerror("Error", "Could not find a suitable text column for sentiment analysis.")
            return

        total_rows = len(df)
        results = []
        sentiment_counts = {"Positive": 0, "Neutral": 0, "Negative": 0}
        category_counts = {}

        df["cleaned_tweet"] = df[text_column].apply(clean_tweet)
        df["category"] = df["cleaned_tweet"].apply(classify_tweet)

        for category in df["category"].unique():
            category_counts[category] = len(df[df["category"] == category])

        for index, text in enumerate(df[text_column]):
            if isinstance(text, str):
                sentiment, polarity, error = analyze_sentiment(text)
                if error:
                    messagebox.showerror("Error", error)
                    return
                results.append((index + 1, text, sentiment, polarity, df.loc[index, "category"]))
                sentiment_counts[sentiment] += 1
            else:
                results.append((index + 1, text, "N/A", "N/A", df.loc[index, "category"]))

            progress_var.set((index + 1) / total_rows * 100)
            progress_label.config(text=f"Processing: {int(progress_var.get())}%")
            root.update_idletasks()

        for row in tree.get_children():
            tree.delete(row)

        for index, (serial, text, sentiment, polarity, category) in enumerate(results):
            tag = "oddrow" if index % 2 == 0 else "evenrow"
            tree.insert("", tk.END, values=(serial, text, sentiment, polarity, category), tags=(tag,))

        progress_label.config(text="Processing Completed.")

        global pie_chart_img, bar_chart_img, analyzed_df
        pie_chart_img = create_pie_chart_image(sentiment_counts)
        bar_chart_img = create_bar_chart_image(category_counts)

        analyzed_df = pd.DataFrame(results, columns=["Serial", "Text", "Sentiment", "Polarity", "Category"])

    except FileNotFoundError:
        messagebox.showerror("Error", "File not found.")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred: {e}")

def create_bar_chart_image(category_counts):
    plt.figure(figsize=(6, 4))
    sns.barplot(x=list(category_counts.keys()), y=list(category_counts.values()), palette="Set2")
    plt.xlabel("Categories")
    plt.ylabel("Tweet Count")
    plt.title("Tweet Category Distribution")
    plt.xticks(rotation=30, fontsize=8)
    plt.yticks(fontsize=8)
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    img = Image.open(buf)
    img_tk = ImageTk.PhotoImage(img)
    return img_tk

def create_pie_chart_image(sentiment_counts):
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(sentiment_counts.values(), labels=sentiment_counts.keys(), autopct='%1.1f%%', startangle=90)
    ax.axis('equal')
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    img_tk = ImageTk.PhotoImage(img)
    return img_tk

def analyze_dataset():
    filepath = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx;*.xls"), ("Text files", "*.txt")])
    if not filepath:
        return
    thread = threading.Thread(target=analyze_dataset_thread, args=(filepath,))
    thread.start()

def analyze_sentence():
    text = text_entry.get("1.0", tk.END).strip()
    if not text:
        messagebox.showerror("Error", "Please enter a sentence.")
        return
    sentiment, polarity, error = analyze_sentiment(text)
    if error:
        messagebox.showerror("Error", error)
        return
    sentiment_label.config(text=f"Sentiment: {sentiment}")
    polarity_label.config(text=f"Polarity Score:{polarity:.4f}")

def show_charts():
    if 'pie_chart_img' not in globals() or 'bar_chart_img' not in globals():
        messagebox.showerror("Error", "Please analyze a dataset first.")
        return

    charts_window = tk.Toplevel(root)
    charts_window.title("Charts")

    # Create a frame to hold the charts side-by-side
    charts_frame = tk.Frame(charts_window)
    charts_frame.pack(pady=10)

    pie_label = tk.Label(charts_frame, image=pie_chart_img)
    pie_label.pack(side=tk.LEFT, padx=10)

    bar_label = tk.Label(charts_frame, image=bar_chart_img)
    bar_label.pack(side=tk.LEFT, padx=10)

def download_table():
    global analyzed_df
    if 'analyzed_df' not in globals() or analyzed_df is None:
        messagebox.showerror("Error", "Please analyze a dataset first.")
        return

    filepath = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
    if not filepath:
        return

    try:
        analyzed_df.to_csv(filepath, index=False)
        messagebox.showinfo("Success", "Table downloaded successfully.")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during download: {e}")

# Create GUI
def main_gui():
    global root, text_entry, sentiment_label, polarity_label, progress_var, progress_bar, progress_label, tree
    root = tk.Tk()
    root.title("Sentiment Analysis and Category Classification")
    root.state('zoomed')
    root.config(bg="#f0f8ff")

    title_label = tk.Label(root, text="Sentiment Analysis and Category Classification", font=("Helvetica", 18, "bold"), bg="#f0f8ff", fg="#4b8b3b")
    title_label.pack(pady=10)

    text_entry = tk.Text(root, height=5, width=90, font=("Helvetica", 12))
    text_entry.pack(pady=10)

    analyze_sentence_button = tk.Button(root, text="Analyze Sentence", command=analyze_sentence, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
    analyze_sentence_button.pack(pady=5)

    sentiment_label = tk.Label(root, text="Sentiment: ", font=("Helvetica", 12, "bold"), bg="#f0f8ff")
    sentiment_label.pack()

    polarity_label = tk.Label(root, text="Polarity Score: ", font=("Helvetica", 12, "bold"), bg="#f0f8ff")
    polarity_label.pack()

    # Create a frame to hold the buttons horizontally
    button_frame = tk.Frame(root, bg="#f0f8ff")
    button_frame.pack(pady=10)

    analyze_dataset_button = tk.Button(button_frame, text="Analyze Dataset", command=analyze_dataset, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
    analyze_dataset_button.pack(side=tk.LEFT, padx=5)

    show_charts_button = tk.Button(button_frame, text="Show Charts", command=show_charts, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
    show_charts_button.pack(side=tk.LEFT, padx=5)

    download_table_button = tk.Button(button_frame, text="Download Table", command=download_table, font=("Helvetica", 12), bg="#4b8b3b", fg="white", relief="raised", width=20)
    download_table_button.pack(side=tk.LEFT, padx=5)

    progress_frame = tk.Frame(root, bg="#f0f8ff")
    progress_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    progress_var = tk.DoubleVar()
    progress_bar = ttk.Progressbar(progress_frame, variable=progress_var, maximum=100)
    progress_bar.pack(pady=3, fill=tk.X, padx=20)

    progress_label = tk.Label(progress_frame, text="Processing: 0%", bg="#f0f8ff", font=("Helvetica", 12))
    progress_label.pack(pady=5)

    style = ttk.Style()
    style.configure("Treeview", rowheight=25, font=("Helvetica", 12))
    style.configure("Treeview.Heading", font=("Helvetica", 12, "bold"))
    style.layout("Treeview", [('Treeview.treearea', {'sticky': 'nswe'})])
    style.map("Treeview", background=[("selected", "#4b8b3b")])

    tree = ttk.Treeview(progress_frame, columns=("Serial", "Text", "Sentiment", "Polarity", "Category"), show="headings", height=10, style="Treeview")
    tree.heading("Serial", text="Serial")
    tree.heading("Text", text="Text")
    tree.heading("Sentiment", text="Sentiment")
    tree.heading("Polarity", text="Polarity")
    tree.heading("Category", text="Category")

    tree.column("Serial", width=50, anchor="center")
    tree.column("Text", width=200, anchor="center")
    tree.column("Sentiment", width=100, anchor="center")
    tree.column("Polarity", width=80, anchor="center")
    tree.column("Category", width=120, anchor="center")

    tree["show"] = "headings"
    tree.tag_configure("oddrow", background="#f9f9f9")
    tree.tag_configure("evenrow", background="#e6e6e6")
    tree.pack(fill=tk.BOTH, expand=True, side=tk.LEFT)

    root.mainloop()









def login():
    login_window = tk.Tk()
    login_window.title("Login")
    login_window.state('zoomed')
    login_window.configure(bg="#e0f2fe")  # Light blue/gray background

    # Title Frame with Green Background Strip
    title_frame = tk.Frame(login_window, bg="#4b8b3b")  # Green background strip
    title_frame.pack(fill=tk.X, pady=(40, 0))  # Fill horizontally, top padding

    title_label = tk.Label(title_frame, text="SIGN IN", font=("Roboto", 24, "bold"), bg="#4b8b3b", fg="white")  # White text on green
    title_label.pack(pady=10)  # Padding inside the frame

    # Username - Bold Label
    username_label = tk.Label(login_window, text="Username", font=("Roboto", 14, "bold"), bg="#e0f2fe", fg="#666")
    username_label.pack(pady=(20, 2))  # Increased top padding
    username_entry = tk.Entry(login_window, font=("Roboto", 12), width=30, borderwidth=2, relief=tk.FLAT)
    username_entry.pack(pady=(2, 10))

    # Password - Bold Label
    password_label = tk.Label(login_window, text="Password", font=("Roboto", 14, "bold"), bg="#e0f2fe", fg="#666")
    password_label.pack(pady=(5, 2))
    password_entry = tk.Entry(login_window, show="*", font=("Roboto", 12), width=30, borderwidth=2, relief=tk.FLAT)
    password_entry.pack(pady=(2, 10))

    def check_login(event=None):  # Add event parameter
        username = username_entry.get()
        password = password_entry.get()
        # Replace with your actual authentication logic
        if username == "user" and password == "pass":
            login_window.destroy()
            main_gui()  # Call main_gui() after successful login
        else:
            messagebox.showerror("Login Failed", "Incorrect username or password.")

    # Login Button
    login_button = tk.Button(login_window, text="Sign In", command=check_login, font=("Roboto", 12, "bold"), bg="#4b8b3b", fg="white", padx=20, pady=10, relief=tk.FLAT, borderwidth=0)
    login_button.pack(pady=(20, 20))

    # Bind Enter key to check_login function
    login_window.bind('<Return>', check_login)

    login_window.mainloop()

if __name__ == "__main__":
    login()