In [1]:
import html, json, re
import pandas as pd
import emoji
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from joblib import load
import joblib


## Preprocessing

In [2]:
# Lookup tables consumed by preprocess(): slang -> replacement text and
# emoji -> textual meaning. The files are read explicitly as UTF-8 because
# open() otherwise falls back to the locale's default encoding, which can
# fail on (or silently garble) the non-ASCII emoji keys on some platforms.
with open("./slang_dict.json", 'r', encoding="utf-8") as f:
    slang_dict = json.load(f)

with open("./emoji_dic.json", 'r', encoding="utf-8") as f:
    emoji_dict = json.load(f)

In [3]:
def preprocess(text):
    """Normalize a raw review string before featurization.

    Steps, in order: unescape HTML entities, lowercase, strip URLs and
    ``<...>`` tags, replace emoji with text (``emoji_dict`` entries first,
    then ``emoji.demojize`` for whatever is left), tokenize, drop tokens that
    are a single punctuation character, lemmatize, substitute slang through
    ``slang_dict``, and finally delete every character that is neither
    alphanumeric nor whitespace.

    Args:
        text: Raw review text.

    Returns:
        The cleaned text as one string of tokens joined by spaces.
    """
    # Convert HTML entities (e.g. "&amp;") back to characters, then lowercase.
    decoded_text = html.unescape(text).lower()
    # Drop anything that starts with 'http' up to the next whitespace.
    text = re.sub(r'http\S+', '', decoded_text)
    # Drop spans that start with '<' and end with '>' (non-greedy).
    text = re.sub(r'<.*?>', '', text)

    # Replace emoji with their textual meaning: the custom dictionary takes
    # precedence, emoji.demojize handles any emoji it did not cover.
    for em, meaning in emoji_dict.items():
        text = text.replace(em, meaning)
    text = emoji.demojize(text)

    # Standard preprocessing: tokenize, drop punctuation tokens, lemmatize.
    tokens = word_tokenize(text)
    # Built once per call instead of once per token (loop-invariant work).
    punctuation_chars = set(string.punctuation)
    lemmatizer = WordNetLemmatizer()
    lemmatized_text = [
        lemmatizer.lemmatize(word)
        for word in tokens
        if word not in punctuation_chars
    ]
    text = ' '.join(lemmatized_text)

    # Substitute slang words; words without an entry are kept unchanged.
    corrected_words = [slang_dict.get(word, word) for word in text.split()]
    text = ' '.join(corrected_words)

    # Finally keep only letters, digits and whitespace.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    return text



## Featurization

In [4]:
# from transformers import BertTokenizer, BertModel
# import torch

# # Check if CUDA is available
# if torch.cuda.is_available():
#     device = torch.device("cuda")
#     print(f"Using GPU: {torch.cuda.get_device_name(0)}")
# else:
#     device = torch.device("cpu")
#     print("Using CPU")

# # Load pre-trained BERT model and tokenizer
# bert_model = BertModel.from_pretrained('bert-base-uncased', return_dict=True).to(device)
# bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [5]:
# def get_bert_embedding(comment):
#     inputs = bert_tokenizer.encode_plus(
#         comment,
#         add_special_tokens=True,
#         return_tensors='pt',
#         max_length=128,
#         pad_to_max_length=True,
#         return_attention_mask=True
#     )
    
#     # Move input tensors to the GPU
#     inputs = {key: value.to(device) for key, value in inputs.items()}

#     with torch.no_grad():
#         outputs = bert_model(inputs['input_ids'], attention_mask=inputs['attention_mask'])

#     # Extract the [CLS] token's embedding and move it back to the CPU
#     cls_embedding = outputs['last_hidden_state'][:, 0, :].squeeze().cpu().numpy()
#     return cls_embedding

In [6]:
# Load the TF-IDF vectorizer used by get_tfidf_embedding() (presumably fitted
# during training — TODO confirm). The path is relative to the working directory.
# NOTE(review): joblib files are pickle-based; only load artifacts from a trusted source.
loaded_tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')


def get_tfidf_embedding(comment):
    """Vectorize text with the loaded TF-IDF vectorizer.

    Args:
        comment: Either a single string or an iterable of strings. A single
            string is wrapped in a one-element list before transforming.

    Returns:
        Whatever ``loaded_tfidf_vectorizer.transform`` returns for the input
        documents.
    """
    documents = [comment] if isinstance(comment, str) else comment
    return loaded_tfidf_vectorizer.transform(documents)


In [7]:

# Load the trained classifier used by infer() from disk (path is relative to
# the working directory).
# NOTE(review): joblib files are pickle-based; only load artifacts from a trusted source.
loaded_model = load('svm_classifier.joblib')
def infer(X):
    """Predict the label for one feature vector.

    Args:
        X: Feature data for a single sample; it is reshaped to one row
            (``reshape(1, -1)``) before being handed to the model.

    Returns:
        The result of ``loaded_model.predict`` on the single-row input.
    """
    single_row = X.reshape(1, -1)
    prediction = loaded_model.predict(single_row)
    return prediction


In [8]:
# Sample (strongly negative) movie review used below to smoke-test the
# full preprocess -> TF-IDF -> classifier pipeline.
aa = """Where do I even begin with the Oppenheimer movie? It's a perplexing mess of a film that fails to capture the essence of its subject matter and leaves the audience scratching their heads in confusion. With high expectations due to its talented cast and promising premise, this movie ultimately disappoints on every level.

First and foremost, the pacing is an absolute nightmare. The movie meanders aimlessly, dragging out scenes that add little to the plot and leaving essential elements underdeveloped. It's almost as if the filmmakers had no idea how to structure the narrative or maintain a cohesive flow. As a result, the movie feels like a jumbled collection of disconnected events that leave viewers struggling to make sense of the story.

The characters in Oppenheimer are equally underwhelming. Despite the exceptional actors involved, their performances are hampered by a lack of depth and poorly written dialogues. The titular character, J. Robert Oppenheimer, comes across as one-dimensional and devoid of real personality or emotional resonance. Supporting characters receive even less attention, leaving us indifferent to their fates and unable to invest in their arcs.

The film's attempts at historical accuracy are laughable at best. While some creative liberties are expected in any biographical movie, Oppenheimer takes it to an extreme. The inaccuracies and distortions of actual events not only disrespect the legacy of those involved but also undermine the film's credibility. The filmmakers were more interested in sensationalism than telling a compelling and fact-based story.

Perhaps the most egregious aspect of the Oppenheimer movie is its lack of a coherent message or thematic depth. It raises significant moral and ethical questions about the development of nuclear weapons and their consequences, but it never delves into these issues with any real substance. Instead, the movie superficially glazes over these crucial aspects, leaving viewers with a sense of emptiness and missed opportunities.

The cinematography and direction do little to salvage the film's shortcomings. The visual style lacks creativity, and the director seems to rely on tired and overused cinematic clichés. The lack of a distinct visual identity only adds to the overall mediocrity of the movie.

In conclusion, the Oppenheimer movie is a colossal disappointment. Its weak storytelling, poorly developed characters, historical inaccuracies, and lack of a compelling message all contribute to a film that is an absolute failure. Save your time and money and skip this cinematic disaster. There are far better biographical dramas out there that do justice to their subjects and deliver a more engaging and coherent experience. """

In [9]:
def get_sentiment(review):
    """Run the full inference pipeline on one review.

    The review is cleaned with ``preprocess``, vectorized with
    ``get_tfidf_embedding`` and classified with ``infer``.

    Args:
        review: Raw review text.

    Returns:
        The value returned by ``infer`` for the review's features.
    """
    cleaned_review = preprocess(review)
    return infer(get_tfidf_embedding(cleaned_review))

# Smoke test: classify the sample review; the cell displays the returned value.
get_sentiment(aa)

array(['negative'], dtype=object)

In [10]:
import tkinter as tk
from tkinter import messagebox

# Sentiment lookup used by the GUI callback.
def get_sentiment_button(review):
    """Return the sentiment for ``review`` by delegating to ``get_sentiment``.

    Args:
        review: Raw review text taken from the GUI.

    Returns:
        Exactly what ``get_sentiment(review)`` returns.
    """
    result = get_sentiment(review)
    return result

# Function to be called when the "Get Sentiment" button is pressed.
def on_button_press():
    """Classify the text currently in the ``entry`` widget and show the result.

    An empty (or whitespace-only) review is not sent to the model; a warning
    dialog is shown instead. Otherwise the predicted label is shown in an
    info dialog.
    """
    # "end-1c" leaves out the trailing newline the Text widget always appends.
    review = entry.get("1.0", "end-1c")
    if not review.strip():
        messagebox.showwarning("Sentiment Result", "Please enter a review first.")
        return

    sentiment = get_sentiment_button(review)
    # The pipeline returns an array-like holding one label; unwrap it so the
    # dialog reads "negative" rather than the array repr "['negative']".
    if isinstance(sentiment, str):
        sentiment_text = sentiment
    else:
        try:
            sentiment_text = sentiment[0]
        except (TypeError, IndexError, KeyError):
            # Not indexable or empty: fall back to showing the value as-is.
            sentiment_text = sentiment
    messagebox.showinfo("Sentiment Result", f"The sentiment is: {sentiment_text}")

# Create the main application window.
root = tk.Tk()
root.title("Sentiment Analysis")

# Create the widgets and pack them top to bottom: prompt label, multi-line
# text box for the review, and the button that triggers classification.
label = tk.Label(root, text="Enter your review:")
label.pack(pady=20)

# `entry` is read by on_button_press(), so it must stay a module-level name.
entry = tk.Text(root, height=10, width=50)
entry.pack(pady=20)

button = tk.Button(root, text="Get Sentiment", command=on_button_press)
button.pack(pady=20)

# Start the Tk event loop. This call blocks (and so keeps the notebook cell
# running) until the window is closed.
root.mainloop()
