In [1]:
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from xgboost import XGBClassifier   
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
import contractions
import emoji

In [2]:
import nltk

# Fetch every NLTK resource the preprocessing step relies on (tokenizer
# models, stopword list, WordNet data). Already-installed packages are
# reported as up-to-date by the downloader.
for nltk_resource in ('stopwords', 'punkt', 'wordnet', 'punkt_tab'):
    nltk.download(nltk_resource)

# Shared, module-level helpers reused by preprocess() on every call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [4]:
# Root directory of the sentiment-analysis artifacts. The default is the
# location used when this notebook was authored; set the
# SENTIMENT_ANALYSIS_DIR environment variable to run the notebook on another
# machine without editing this cell.
SENTIMENT_ANALYSIS_DIR = os.environ.get(
    "SENTIMENT_ANALYSIS_DIR",
    "K:/Computer Science/AIMLDL/Brand_Reputation_Management_System/sentiment_analysis",
)


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    Raises:
        OSError: if ``path`` cannot be opened for reading.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in the
    file, so only point this at artifacts produced by a trusted pipeline.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


# Paths are joined with "/" (not os.path.join) so that, with the default
# root, the resulting strings are identical to the previously hard-coded ones.
tfidf_path = f"{SENTIMENT_ANALYSIS_DIR}/feature_extraction/features/tfidf.pkl"
label_encoder_path = f"{SENTIMENT_ANALYSIS_DIR}/feature_extraction/features/label_encoder.pkl"
model_path = f"{SENTIMENT_ANALYSIS_DIR}/model_building/models/XGBClassifier.pkl"

# Objects consumed by predict_sentiment(): text vectorizer, classifier and
# the encoder that maps predicted class ids back to sentiment labels.
tfidf = _load_pickle(tfidf_path)
label_encoder = _load_pickle(label_encoder_path)
model = _load_pickle(model_path)

In [5]:
def preprocess(text):
    """Normalise raw review text into a space-joined string of lemmas.

    Steps, applied in this order:
      1. run the text through ``contractions.fix``;
      2. strip URLs (``http...``, ``www...``);
      3. strip emoji;
      4. drop every character that is not an ASCII letter or whitespace;
      5. lowercase;
      6. apply TextBlob's spelling correction;
      7. tokenize, drop English stopwords, lemmatize each remaining token.

    Args:
        text: the raw input string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    expanded = contractions.fix(text)
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', expanded, flags=re.MULTILINE)
    without_emoji = emoji.replace_emoji(without_urls, replace='')
    letters_only = re.sub(r"[^a-zA-Z\s]", "", without_emoji)
    corrected = str(TextBlob(letters_only.lower()).correct())

    # Stopword filtering happens on the raw token, before lemmatization.
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(corrected)
        if token not in stop_words
    ]
    return " ".join(lemmas)

def predict_sentiment(text):
    """Predict the sentiment label for a single piece of raw text.

    The text is cleaned with ``preprocess``, vectorised with the loaded
    ``tfidf`` object, classified by ``model``, and the predicted class is
    mapped back to its label through ``label_encoder``.

    Args:
        text: the raw input string.

    Returns:
        The first (only) decoded label for the given text.
    """
    # The vectorizer is given a one-element batch, so exactly one
    # prediction comes back.
    features = tfidf.transform([preprocess(text)])
    encoded_prediction = model.predict(features)
    return label_encoder.inverse_transform(encoded_prediction)[0]

In [6]:
# Smoke-test the full pipeline on one hand-written review.
custom_text = "I've used this product for a while now, and it's been reliable. However, it's not the best I've ever used"

predicted_sentiment = predict_sentiment(text=custom_text)
print(f"Predicted sentiment: {predicted_sentiment}")

Predicted sentiment: positive
