In [1]:
# Mount Google Drive at /content/drive; later cells read model files from under this path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from IPython.display import display
import nltk
import numpy as np
# Download the NLTK data packages (WordNet, stopword lists, Punkt) that the
# NLTK helpers imported below rely on.
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
# Shared module-level stemmer, English stopword set and lemmatizer.
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))
from nltk.stem import WordNetLemmatizer 
lemmatizer = WordNetLemmatizer()
import gensim

from sklearn.feature_extraction.text import CountVectorizer
import re
import pickle

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
### Preprocess Topic Modelling
# Module-level objects read by preprocess() below. These repeat, with the same
# values, the assignments already made in the imports cell.
stop_words = set(stopwords.words('english'))
ps = PorterStemmer()
lemmatizer = WordNetLemmatizer()

def preprocess(text):
  """Convert raw text into the token list used for topic modelling.

  Each token goes through the same pipeline: the text is lower-cased and
  split with ``word_tokenize``; tokens of three characters or fewer and
  tokens present in the module-level ``stop_words`` set are discarded; the
  survivors are Porter-stemmed and the stem is then WordNet-lemmatized.

  Args:
    text: Input string.

  Returns:
    list of processed token strings, in their original order.
  """
  processed_tweet = []
  for token in word_tokenize(text.lower()):
    # Short tokens are dropped first, then stopwords (same filters as before).
    if len(token) <= 3 or token in stop_words:
      continue
    processed_tweet.append(lemmatizer.lemmatize(ps.stem(token)))
  return processed_tweet

def extractTopic(topicProb):
  """Return the label of the most probable topic.

  Args:
    topicProb: Iterable of ``(topic_id, probability)`` pairs, such as the
      value obtained by indexing the LDA model with a bag-of-words document.

  Returns:
    'Food', 'Place' or 'Service' for topic ids 0, 1 and 2 respectively.
    ``None`` when the winning id is not one of those, or when ``topicProb``
    is empty.
  """
  topic_labels = {0: 'Food', 1: 'Place', 2: 'Service'}
  # default=None stops max() from raising ValueError on an empty sequence.
  best = max(topicProb, key=lambda item: item[1], default=None)
  if best is None:
    return None
  return topic_labels.get(best[0])

def get_preprocessed_text(text, joining=False, stemming=True, lemmatizing=False, tokenize=True, case_fold=True, punctuation_remove=True, remove_stopword=False):
  """Clean a piece of text for the sentiment vectorizer.

  The steps run in this fixed order, each one gated by its flag:
  lower-casing, separator replacement, tokenization, stopword removal,
  stemming and finally joining.

  Args:
    text: Input string.
    joining: If True, return the tokens joined by single spaces.
    stemming: If True, Porter-stem every token.
    lemmatizing: Accepted for compatibility; currently has no effect.
    tokenize: If True, split the text into runs of word characters.
    case_fold: If True, lower-case the text first.
    punctuation_remove: If True, replace every match of the separator
      pattern with a single space.
    remove_stopword: If True, drop NLTK English stopwords.

  Returns:
    A string when ``joining`` is True or when no token-level step ran;
    otherwise a list of token strings.
  """
  # Raw string: the pattern itself is unchanged, but "\s" is no longer an
  # invalid escape sequence inside an ordinary string literal.
  REPLACE_WITH_SPACE = re.compile(r"(<br\s/><br\s/?)|(-)|(/)|(:).")

  if case_fold:
    text = text.lower()
  if punctuation_remove:
    text = REPLACE_WITH_SPACE.sub(" ", text)
  if tokenize:
    text = nltk.RegexpTokenizer(r"\w+").tokenize(text)
  elif (remove_stopword or stemming) and isinstance(text, str):
    # Token-level steps need a list of tokens; iterating the raw string
    # would process it one character at a time.
    text = text.split()
  if remove_stopword:
    # Built only when needed instead of on every call.
    stop_words = set(stopwords.words('english'))
    text = [token for token in text if token not in stop_words]
  if stemming:
    stemmer = PorterStemmer()
    text = [stemmer.stem(w) for w in text]
  if lemmatizing:
    # NOTE(review): no lemmatization is implemented; the flag is a no-op.
    pass

  # Joining an un-tokenized string would insert spaces between characters.
  if joining and not isinstance(text, str):
    text = " ".join(text)

  return text

In [4]:
# load lda model
# The file lives under /content/drive, i.e. the Google Drive mount point.
lda_model = gensim.models.LdaMulticore.load('/content/drive/Shareddrives/NLP/raw_lda/lda_model.model')

# vectorizer for sentiment
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
filename_cv="/content/drive/Shareddrives/NLP/vectorizer_final.pkl"
with open(filename_cv, "rb") as file:
  vectorizer = pickle.load(file)

# sentiment analysis model
# Same pickle caveat as above applies to this file.
filename="/content/drive/Shareddrives/NLP/sentiment_prediction_model_final.pkl"
with open(filename, 'rb') as file:
  sentiment_pred_model = pickle.load(file)

In [5]:
# application Pipeline
def testing(input):
  """Run one review through topic extraction and sentiment prediction.

  Side effects, in order: prints the cleaned text fed to the sentiment
  vectorizer, displays a one-row DataFrame with the columns ``text``,
  ``topic`` and ``sentiment``, then prints the topic probabilities.

  Args:
    input: Raw review string.

  Returns:
    None.
  """
  # Topic: bag-of-words over the LDA dictionary, then the most likely label.
  bow = lda_model.id2word.doc2bow(preprocess(input))
  topicProb = lda_model[bow]
  topic_label = extractTopic(topicProb)

  # Sentiment: clean, vectorize, predict.
  clean_text = get_preprocessed_text(input, joining=True)
  print(clean_text)
  features = vectorizer.transform(np.array([clean_text])).toarray()
  sentiment = sentiment_pred_model.predict(features)

  # -1 -> negative, 0 -> neutral, anything else -> positive.
  if sentiment == -1:
    sentiment_label = "negative"
  elif sentiment == 0:
    sentiment_label = "neutral"
  else:
    sentiment_label = "positive"

  result = pd.DataFrame({
    "text": [input],
    "topic": [topic_label],
    "sentiment": [sentiment_label],
  })

  display(result)
  print(topicProb)

In [15]:
# Example 1: short review. Named input1 (matching input2..input4) so the
# notebook no longer rebinds the builtin input().
input1 = """The seafood was great but its very spicy"""

testing(input1)

the seafood wa great but it veri spici


Unnamed: 0,text,topic,sentiment
0,The seafood was great but its very spicy,Food,neutral


[(0, 0.48691884), (1, 0.3857423), (2, 0.12733886)]


In [16]:
# Example 2: long, strongly negative review.
input2 = """Each slice was inconsistent with toppings and how cooked it was. Salad was so rotted I had to throw it away or risk food poisoning. The pasta looked crusty so I stayed away should have just stayed away from this place altogether.Awful. Anyone with more than a one star review was paid for sure."""

testing(input2)

each slice wa inconsist with top and how cook it wa salad wa so rot i had to throw it away or risk food poison the pasta look crusti so i stay away should have just stay away from thi place altogeth aw anyon with more than a one star review wa paid for sure


Unnamed: 0,text,topic,sentiment
0,Each slice was inconsistent with toppings and ...,Place,negative


[(0, 0.031658404), (1, 0.73701787), (2, 0.23132372)]


In [17]:
# Example 3: positive review that mentions atmosphere, staff and food.
input3 = """This place is great! Atmosphere is chill and cool but the staff is also really friendly. They know what they’re doing and what they’re talking about, and you can tell making the customers happy is their main priority. Food is pretty good, some italian classics and some twists, and for their prices it’s 100% worth it."""

testing(input3)

thi place is great atmospher is chill and cool but the staff is also realli friendli they know what they re do and what they re talk about and you can tell make the custom happi is their main prioriti food is pretti good some italian classic and some twist and for their price it s 100 worth it


Unnamed: 0,text,topic,sentiment
0,This place is great! Atmosphere is chill and c...,Place,positive


[(0, 0.2455777), (1, 0.38936552), (2, 0.36505675)]


In [18]:
# Example 4: praises the food but criticises the staff.
input4 = """Seafood is delicious, but the staff a bit horrible when serving the dish"""
testing(input4)

seafood is delici but the staff a bit horribl when serv the dish


Unnamed: 0,text,topic,sentiment
0,"Seafood is delicious, but the staff a bit horr...",Service,negative


[(0, 0.07484132), (1, 0.07160589), (2, 0.8535528)]
