## NLP with Machine Learning

### Sentiment Analysis

In [1]:
import pandas as pd

# create a list of sentences
data = [
    "When life gives you lemons, make lemonade! 🙂",
    "She bought 2 lemons for $1 at Maven Market.",
    "A dozen lemons will make a gallon of lemonade. [AllRecipes]",
    "lemon, lemon, lemons, lemon, lemon, lemons",
    "He's running to the market to get a lemon — there's a great sale today.",
    "iced tea is my favorite",
    "I didn't like the taste of that lemonade at all.",
    "My lemons went bad before I could use them, unfortunately.",
]

# expand the column width to see the full sentences
pd.set_option('display.max_colwidth', None)

# turn it into a dataframe
data_df = pd.DataFrame(data, columns=["sentence"])
data_df.head()

# make a copy of the dataframe
df = data_df.copy()
df.head()

Unnamed: 0,sentence
0,"When life gives you lemons, make lemonade! 🙂"
1,She bought 2 lemons for $1 at Maven Market.
2,A dozen lemons will make a gallon of lemonade. [AllRecipes]
3,"lemon, lemon, lemons, lemon, lemon, lemons"
4,He's running to the market to get a lemon — there's a great sale today.


In [2]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [3]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [4]:
test = df['sentence'][0]
test

'When life gives you lemons, make lemonade! 🙂'

In [5]:
analyzer = SentimentIntensityAnalyzer()
analyzer.polarity_scores(test)

{'neg': 0.0, 'neu': 0.75, 'pos': 0.25, 'compound': 0.4587}

In [6]:
analyzer.polarity_scores(test)['compound']

0.4587

In [7]:
def get_sentiment(text):
  return SentimentIntensityAnalyzer().polarity_scores(text)['compound']

In [8]:
df['sentiment'] = df.sentence.apply(get_sentiment)
df

Unnamed: 0,sentence,sentiment
0,"When life gives you lemons, make lemonade! 🙂",0.4587
1,She bought 2 lemons for $1 at Maven Market.,0.0
2,A dozen lemons will make a gallon of lemonade. [AllRecipes],0.0
3,"lemon, lemon, lemons, lemon, lemon, lemons",0.0
4,He's running to the market to get a lemon — there's a great sale today.,0.6249
5,iced tea is my favorite,0.4588
6,I didn't like the taste of that lemonade at all.,-0.2755
7,"My lemons went bad before I could use them, unfortunately.",-0.7096


In [9]:
movies_df = pd.read_csv('movie_reviews.csv')
movies_df.head()

Unnamed: 0,movie_title,rating,genre,in_theaters_date,movie_info,directors,director_gender,tomatometer_rating,audience_rating,critics_consensus
0,A Dog's Journey,PG,"Drama, Kids & Family",5/17/19,"Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his ""boy,"" Ethan (Dennis Quaid) and Ethan's wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah's baby granddaughter, CJ. The problem is that CJ's mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey's soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey's adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ's best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs.",Gail Mancuso,female,50,92,"A Dog's Journey is as sentimental as one might expect, but even cynical viewers may find their ability to resist shedding a tear stretched to the puppermost limit."
1,A Dog's Way Home,PG,Drama,1/11/19,"Separated from her owner, a dog sets off on an 400-mile journey to get back to the safety and security of the place she calls home. Along the way, she meets a series of new friends and manages to bring a little bit of comfort and joy to their lives.",Charles Martin Smith,male,60,71,"A Dog's Way Home may not quite be a family-friendly animal drama fan's best friend, but this canine adventure is no less heartwarming for its familiarity."
2,A Tuba to Cuba,NR,"Documentary, Musical & Performing Arts",2/15/19,"The leader of New Orleans' famed Preservation Hall Jazz Band seeks to fulfill his late father's dream of retracing their musical roots to the shores of Cuba in search of the indigenous music that gave birth to New Orleans jazz. A TUBA TO CUBA celebrates the triumph of the human spirit expressed through the universal language of music and challenges us to resolve to build bridges, not walls.","Danny Clinch, T.G. Herrington",male,100,82,
3,A Vigilante,R,Drama,3/29/19,"A once abused woman, Sadie (Olivia Wilde), devotes herself to ridding victims of their domestic abusers while hunting down the husband she must kill to truly be free. A Vigilante is a thriller inspired by the strength and bravery of real domestic abuse survivors and the incredible obstacles to safety they face.",Sarah Daggar-Nickson,female,92,50,"Led by Olivia Wilde's fearless performance and elevated by timely themes, A Vigilante is an uncompromising thriller that hits as hard as its protagonist."
4,After,PG-13,"Drama, Romance",4/12/19,"Based on Anna Todd's best-selling novel which became a publishing sensation on social storytelling platform Wattpad, AFTER follows Tessa (Langford), a dedicated student, dutiful daughter and loyal girlfriend to her high school sweetheart, as she enters her first semester in college. Armed with grand ambitions for her future, her guarded world opens up when she meets the dark and mysterious Hardin Scott (Tiffin), a magnetic, brooding rebel who makes her question all she thought she knew about herself and what she wants out of life.",Jenny Gage,female,17,72,"Tepid and tired, After's fun flourishes are let down by its generic story."


In [10]:
movie_info_df = movies_df[['movie_title', 'movie_info']]
movie_info_df

Unnamed: 0,movie_title,movie_info
0,A Dog's Journey,"Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his ""boy,"" Ethan (Dennis Quaid) and Ethan's wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah's baby granddaughter, CJ. The problem is that CJ's mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey's soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey's adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ's best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs."
1,A Dog's Way Home,"Separated from her owner, a dog sets off on an 400-mile journey to get back to the safety and security of the place she calls home. Along the way, she meets a series of new friends and manages to bring a little bit of comfort and joy to their lives."
2,A Tuba to Cuba,"The leader of New Orleans' famed Preservation Hall Jazz Band seeks to fulfill his late father's dream of retracing their musical roots to the shores of Cuba in search of the indigenous music that gave birth to New Orleans jazz. A TUBA TO CUBA celebrates the triumph of the human spirit expressed through the universal language of music and challenges us to resolve to build bridges, not walls."
3,A Vigilante,"A once abused woman, Sadie (Olivia Wilde), devotes herself to ridding victims of their domestic abusers while hunting down the husband she must kill to truly be free. A Vigilante is a thriller inspired by the strength and bravery of real domestic abuse survivors and the incredible obstacles to safety they face."
4,After,"Based on Anna Todd's best-selling novel which became a publishing sensation on social storytelling platform Wattpad, AFTER follows Tessa (Langford), a dedicated student, dutiful daughter and loyal girlfriend to her high school sweetheart, as she enters her first semester in college. Armed with grand ambitions for her future, her guarded world opens up when she meets the dark and mysterious Hardin Scott (Tiffin), a magnetic, brooding rebel who makes her question all she thought she knew about herself and what she wants out of life."
...,...,...
161,Velvet Buzzsaw,"Velvet Buzzsaw is a satirical thriller set in the contemporary art world scene of Los Angeles, where big money artists and mega-collectors pay a high price when art collides with commerce."
162,What Men Want,"Inspired by the Nancy Meyers hit romantic comedy WHAT WOMEN WANT, this film follows the story of a female sports agent (Henson) who has been constantly boxed out by her male colleagues. When she gains the power to hear mens' thought, she is able to shift the paradigm to her advantage as she races to sign the NBA's next superstar"
163,Wild Rose,"WILD ROSE tells the complicated story of Rose-Lynn, a woman on a quest to become a country music star, while also grappling with the responsibilities of being recently released from prison and a young mother of two children."
164,Wine Country,"In honor of Rebecca (Rachel Dratch)'s 50th birthday, Abby (Amy Poehler) plans a scenic Napa getaway with their best, longtime friends. Workaholic Catherine (Ana Gasteyer), post-op Val (Paula Pell), homebody Jenny (Emily Spivey), and weary mom Naomi (Maya Rudolph) are equally sold on the chance to relax and reconnect. Yet as the alcohol flows, real world uncertainties intrude on the punchlines and gossip, and the women begin questioning their friendships and futures."


In [11]:
test = movie_info_df['movie_info'][0]
test

'Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his "boy," Ethan (Dennis Quaid) and Ethan\'s wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah\'s baby granddaughter, CJ. The problem is that CJ\'s mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey\'s soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey\'s adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ\'s best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs.'

In [12]:
movie_info_df['sentiment'] = movie_info_df.movie_info.apply(get_sentiment)
movie_info_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movie_info_df['sentiment'] = movie_info_df.movie_info.apply(get_sentiment)


Unnamed: 0,movie_title,movie_info,sentiment
0,A Dog's Journey,"Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his ""boy,"" Ethan (Dennis Quaid) and Ethan's wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah's baby granddaughter, CJ. The problem is that CJ's mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey's soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey's adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ's best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs.",0.9837
1,A Dog's Way Home,"Separated from her owner, a dog sets off on an 400-mile journey to get back to the safety and security of the place she calls home. Along the way, she meets a series of new friends and manages to bring a little bit of comfort and joy to their lives.",0.9237
2,A Tuba to Cuba,"The leader of New Orleans' famed Preservation Hall Jazz Band seeks to fulfill his late father's dream of retracing their musical roots to the shores of Cuba in search of the indigenous music that gave birth to New Orleans jazz. A TUBA TO CUBA celebrates the triumph of the human spirit expressed through the universal language of music and challenges us to resolve to build bridges, not walls.",0.9360
3,A Vigilante,"A once abused woman, Sadie (Olivia Wilde), devotes herself to ridding victims of their domestic abusers while hunting down the husband she must kill to truly be free. A Vigilante is a thriller inspired by the strength and bravery of real domestic abuse survivors and the incredible obstacles to safety they face.",-0.0334
4,After,"Based on Anna Todd's best-selling novel which became a publishing sensation on social storytelling platform Wattpad, AFTER follows Tessa (Langford), a dedicated student, dutiful daughter and loyal girlfriend to her high school sweetheart, as she enters her first semester in college. Armed with grand ambitions for her future, her guarded world opens up when she meets the dark and mysterious Hardin Scott (Tiffin), a magnetic, brooding rebel who makes her question all she thought she knew about herself and what she wants out of life.",0.9349
...,...,...,...
161,Velvet Buzzsaw,"Velvet Buzzsaw is a satirical thriller set in the contemporary art world scene of Los Angeles, where big money artists and mega-collectors pay a high price when art collides with commerce.",-0.2732
162,What Men Want,"Inspired by the Nancy Meyers hit romantic comedy WHAT WOMEN WANT, this film follows the story of a female sports agent (Henson) who has been constantly boxed out by her male colleagues. When she gains the power to hear mens' thought, she is able to shift the paradigm to her advantage as she races to sign the NBA's next superstar",0.9158
163,Wild Rose,"WILD ROSE tells the complicated story of Rose-Lynn, a woman on a quest to become a country music star, while also grappling with the responsibilities of being recently released from prison and a young mother of two children.",-0.5106
164,Wine Country,"In honor of Rebecca (Rachel Dratch)'s 50th birthday, Abby (Amy Poehler) plans a scenic Napa getaway with their best, longtime friends. Workaholic Catherine (Ana Gasteyer), post-op Val (Paula Pell), homebody Jenny (Emily Spivey), and weary mom Naomi (Maya Rudolph) are equally sold on the chance to relax and reconnect. Yet as the alcohol flows, real world uncertainties intrude on the punchlines and gossip, and the women begin questioning their friendships and futures.",0.9081


In [13]:
movie_info_df_sorted = movie_info_df.sort_values(by='sentiment', ascending=False).head(10)
movie_info_df_sorted[['movie_title', 'sentiment']]

Unnamed: 0,movie_title,sentiment
23,Breakthrough,0.9915
81,Missing Link,0.9909
130,The Laundromat,0.9908
48,Five Feet Apart,0.9889
156,UglyDolls,0.9862
93,Red Joan,0.9848
49,Giant Little Ones,0.9839
0,A Dog's Journey,0.9837
36,Dumbo,0.9801
71,Long Shot,0.9778


In [14]:
movie_info_df_sorted = movie_info_df.sort_values(by='sentiment').head(10)
movie_info_df_sorted[['movie_title', 'sentiment']]

Unnamed: 0,movie_title,sentiment
7,All Is True,-0.9955
148,The Wind,-0.9838
83,Nightmare Cinema,-0.9756
154,Triple Threat,-0.9696
11,Angel of Mine,-0.9687
27,Charlie Says,-0.9643
113,The Curse of La Llorona,-0.9628
87,Pet Sematary,-0.959
142,The Standoff at Sparrow Creek,-0.959
40,El Chicano,-0.9578


## Text Classification

In [15]:
# import libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [16]:
reviews = pd.read_excel('Popchip_Reviews.xlsx')
reviews.head()

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more."
2,23691,A30NYUHEDLWI0Y,5,Low,Great Alternative to Potato Chips,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!"
3,23692,A2NU55U9LKTB5J,3,High,Not somthing I would crave,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free."
4,23693,A225F7QFP5LIW2,5,Low,healthy and delicious,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!"


In [17]:
reviews.shape

(564, 6)

In [18]:
reviews.Priority.value_counts()

Unnamed: 0_level_0,count
Priority,Unnamed: 1_level_1
Low,447
High,117


In [19]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [20]:
def lower_remove(series):
  return series.str.lower().str.replace(r'\[.*?\]', '', regex=True).str.replace(r'[^\w\s]', '', regex=True)

def token_lemma_nonstop(text):
  return ' '.join([token.lemma_ for token in nlp(text) if not token.is_stop])

In [21]:
def clean_and_normalizer(series):
  return lower_remove(series).apply(token_lemma_nonstop)

In [22]:
reviews['Text_Clean'] = clean_and_normalizer(reviews['Text'])
reviews.Text_Clean

Unnamed: 0,Text_Clean
0,popchip bomb use parmesan garlic scoop cottage cheese healthy alternative chip dip healthy eat program save
1,like puff nature chip make unique chip market order salt vinegar absolutely love flavor hand favorite chip try cheddar regular flavor cheddar 45 regular 35 prefer strong flavor obviously case regular salt vinegar kind weak compare regular sv chip flavorful make want come
2,love chip big fan potato chip not discover popchip great dipping constantly reorder note howeverif low salt diet chip probably high sodium case month love pay join subscribe save program amazon save money stay supply
3,taste like potatoe stix get grade school lunch usually pizza day bomb will not buy cheap free
4,chip great look like flattened rice cake taste well like potato chip bbq flavor delicious low fat flavor easy eat entire bag
...,...
559,love potato chip eat bagful thank power provide nutrition survive nonetheless eat share potato chip year result watch weight lately discover great popchip hit spot number way lowcalorie alternative regular potato chip mean eat normalsized portion target weight loss goal second glutenfree great gluten intolerance issue taste great light airy crispy rich great potato flavor keep reach great product hope stay good long time
560,popchip hard find order case amazon regular basis price great goto snack fresh delicious amazonbr br definitely prefer original barbecue sour cream onion not try flavor original work fine use dip use dry seasoning m moodbr br not know anybody ve offer not want bag highly recommend
561,healthy alternative chip taste great great crunch flavor not bad taste baked chip regret not order sale go regular price worth
562,good ve start get automatically like original flavor lot


In [23]:
reviews

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text,Text_Clean
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,popchip bomb use parmesan garlic scoop cottage cheese healthy alternative chip dip healthy eat program save
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",like puff nature chip make unique chip market order salt vinegar absolutely love flavor hand favorite chip try cheddar regular flavor cheddar 45 regular 35 prefer strong flavor obviously case regular salt vinegar kind weak compare regular sv chip flavorful make want come
2,23691,A30NYUHEDLWI0Y,5,Low,Great Alternative to Potato Chips,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",love chip big fan potato chip not discover popchip great dipping constantly reorder note howeverif low salt diet chip probably high sodium case month love pay join subscribe save program amazon save money stay supply
3,23692,A2NU55U9LKTB5J,3,High,Not somthing I would crave,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",taste like potatoe stix get grade school lunch usually pizza day bomb will not buy cheap free
4,23693,A225F7QFP5LIW2,5,Low,healthy and delicious,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",chip great look like flattened rice cake taste well like potato chip bbq flavor delicious low fat flavor easy eat entire bag
...,...,...,...,...,...,...,...
559,24248,A2TX79GR278JMA,5,Low,Life-saver for chip lovers trying to eat healthier,"I love potato chips. I could eat them by the bagful but thanks to the powers that be, this would not provide anyone with enough nutrition to survive. Nonetheless I have eaten my share of potato chips over the years, and perhaps as a result I have been watching my weight lately. I discovered these great popchips and they hit the spot in a number of ways. First they are a low-calorie alternative to regular potato chips, meaning I can eat a normal-sized portion and still keep on target with my weight loss goals. Second, they are gluten-free, which is great for those of us who have gluten intolerance issues. Third, they taste great - light and airy, crispy, rich in that great potato flavor that keeps me reaching for another one. This is a great product and I hope it stays around for a good long time.",love potato chip eat bagful thank power provide nutrition survive nonetheless eat share potato chip year result watch weight lately discover great popchip hit spot number way lowcalorie alternative regular potato chip mean eat normalsized portion target weight loss goal second glutenfree great gluten intolerance issue taste great light airy crispy rich great potato flavor keep reach great product hope stay good long time
560,24249,A3L61VG7RK40TW,5,Low,My Favorite Go-To Snack,"When PopChips were really hard to find, I was ordering them by the case from Amazon on a regular basis. The price was always great and these really are my go-to snack. They are always fresh and delicious from Amazon.<br /><br />I definitely prefer Original over barbecue and sour cream & onion. I haven't tried other flavors, but Original works just fine for me because I use it for various dips and will even use dry seasonings on them if I'm in the mood.<br /><br />I don't know anybody that I've offered some of these to who didn't want a bag of their own! Highly recommended.",popchip hard find order case amazon regular basis price great goto snack fresh delicious amazonbr br definitely prefer original barbecue sour cream onion not try flavor original work fine use dip use dry seasoning m moodbr br not know anybody ve offer not want bag highly recommend
561,24250,A2X9PNKZRBD1UC,5,Low,A healthy alternative,These are a much healthy alternative to most chips and they taste great. They have a great crunch and flavor and don't have that bad after taste that most baked chips have. My only regret is that I didn't order more when they were on sale there all gone now but even at regular price are worth it.,healthy alternative chip taste great great crunch flavor not bad taste baked chip regret not order sale go regular price worth
562,24251,A2LPC9SGRB3G4G,5,Low,Luv'em,"These are so good, I've started getting them automatically. I like the original flavor, but they have a lot of others.",good ve start get automatically like original flavor lot


In [24]:
cv = CountVectorizer(stop_words='english', ngram_range=(1,2), min_df=0.2)

In [25]:
X = cv.fit_transform(reviews.Text_Clean)
X_df = pd.DataFrame(X.toarray(), columns=cv.get_feature_names_out())
X_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0
1,0,0,0,4,0,3,0,0,1,1,0,0,0,2,0,0,1
2,0,0,0,3,0,0,0,1,0,2,1,1,1,1,0,0,0
3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
4,1,0,0,2,1,2,0,1,2,0,0,1,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0,0,0,3,3,1,1,5,0,1,1,4,3,0,0,1,0
560,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1
561,0,0,0,2,0,1,0,2,0,0,0,0,0,0,0,2,0
562,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0


In [26]:
y = reviews.Priority
y

Unnamed: 0,Priority
0,Low
1,Low
2,Low
3,High
4,Low
...,...
559,Low
560,Low
561,Low
562,Low


In [27]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

## Evaluate
print("accuracy: ", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

accuracy:  0.8407079646017699
              precision    recall  f1-score   support

        High       0.60      0.16      0.25        19
         Low       0.85      0.98      0.91        94

    accuracy                           0.84       113
   macro avg       0.73      0.57      0.58       113
weighted avg       0.81      0.84      0.80       113



In [28]:
new_reviews = pd.Series([
    "Pop chips are my favorite! I love these chips so much.",
    "Taste bad. I don't like the flavor options or taste.",
    "Solid snack."
])


In [29]:
new_reviews

Unnamed: 0,0
0,Pop chips are my favorite! I love these chips so much.
1,Taste bad. I don't like the flavor options or taste.
2,Solid snack.


In [30]:
new_reviews_clean = clean_and_normalizer(new_reviews)
new_reviews_clean

Unnamed: 0,0
0,pop chip favorite love chip
1,taste bad not like flavor option taste
2,solid snack


In [31]:
X_new_reviews_clean = cv.transform(new_reviews_clean)
X_new_reviews_clean_df = pd.DataFrame(X_new_reviews_clean.toarray(), columns=cv.get_feature_names_out())
X_new_reviews_clean_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [32]:
y_pred_news_reviews = model.predict(X_new_reviews_clean_df)
y_pred_news_reviews



array(['Low', 'High', 'Low'], dtype='<U4')

In [33]:
## tfidf, logistic regression

In [34]:
tv = TfidfVectorizer(stop_words='english', ngram_range=(1,2), min_df=0.2)
Xt = tv.fit_transform(reviews.Text_Clean)
Xt_df = pd.DataFrame(Xt.toarray(), columns=tv.get_feature_names_out())
Xt_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0.000000,0.000000,0.0,0.392603,0.656435,0.000000,0.000000,0.000000,0.000000,0.000000,0.644170,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.0,0.561185,0.000000,0.537701,0.000000,0.000000,0.195524,0.213766,0.000000,0.000000,0.000000,0.513094,0.000000,0.000000,0.220814
2,0.000000,0.000000,0.0,0.517908,0.000000,0.000000,0.000000,0.295101,0.000000,0.526082,0.283255,0.277355,0.333330,0.315684,0.000000,0.000000,0.000000
3,0.000000,0.690063,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.512918,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.510616,0.000000
4,0.252776,0.000000,0.0,0.340747,0.284866,0.435318,0.000000,0.291234,0.474884,0.000000,0.000000,0.273721,0.328962,0.000000,0.000000,0.236376,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0.000000,0.000000,0.0,0.216106,0.361330,0.092028,0.103897,0.615680,0.000000,0.109758,0.118193,0.462925,0.417263,0.000000,0.000000,0.099942,0.000000
560,0.381673,0.000000,0.0,0.000000,0.000000,0.328649,0.000000,0.439742,0.000000,0.000000,0.422089,0.000000,0.000000,0.000000,0.459181,0.000000,0.404891
561,0.000000,0.000000,0.0,0.399843,0.000000,0.255407,0.000000,0.683486,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.554742,0.000000
562,0.000000,0.000000,0.0,0.000000,0.000000,0.537244,0.606536,0.000000,0.586074,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [35]:
Xt_train, Xt_test, y_train, y_test = train_test_split(Xt_df, y, test_size=0.2, random_state=42)

model2 = LogisticRegression()
model2.fit(Xt_train, y_train)


y_pred = model2.predict(Xt_test)

print('accuracy_score: ', accuracy_score(y_pred, y_test))
print(classification_report(y_pred, y_test))

accuracy_score:  0.8407079646017699
              precision    recall  f1-score   support

        High       0.05      1.00      0.10         1
         Low       1.00      0.84      0.91       112

    accuracy                           0.84       113
   macro avg       0.53      0.92      0.51       113
weighted avg       0.99      0.84      0.91       113



In [36]:
movies_df.head()
X_movie_review = clean_and_normalizer(movies_df.movie_info)
X_movie_review

Unnamed: 0,movie_info
0,bailey voice josh gad live good life michigan farm boy ethan dennis quaid ethans wife hannah marg helgenberger new playmate ethan hannah baby granddaughter cj problem cjs mom gloria betty gilpin decide cj away bailey soul prepare leave life new make promise ethan find cj protect cost begin bailey adventure multiple life fill love friendship devotion cj kathryn prescott cjs good friend trent henry lau experience joy heartbreak music laughter good belly rub
1,separate owner dog set 400mile journey safety security place call home way meet series new friend manage bring little bit comfort joy life
2,leader new orleans famed preservation hall jazz band seek fulfill late father dream retrace musical root shore cuba search indigenous music give birth new orleans jazz tuba cuba celebrate triumph human spirit express universal language music challenge resolve build bridge wall
3,abuse woman sadie olivia wilde devote rid victim domestic abuser hunt husband kill truly free vigilante thriller inspire strength bravery real domestic abuse survivor incredible obstacle safety face
4,base anna todd bestselle novel publishing sensation social storytelling platform wattpad follow tessa langford dedicated student dutiful daughter loyal girlfriend high school sweetheart enter semester college arm grand ambition future guard world open meet dark mysterious hardin scott tiffin magnetic brood rebel make question think know want life
...,...
161,velvet buzzsaw satirical thriller set contemporary art world scene los angeles big money artist megacollector pay high price art collide commerce
162,inspire nancy meyer hit romantic comedy woman want film follow story female sport agent henson constantly box male colleague gain power hear men think able shift paradigm advantage race sign nbas superstar
163,wild rose tell complicated story roselynn woman quest country music star grapple responsibility recently release prison young mother child
164,honor rebecca rachel dratchs 50th birthday abby amy poehler plan scenic napa getaway good longtime friend workaholic catherine ana gasteyer postop val paula pell homebody jenny emily spivey weary mom naomi maya rudolph equally sell chance relax reconnect alcohol flow real world uncertainty intrude punchline gossip woman begin question friendship future


In [37]:
cv2 = CountVectorizer(stop_words='english', min_df=0.1)
X_movie_review = cv2.fit_transform(X_movie_review)
X_movie_review_df = pd.DataFrame(X_movie_review.toarray(), columns=cv2.get_feature_names_out())
X_movie_review_df

Unnamed: 0,begin,discover,family,film,follow,force,friend,home,leave,life,...,man,new,set,star,story,turn,woman,world,year,young
0,1,0,0,0,0,0,1,0,1,3,...,0,2,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,1,0,1,...,0,1,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,2,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,0,0,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
162,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0
163,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,0,1
164,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,1,0,0


In [38]:
model_nb = MultinomialNB()
model_lr = LogisticRegression()

In [39]:
y_movie_info = movies_df.director_gender
y_movie_info

Unnamed: 0,director_gender
0,female
1,male
2,male
3,female
4,female
...,...
161,male
162,male
163,male
164,female


In [40]:
X_movie_review_df_train, X_movie_review_df_test, y_movie_info_train, y_movie_info_test = train_test_split(X_movie_review_df, y_movie_info, test_size=0.2, random_state=42)

In [41]:
model_nb.fit(X_movie_review_df_train, y_movie_info_train)
model_lr.fit(X_movie_review_df_train, y_movie_info_train)

In [42]:
y_pred_nb = model_nb.predict(X_movie_review_df_test)
y_pred_lr = model_lr.predict(X_movie_review_df_test)

In [43]:
print('accuracy_score with NaiveBayes: ', accuracy_score(y_pred_nb, y_movie_info_test))
print('accuracy_score with LogisticRegression: ', accuracy_score(y_pred_lr, y_movie_info_test))

accuracy_score with NaiveBayes:  0.7941176470588235
accuracy_score with LogisticRegression:  0.7647058823529411


In [44]:
print(classification_report(y_pred_nb, y_movie_info_test))

              precision    recall  f1-score   support

      female       0.20      0.25      0.22         4
        male       0.90      0.87      0.88        30

    accuracy                           0.79        34
   macro avg       0.55      0.56      0.55        34
weighted avg       0.81      0.79      0.80        34



In [45]:
print(classification_report(y_pred_lr, y_movie_info_test))

              precision    recall  f1-score   support

      female       0.40      0.29      0.33         7
        male       0.83      0.89      0.86        27

    accuracy                           0.76        34
   macro avg       0.61      0.59      0.60        34
weighted avg       0.74      0.76      0.75        34



In [47]:
tv2 = TfidfVectorizer(stop_words='english', min_df=0.05, max_df=0.2)
Xt2 = tv2.fit_transform(reviews.Text_Clean)
Xt_df2 = pd.DataFrame(Xt2.toarray(), columns=tv2.get_feature_names_out())
Xt_df2

Unnamed: 0,100,alternative,amazon,bad,bake,baked,bbq,big,bit,box,...,thing,think,time,variety,ve,vinegar,want,way,weight,work
0,0.0,0.465515,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.348295,0.193511,0.000000,0.000000,0.000000
2,0.0,0.000000,0.354088,0.000000,0.0,0.000000,0.000000,0.428869,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.354475,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0.0,0.324462,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.299888,0.0,0.000000,0.000000,0.000000,0.337388,0.657147,0.000000
560,0.0,0.000000,0.190702,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.197896,0.000000,0.208227,0.000000,0.000000,0.247474
561,0.0,0.378621,0.000000,0.380993,0.0,0.396437,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
562,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.543142,0.000000,0.000000,0.000000,0.000000,0.000000


In [48]:
from sklearn.decomposition import NMF

In [77]:
nmf = NMF(n_components=5, random_state=42, max_iter=500)
W = nmf.fit_transform(Xt_df2) # document-topics
H = nmf.components_ # document-terms

In [78]:
H.shape

(5, 81)

In [79]:
W.shape

(564, 5)

In [80]:
def display_topic(H, num_words=10):
  for topic_num, topic_array in enumerate(H):
    top_features = topic_array.argsort()[::-1][:num_words]
    top_words = [tv2.get_feature_names_out()[i] for i in top_features]
    print("Topic", topic_num+1, ":", ', '.join(top_words))

In [81]:
display_topic(H)

Topic 1 : order, amazon, case, time, store, box, thing, price, know, product
Topic 2 : sweet, salty, br, light, rice, texture, think, little, crunchy, fry
Topic 3 : healthy, alternative, bbq, delicious, regular, feel, work, enjoy, nice, look
Topic 4 : br, vinegar, bbq, favorite, pepper, original, lime, think, sea, sour
Topic 5 : fat, low, weight, pop, regular, serve, diet, crunch, single, tasty


In [82]:
doc_topics = pd.DataFrame(W)
doc_topics.columns = ['orders', 'tast and texture', 'good', 'flavor', 'health']
doc_topics

Unnamed: 0,orders,tast and texture,good,flavor,health
0,0.000000,0.000000,0.403012,0.000000,0.000000
1,0.055080,0.000000,0.023755,0.115179,0.088048
2,0.067787,0.000000,0.000000,0.000000,0.153890
3,0.017647,0.002463,0.000000,0.000000,0.029204
4,0.000000,0.016166,0.040860,0.044669,0.190659
...,...,...,...,...,...
559,0.025953,0.010370,0.050308,0.000000,0.168847
560,0.108660,0.000000,0.022080,0.157261,0.032282
561,0.084727,0.000000,0.200482,0.000000,0.091203
562,0.019073,0.000000,0.000000,0.085505,0.037631


In [84]:
reviews_topics = pd.concat([reviews.Text, doc_topics], axis=1)
reviews_topics

Unnamed: 0,Text,orders,tast and texture,good,flavor,health
0,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,0.000000,0.000000,0.403012,0.000000,0.000000
1,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",0.055080,0.000000,0.023755,0.115179,0.088048
2,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",0.067787,0.000000,0.000000,0.000000,0.153890
3,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",0.017647,0.002463,0.000000,0.000000,0.029204
4,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",0.000000,0.016166,0.040860,0.044669,0.190659
...,...,...,...,...,...,...
559,"I love potato chips. I could eat them by the bagful but thanks to the powers that be, this would not provide anyone with enough nutrition to survive. Nonetheless I have eaten my share of potato chips over the years, and perhaps as a result I have been watching my weight lately. I discovered these great popchips and they hit the spot in a number of ways. First they are a low-calorie alternative to regular potato chips, meaning I can eat a normal-sized portion and still keep on target with my weight loss goals. Second, they are gluten-free, which is great for those of us who have gluten intolerance issues. Third, they taste great - light and airy, crispy, rich in that great potato flavor that keeps me reaching for another one. This is a great product and I hope it stays around for a good long time.",0.025953,0.010370,0.050308,0.000000,0.168847
560,"When PopChips were really hard to find, I was ordering them by the case from Amazon on a regular basis. The price was always great and these really are my go-to snack. They are always fresh and delicious from Amazon.<br /><br />I definitely prefer Original over barbecue and sour cream & onion. I haven't tried other flavors, but Original works just fine for me because I use it for various dips and will even use dry seasonings on them if I'm in the mood.<br /><br />I don't know anybody that I've offered some of these to who didn't want a bag of their own! Highly recommended.",0.108660,0.000000,0.022080,0.157261,0.032282
561,These are a much healthy alternative to most chips and they taste great. They have a great crunch and flavor and don't have that bad after taste that most baked chips have. My only regret is that I didn't order more when they were on sale there all gone now but even at regular price are worth it.,0.084727,0.000000,0.200482,0.000000,0.091203
562,"These are so good, I've started getting them automatically. I like the original flavor, but they have a lot of others.",0.019073,0.000000,0.000000,0.085505,0.037631


In [85]:
# topic, sentiment, eda

In [86]:
final_topics = reviews_topics
final_topics.head()

Unnamed: 0,Text,orders,tast and texture,good,flavor,health
0,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,0.0,0.0,0.403012,0.0,0.0
1,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",0.05508,0.0,0.023755,0.115179,0.088048
2,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",0.067787,0.0,0.0,0.0,0.15389
3,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",0.017647,0.002463,0.0,0.0,0.029204
4,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",0.0,0.016166,0.04086,0.044669,0.190659


In [88]:
final_topics['top_topic'] = final_topics.iloc[:, 1:].idxmax(axis=1)
final_topics.head()

Unnamed: 0,Text,orders,tast and texture,good,flavor,health,top_topic
0,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,0.0,0.0,0.403012,0.0,0.0,good
1,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",0.05508,0.0,0.023755,0.115179,0.088048,flavor
2,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",0.067787,0.0,0.0,0.0,0.15389,health
3,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",0.017647,0.002463,0.0,0.0,0.029204,health
4,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",0.0,0.016166,0.04086,0.044669,0.190659,health


In [91]:
def get_sentiment(text):
  return SentimentIntensityAnalyzer().polarity_scores(text)['compound']

In [93]:
final_topics['sentiment'] = final_topics.Text.apply(get_sentiment)

In [94]:
final_topics.head()

Unnamed: 0,Text,orders,tast and texture,good,flavor,health,top_topic,sentiment
0,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,0.0,0.0,0.403012,0.0,0.0,good,0.9244
1,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",0.05508,0.0,0.023755,0.115179,0.088048,flavor,0.7269
2,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",0.067787,0.0,0.0,0.0,0.15389,health,0.979
3,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",0.017647,0.002463,0.0,0.0,0.029204,health,0.8689
4,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",0.0,0.016166,0.04086,0.044669,0.190659,health,0.9613


In [95]:
final_topics.groupby('top_topic')['sentiment'].mean()

Unnamed: 0_level_0,sentiment
top_topic,Unnamed: 1_level_1
flavor,0.768537
good,0.816834
health,0.711142
orders,0.504758
tast and texture,0.842701


In [96]:
reviews.head()

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text,Text_Clean
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,popchip bomb use parmesan garlic scoop cottage cheese healthy alternative chip dip healthy eat program save
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",like puff nature chip make unique chip market order salt vinegar absolutely love flavor hand favorite chip try cheddar regular flavor cheddar 45 regular 35 prefer strong flavor obviously case regular salt vinegar kind weak compare regular sv chip flavorful make want come
2,23691,A30NYUHEDLWI0Y,5,Low,Great Alternative to Potato Chips,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!",love chip big fan potato chip not discover popchip great dipping constantly reorder note howeverif low salt diet chip probably high sodium case month love pay join subscribe save program amazon save money stay supply
3,23692,A2NU55U9LKTB5J,3,High,Not somthing I would crave,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free.",taste like potatoe stix get grade school lunch usually pizza day bomb will not buy cheap free
4,23693,A225F7QFP5LIW2,5,Low,healthy and delicious,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!",chip great look like flattened rice cake taste well like potato chip bbq flavor delicious low fat flavor easy eat entire bag


In [125]:
movies_df['movie_info_clean'] = clean_and_normalizer(movies_df.movie_info)

In [139]:
tv3 = TfidfVectorizer(stop_words='english', min_df=0.02, max_df=0.2)
review_text_clean = tv3.fit_transform(movies_df.movie_info_clean)
reviews_text_clean_df = pd.DataFrame(review_text_clean.toarray(), columns=tv3.get_feature_names_out())

nmf2 = NMF(n_components=5, random_state=42, max_iter=500)
W_review = nmf2.fit_transform(reviews_text_clean_df)
H_review = nmf2.components_

In [140]:
W_review.shape

(166, 5)

In [141]:
H_review.shape

(5, 347)

In [142]:
H_review

array([[7.95902983e-02, 4.92483649e-02, 0.00000000e+00, ...,
        1.33677404e-01, 9.06631952e-02, 2.96163468e-01],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        1.59227139e-01, 0.00000000e+00, 1.62277558e-01],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        2.01117664e-02, 4.60964840e-02, 1.58275417e-02],
       [3.82900316e-02, 5.41194831e-02, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 4.66090348e-04],
       [0.00000000e+00, 0.00000000e+00, 5.78804981e-01, ...,
        9.14871399e-02, 1.70191328e-02, 5.91342373e-02]])

In [143]:
def display_topic(H, num_words=10):
  for topic_num, topic_array in enumerate(H):
    top_features = topic_array.argsort()[::-1][:num_words]
    top_words = [tv3.get_feature_names_out()[i] for i in top_features]
    print("Topic", topic_num+1, ":", ', '.join(top_words))

In [144]:
display_topic(H_review)

Topic 1 : family, father, grow, face, young, return, son, death, try, home
Topic 2 : film, true, star, base, follow, inspire, comedy, tell, man, event
Topic 3 : friend, good, live, dream, love, leave, meet, school, help, childhood
Topic 4 : set, force, evil, child, sinister, night, horror, neighborhood, follow, deadly
Topic 5 : academy, award, winner, nominee, help, violent, turn, skill, include, jason


In [145]:
doc_topics_moview_review = pd.DataFrame(W_review)
doc_topics_moview_review.columns = ['Drama', 'Comedy', 'Life', 'Horror', 'Violence']
doc_topics_moview_review

Unnamed: 0,Drama,Comedy,Life,Horror,Violence
0,0.000000,0.000000,0.381658,0.000000,0.000000
1,0.000000,0.000000,0.139677,0.214894,0.000000
2,0.007848,0.046280,0.115012,0.000000,0.003576
3,0.089396,0.113077,0.000000,0.000000,0.001947
4,0.000000,0.103786,0.129211,0.068007,0.000000
...,...,...,...,...,...
161,0.000000,0.000000,0.000000,0.199707,0.003454
162,0.000000,0.339017,0.000000,0.000000,0.000000
163,0.032567,0.155971,0.000000,0.067884,0.014087
164,0.000000,0.000000,0.294680,0.000000,0.000000


In [151]:
movies_reviews_main = pd.concat([movies_df.movie_title, movies_df.movie_info, doc_topics_moview_review], axis=1)
movies_reviews_main.head(2)

Unnamed: 0,movie_title,movie_info,Drama,Comedy,Life,Horror,Violence
0,A Dog's Journey,"Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his ""boy,"" Ethan (Dennis Quaid) and Ethan's wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah's baby granddaughter, CJ. The problem is that CJ's mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey's soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey's adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ's best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs.",0.0,0.0,0.381658,0.0,0.0
1,A Dog's Way Home,"Separated from her owner, a dog sets off on an 400-mile journey to get back to the safety and security of the place she calls home. Along the way, she meets a series of new friends and manages to bring a little bit of comfort and joy to their lives.",0.0,0.0,0.139677,0.214894,0.0


In [152]:
movies_reviews_main['top_theme'] = movies_reviews_main.iloc[:, 2:].idxmax(axis=1)
movies_reviews_main.head(1)

Unnamed: 0,movie_title,movie_info,Drama,Comedy,Life,Horror,Violence,top_theme
0,A Dog's Journey,"Bailey (voiced again by Josh Gad) is living the good life on the Michigan farm of his ""boy,"" Ethan (Dennis Quaid) and Ethan's wife Hannah (Marg Helgenberger). He even has a new playmate: Ethan and Hannah's baby granddaughter, CJ. The problem is that CJ's mom, Gloria (Betty Gilpin), decides to take CJ away. As Bailey's soul prepares to leave this life for a new one, he makes a promise to Ethan to find CJ and protect her at any cost. Thus begins Bailey's adventure through multiple lives filled with love, friendship and devotion as he, CJ (Kathryn Prescott), and CJ's best friend Trent (Henry Lau) experience joy and heartbreak, music and laughter, and few really good belly rubs.",0.0,0.0,0.381658,0.0,0.0,Life


In [153]:
movies_reviews_main['sentiment'] = movies_reviews_main.movie_info.apply(get_sentiment)

In [154]:
movies_reviews_main.groupby('top_theme')['sentiment'].mean()

Unnamed: 0_level_0,sentiment
top_theme,Unnamed: 1_level_1
Comedy,0.019264
Drama,-0.262506
Horror,-0.226322
Life,0.546495
Violence,0.511785


In [181]:
def get_2_movie_name(theme_name):
  i = 0
  for idx, row in movies_reviews_main.iterrows():
    if row['top_theme'] == theme_name:
      i +=1
      print('movie_name: ', row['movie_title'])
      if i == 2:
        break

In [182]:
get_2_movie_name('Horror')

movie_name:  A Dog's Way Home
movie_name:  Aladdin


In [183]:
get_2_movie_name('Life')

movie_name:  A Dog's Journey
movie_name:  A Tuba to Cuba
