In [29]:
import pandas as pd

# create a list of sentences
data = [
    "When life gives you lemons, make lemonade! 🙂",
    "She bought 2 lemons for $1 at Maven Market.",
    "A dozen lemons will make a gallon of lemonade. [AllRecipes]",
    "lemon, lemon, lemons, lemon, lemon, lemons",
    "He's running to the market to get a lemon — there's a great sale today.",
    "iced tea is my favorite",
    "I didn't like the taste of that lemonade at all.",
    "My lemons went bad before I could use them, unfortunately.",
]

# expand the column width to see the full sentences
pd.set_option('display.max_colwidth', None)

# turn it into a dataframe
data_df = pd.DataFrame(data, columns=["sentence"])
data_df.head()

# make a copy of the dataframe
df = data_df.copy()
df.head()

Unnamed: 0,sentence
0,"When life gives you lemons, make lemonade! 🙂"
1,She bought 2 lemons for $1 at Maven Market.
2,A dozen lemons will make a gallon of lemonade. [AllRecipes]
3,"lemon, lemon, lemons, lemon, lemon, lemons"
4,He's running to the market to get a lemon — there's a great sale today.


In [30]:
# import the VADER sentiment library
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

text = df.sentence[0]
text

'When life gives you lemons, make lemonade! 🙂'

In [31]:
# apply the sentiment analyzer to the entire dataframe

# create an analyzer object
analyzer = SentimentIntensityAnalyzer()

# define a function to get the score
def get_sentiment(text):
    return analyzer.polarity_scores(text)['compound']

# apply the function
df['sentiment'] = df['sentence'].apply(get_sentiment)
df

Unnamed: 0,sentence,sentiment
0,"When life gives you lemons, make lemonade! 🙂",0.4587
1,She bought 2 lemons for $1 at Maven Market.,0.0
2,A dozen lemons will make a gallon of lemonade. [AllRecipes],0.0
3,"lemon, lemon, lemons, lemon, lemon, lemons",0.0
4,He's running to the market to get a lemon — there's a great sale today.,0.6249
5,iced tea is my favorite,0.4588
6,I didn't like the taste of that lemonade at all.,-0.2755
7,"My lemons went bad before I could use them, unfortunately.",-0.7096


In [32]:
# import libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [35]:
reviews = pd.read_excel('./data/popchip_reviews.xlsx')

In [None]:
reviews.head()

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more."
2,23691,A30NYUHEDLWI0Y,5,Low,Great Alternative to Potato Chips,"I just love these chips! I was always a big fan of potato chips, but haven't had one since I discovered popchips. They are great for dipping or all alone. I am constantly re-ordering them. One note however-if you are on a low salt diet these chips are probably not for you. They are high in sodium. We go through a case every two months. If you love them it pays to join the subscribe and save program through Amazon. You save money and stay supplied!"
3,23692,A2NU55U9LKTB5J,3,High,Not somthing I would crave,"These tasted like potatoe stix, that we got in grade school with our lunches usually on pizza day. They were the bomb then, not so much now. Won't buy again unless I get them for cheap or free."
4,23693,A225F7QFP5LIW2,5,Low,healthy and delicious,"These chips are great! They look almost like a flattened rice cake, but taste so much better, more like a potato chip. The bbq flavor is delicious. They are very low in fat and full of flavor. It is easy to eat an entire bag of these!"


In [39]:
reviews.shape

(564, 6)

In [40]:
reviews.Priority.value_counts()

Priority
Low     447
High    117
Name: count, dtype: int64

In [62]:
# import necessary libraries
import pandas as pd
import spacy

# download the spacy model
nlp = spacy.load("en_core_web_sm")

# helper functions from text preprocessing section
def lower_replace(series):
    output = series.str.lower()
    output = output.str.replace(r'\[.*?\]', '', regex=True)
    output = output.str.replace(r'[^\w\s]', '', regex=True)
    return output

def token_lemma_nonstop(text):
    doc = nlp(text)
    output = [token.lemma_ for token in doc if not token.is_stop]
    return ' '.join(output)

def clean_and_normalize(series):
    output = lower_replace(series)
    output = output.apply(token_lemma_nonstop)
    return output

In [63]:
reviews['Text_clean'] = clean_and_normalize(reviews.Text)

In [64]:
cv = CountVectorizer(stop_words='english', min_df=.2, ngram_range=(1,2))
X = cv.fit_transform(reviews.Text_clean)
X_df = pd.DataFrame(X.toarray(), columns=cv.get_feature_names_out())

In [65]:
X_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
1,0,0,0,4,0,3,0,0,1,1,0,0,0,2,0,0,1
2,0,0,0,3,0,0,0,1,0,2,1,1,1,1,0,0,0
3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
4,1,0,0,2,1,2,0,1,2,0,0,1,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0,0,0,3,3,1,1,5,0,1,1,4,3,0,0,1,0
560,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1
561,0,0,0,2,0,1,0,2,0,0,0,0,0,0,0,2,0
562,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0


In [66]:
y = reviews.Priority

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
model = MultinomialNB()
model.fit(X_train, y_train)
y_preds = model.predict(X_test)

print("Accuracy: ", accuracy_score(y_test,y_preds))
print(classification_report(y_test, y_preds))

Accuracy:  0.8495575221238938
              precision    recall  f1-score   support

        High       0.67      0.21      0.32        19
         Low       0.86      0.98      0.92        94

    accuracy                           0.85       113
   macro avg       0.76      0.59      0.62       113
weighted avg       0.83      0.85      0.82       113



In [68]:
# test it out on new reviews
new_reviews = pd.Series([
    "Pop chips are my favorite! I love these chips so much.",
    "Taste bad. I don't like the flavor options or taste.",
    "Solid snack."
])

# clean and vectorize the new reviews using the same processes as earlier
new_reviews_clean = clean_and_normalize(new_reviews)
new_reviews_df = pd.DataFrame(cv.transform(new_reviews_clean).toarray(), columns=cv.get_feature_names_out())

# make a prediction
model.predict(new_reviews_df)



array(['Low', 'High', 'Low'], dtype='<U4')

In [70]:
tfidf = TfidfVectorizer(stop_words='english', min_df=.2, ngram_range=(1,2))
Xt = tfidf.fit_transform(reviews.Text_clean)
Xt_df = pd.DataFrame(Xt.toarray(), columns=tfidf.get_feature_names_out())
Xt_df

Unnamed: 0,bag,buy,calorie,chip,eat,flavor,good,great,like,love,popchip,potato,potato chip,salt,snack,taste,try
0,0.000000,0.000000,0.0,0.519418,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.854520,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.0,0.561525,0.000000,0.536897,0.000000,0.000000,0.195643,0.213896,0.000000,0.000000,0.000000,0.513405,0.000000,0.000000,0.220948
2,0.000000,0.000000,0.0,0.517797,0.000000,0.000000,0.000000,0.295038,0.000000,0.525969,0.283951,0.277296,0.333259,0.315616,0.000000,0.000000,0.000000
3,0.000000,0.690063,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.512918,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.510616,0.000000
4,0.252820,0.000000,0.0,0.340807,0.285695,0.434480,0.000000,0.291286,0.474967,0.000000,0.000000,0.273769,0.329020,0.000000,0.000000,0.236418,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0.000000,0.000000,0.0,0.216030,0.362191,0.091802,0.103620,0.615464,0.000000,0.109720,0.118467,0.462762,0.417116,0.000000,0.000000,0.099907,0.000000
560,0.381815,0.000000,0.0,0.000000,0.000000,0.328081,0.000000,0.439906,0.000000,0.000000,0.423375,0.000000,0.000000,0.000000,0.457993,0.000000,0.405042
561,0.000000,0.000000,0.0,0.399898,0.000000,0.254906,0.000000,0.683580,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.554818,0.000000
562,0.000000,0.000000,0.0,0.000000,0.000000,0.536898,0.606014,0.000000,0.586930,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [72]:
Xt_train, Xt_test, yt_train, yt_test = train_test_split(Xt, y, test_size=.2, random_state=42)
model_lr = LogisticRegression()
model_lr.fit(Xt_train, yt_train)
yt_preds = model_lr.predict(Xt_test)

print("Accuracy: ", accuracy_score(yt_test,yt_preds))
print(classification_report(yt_test, yt_preds))

Accuracy:  0.8407079646017699
              precision    recall  f1-score   support

        High       1.00      0.05      0.10        19
         Low       0.84      1.00      0.91        94

    accuracy                           0.84       113
   macro avg       0.92      0.53      0.51       113
weighted avg       0.87      0.84      0.78       113



In [80]:
reviews['Prediction_NB'] =  model.predict_proba(X_df)[:,0]
reviews['Prediction_LR'] =  model_lr.predict_proba(Xt_df)[:,0]



In [81]:
reviews.sort_values(by='Prediction_NB', ascending=False).head()

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text,Text_clean,Prediction_LR,Prediction_NB
550,24239,A2ZKS33N6Y3EPC,3,High,"Taste more like ""Tomato and Basil"" than ""Chili and Lime""","NOTE: This review is for the Chili and Lime Flavor Popchip. Amazon had a separate page for it but then merged the product and its reviews into one.<br /><br />It's hard to objectively review food since everyone's palate and tastes are different. So what I can say about this particular Popchip flavor that should be useful for most folks out there is that it doesn't really taste like Chili and Lime you're ""probably"" expecting. The Chili and Lime most folks probably are expecting if they grew up on Frito Lay products is very sharp and sweet (and of course artificial) - but it's what we liked if we ate more than a bag.<br /><br />The best way I can describe this flavor is that it has a ""tomato"" like taste to it with a somewhat tangy sour note that is suppose to be the lime component. Together they turn into an odd combination that registers other flavors in your mind than Chili and Lime - at least it did to me and others who tasted it with me. If you eat the skin of a green bell pepper, you can kind of get at what Popchips were trying to do with the Chili taste on this version, but I have no idea how some sour salt can be akin to lime. For myself personally, I thought it tasted like ""Tomato and Basil"" you would find on Pita chip flavors and baked snacks.<br /><br />Whether or not you agree with my above description of the flavor, I would highly suggest you try to get this in a sample pack and try it out first. BBQ + Salt & Vinegar Popchips are still my staples for now.",note review chili lime flavor popchip amazon separate page merge product review onebr br hard objectively review food everyone palate taste different particular popchip flavor useful folk not taste like chili lime probably expect chili lime folk probably expect grow frito lay product sharp sweet course artificial like eat bagbr br good way describe flavor tomato like taste somewhat tangy sour note suppose lime component turn odd combination register flavor mind chili lime taste eat skin green bell pepper kind popchip try chili taste version idea sour salt akin lime personally think taste like tomato basil find pita chip flavor bake snacksbr br agree description flavor highly suggest try sample pack try bbq salt vinegar popchip staple,0.468809,0.96983
96,23785,AE5AHEH3NLPBZ,3,High,Tastes Like Celery,"I really like pop chips, but this flavor isn't the best. I was expecting these to taste like chili peppers and lime (Spicy, Sweet, and Tart), but instead of going for a chili pepper taste, they went for a chili the food taste. This wouldn't be so bad, except they taste overwhelmingly of tomato and celery. The reason they didn't call them Tomato and Celery Chips is because it is sounds gross and no one would buy that, and unfortunately it tastes like it sounds.",like pop chip flavor not good expect taste like chili pepper lime spicy sweet tart instead go chili pepper taste go chili food taste not bad taste overwhelmingly tomato celery reason not tomato celery chip sound gross buy unfortunately taste like sound,0.475328,0.834434
463,24152,A2ZMMQ4W17EK2N,2,High,Original PopChips,"Bought the Original flavor from the store and just tried them tonight. They were very greasy and salty. I did not like them a lot. I will not purchase this original flavor again. However I can't complain because I got the 3 ounce bag for only $1.00 at the store while they were on sale. I tried the BBq flavor and they are delicious. I bought the sea salt & vinegar, and cheddar but haven't tried those yet.",buy original flavor store try tonight greasy salty like lot purchase original flavor not complain get 3 ounce bag 100 store sale try bbq flavor delicious buy sea salt vinegar cheddar not try,0.442119,0.771203
21,23710,ASIMCC20UVK58,5,Low,Great Chips Less Fat,"I eat chips almost every day and decided I wanted to find something that tastes as good but is lighter on unnecessary fat than regular types of chips. I bought a case of Popchips BBQ. These are satisfying and taste great. They don't taste exactly like any full fat chip products I've had mainly because they're not greasy at all, but they have a nice BBQ potato chip flavor. These are thick, crunchy, and light. I first bought the .8 oz bags and this serving size is on the small side for me with lunch (would probably be alright for a snack). 3 of the .8 oz bags works for me which of course bumps up the fat intake, but considering the same volume of ""regular"" chips has much more fat it is a significant fat decrease overall which is what I was looking for. I find the 3 ounce bags to be perfect. Even eating all 3 ounces works out to significantly less fat and calories than eating the same volume of other chips. This makes Popchips very satisfying to me, and I have bought many cases through Amazon.<br /><br />Heads up (mid-2011): Unfortunately the price has gone up significantly for these chips through Amazon, causing me to cancel my subscribe & save subscriptions. Popchips have popped up in local stores for significantly less per ounce. I love the convenience of the portioned bags and subscription but it's hard to justify paying double for the same product.<br /><br />The flavors are pretty straight forward but here's my thoughts...<br />Original flavor: Tastes like a plain potato chip minus the grease. Not my favorite flavor, but good for what it is. This flavor would probably be good with some kind of dip.<br />Chedder: Cheddar quickly became tied with BBQ for my favorite. Like BBQ the cheddar flavor is very strong. Great chips.<br />Salt & Pepper: Very strong pepper. To enjoy these you have to really like pepper. I like them, but they're not a favorite.<br />Sea Salt & Vinegar: I'm not a fan of vinegar, but strangely I enjoy this flavor. They're indeed salty with a fairly strong vinegar flavor.",eat chip day decide want find taste good light unnecessary fat regular type chip buy case popchip bbq satisfy taste great not taste exactly like fat chip product ve mainly greasy nice bbq potato chip flavor thick crunchy light buy 8 oz bag serve size small lunch probably alright snack 3 8 oz bag work course bump fat intake consider volume regular chip fat significant fat decrease overall look find 3 ounce bag perfect eat 3 ounce work significantly fat calorie eat volume chip make popchip satisfying buy case amazonbr br head mid2011 unfortunately price go significantly chip amazon cause cancel subscribe save subscription popchip pop local store significantly ounce love convenience portion bag subscription hard justify pay double productbr br flavor pretty straight forward here thoughtsbr original flavor taste like plain potato chip minus grease favorite flavor good flavor probably good kind dipbr chedder cheddar quickly tie bbq favorite like bbq cheddar flavor strong great chipsbr salt pepper strong pepper enjoy like pepper like favoritebr sea salt vinegar m fan vinegar strangely enjoy flavor salty fairly strong vinegar flavor,0.245016,0.762466
157,23846,A1HYH206E18XVC,5,Low,Tangy and terrific,"When I asked my older daughter to describe this flavor, she said to be sure to mention the word tangy. That is a fair description as the lime does heighten the taste buds and enhances the slight heat from the chili.<br /><br />My family really enjoys this flavor and it is among our favorites. We have tried most of the other varieties of Popchips and have our own preferences. My older daughter likes salt and pepper, barbecue, and this flavor the best. I like barbecue, sour cream and onion, and this flavor the best. My wife likes salt and vinegar and this flavor the best. My younger daughter does not like this flavor. She prefers barbecue, cheese, and sour cream and onion. Our least favorite is the original, probably because it is so plain by comparison.<br /><br />To me, Popchips are sort of a cross of potato chips, popcorn, and rice cakes. They are potato, but popped like popcorn and sort of puffy like rice cakes. They definitely have more flavor than many rice cakes and are a nice alternative to popcorn. They also can be used with dips although they never seem to last very long in our house.",ask old daughter describe flavor say sure mention word tangy fair description lime heighten taste bud enhance slight heat chilibr br family enjoy flavor favorite try variety popchip preference old daughter like salt pepper barbecue flavor good like barbecue sour cream onion flavor good wife like salt vinegar flavor good young daughter like flavor prefer barbecue cheese sour cream onion favorite original probably plain comparisonbr br popchip sort cross potato chip popcorn rice cake potato pop like popcorn sort puffy like rice cake definitely flavor rice cake nice alternative popcorn dip long house,0.307052,0.729732
