# How to Keyword Extract?

### ■ Rapid Keyword Extraction (RAKE) Algorithm in Natural Language Processing

In [3]:
# !pip install rake-nltk

Collecting rake-nltk
  Downloading rake_nltk-1.0.6-py3-none-any.whl (9.1 kB)
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.6


In [1]:
import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [4]:
from rake_nltk import Rake

### COMMON USE
# r = Rake()

### to control the max or min words in a phrase
r = Rake(min_length=1, max_length=4)

### To include all phrases only once and ignore the repetitions
# r = Rake(include_repeated_phrases=False)

text="think would rather drink my own piss. Actually would probably drink someone else piss before drinking this again"

r.extract_keywords_from_text(text)
r.get_ranked_phrases()

['think would rather drink', 'piss', 'drinking']

In [5]:
#SPECIAL CASES
r.get_ranked_phrases_with_scores()

[(16.0, 'think would rather drink'), (1.0, 'piss'), (1.0, 'drinking')]

### ■ Apply RAKE to sample csv file

In [10]:
import pandas as pd
from rake_nltk import Rake

def apply_rake(text):
    r = Rake(min_length=1, max_length=4)
    r.extract_keywords_from_text(text)
    return r.get_ranked_phrases()

# assuming the csv file name is 'reviews.csv' and the review column name is 'Review'
csv_file = "../../Data/Preprocessed_data/pp_selected_reviews.csv"
df = pd.read_csv(csv_file)
df['Review'] = df['Review'].astype(str)
df

Unnamed: 0,Review,Beer_name,MultinomialNB_label
0,surprisingly little taste. it is fresh and a t...,Asahi Super Dry,Positive
1,pours an almost honey color with an extremely ...,Asahi Super Dry,Positive
2,"smell and taste of maltiness, grass and bread....",Asahi Super Dry,Positive
3,no redeeming features of this 'beer' other tha...,Asahi Super Dry,Negative
4,"at least, there is no harsh off flavours. what...",Asahi Super Dry,Negative
...,...,...,...
2100,shrewd metal piece of work and a aroma. with d...,Asahi Super Dry,Negative
2101,freak color. it. equal compose canadian molson...,Asahi Super Dry,Negative
2102,middling japanese beer weak watery. suck,Asahi Super Dry,Negative
2103,"character. only thirsty. designate smell, big ...",Asahi Super Dry,Negative


In [11]:
# create a new column 'keywords' to store the rake-extracted keywords
df['keywords'] = df['Review'].apply(lambda x: apply_rake(x))

# save the result to a new csv file ('reviews_with_keywords.csv')
# df.to_csv('reviews_with_keywords.csv', index=False)
df.sample(10)

Unnamed: 0,Review,Beer_name,MultinomialNB_label,keywords
364,"very clean and simple lager, clear slightly da...",Asahi Super Dry,Positive,"[favorite asian restaurant, asahi draft dispen..."
1642,"for the co op in davis, ca. pours very dk brow...",8 Wired iStout,Positive,"[slight creamy tan head, lightly smoked burnt ..."
2075,"feeble carbonation, sweet, metallic. non flat ...",Asahi Super Dry,Negative,"[weak though medium, feeble carbonation, sweet..."
479,"clear, a bit darker, golden colour with head t...",Asahi Super Dry,Positive,"[mostly like earth, moderate bitter aftertaste..."
929,"just like the says, dry ,very dry at the finis...",Asahi Super Dry,Positive,"[ok beer clean, japenese steakhouse, extra lar..."
1772,angstrom draught daruma austin. bad. of atomic...,Asahi Super Dry,Negative,"[angstrom draught daruma austin, light cider, ..."
1417,pitch black in the glass. no head at all. good...,8 Wired iStout,Positive,"[good roasted malts, smooth brew, pitch black,..."
30,light and crispy. bitter and sour at the start...,Asahi Super Dry,Negative,"[would go well, metallic tang, macro lagers, s..."
915,pale lager which tasted about like i expected....,Asahi Super Dry,Negative,"[yellow color, pale lager, average beer, turn,..."
407,". clear gold with a frothy white head. watery,...",Asahi Super Dry,Negative,"[would accept one given, little grainand yes, ..."
