# NLP Analysis

## Getting data into text string

In [26]:
import requests

# Raw GitHub URL of the plain-text article that is downloaded and analysed below.
url = "https://raw.githubusercontent.com/junyang-chin/DSUB-5.0/main/Powells_rate-hike_plans_get_jolted_by_inflation.txt"

In [27]:
# Download the article. Fail loudly on an HTTP error instead of silently
# analysing an error page as if it were the article body, and bound the wait
# so a stalled connection cannot hang the notebook.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text  # already a str, so no extra str() conversion is needed

In [28]:
# Show the downloaded article text before any cleaning.
print(text)

NEW YORK: Jerome Powell could deliver a hawkish surprise on Wednesday even after effectively pre-announcing 50-basis-point interest-rate increases at the Federal Reserve’s (Fed) meeting this week and in July.

May’s red-hot inflation print hardened expectations the Fed would keep raising borrowing costs at that pace through September, with some investors betting the Fed chair will deliver a super-sized 75-basis-point move unless price pressures cool.

Powell could reinforce that speculation during his post-meeting press conference by declining to take 75 basis points off the table – as he explicitly did last month by stating such a move wasn’t being actively considered – or by emphasising the need for nimble policy to cool surging prices.

Data released Friday hammered home the message that the US central bank has a lot of work still to do in containing price pressures. Consumer prices excluding food and energy rose 8.6% in the 12 months through May, quickening to a fresh 40-year high.

## Removing punctuation

In [29]:
# ASCII punctuation characters (plus the space) that are replaced by a space.
# Written as a raw string so the backslash is a literal character rather than
# the start of the invalid escape sequence "\]".
punctuations = r"""!"#$%&'()*+, -./:;<=>?@[\]^_`{|}~"""

# NOTE(review): typographic characters such as ’ and – are not part of this
# ASCII set, so they remain in the text (e.g. "reserve’s").

# Single pass over the text: map every punctuation character to a space
# instead of re-scanning the whole string once per punctuation hit.
punctuation_to_space = str.maketrans(punctuations, " " * len(punctuations))

# Lower-case afterwards so that e.g. "Fed" and "fed" count as the same word.
text = text.translate(punctuation_to_space).lower()
        

In [30]:
# Show the text after punctuation removal and lower-casing.
print(text)

new york  jerome powell could deliver a hawkish surprise on wednesday even after effectively pre announcing 50 basis point interest rate increases at the federal reserve’s  fed  meeting this week and in july 

may’s red hot inflation print hardened expectations the fed would keep raising borrowing costs at that pace through september  with some investors betting the fed chair will deliver a super sized 75 basis point move unless price pressures cool 

powell could reinforce that speculation during his post meeting press conference by declining to take 75 basis points off the table – as he explicitly did last month by stating such a move wasn’t being actively considered – or by emphasising the need for nimble policy to cool surging prices 

data released friday hammered home the message that the us central bank has a lot of work still to do in containing price pressures  consumer prices excluding food and energy rose 8 6  in the 12 months through may  quickening to a fresh 40 year high 

## Tokenising the text into words

In [31]:
# Split the string into a list of words. With no arguments, split() breaks on
# any run of whitespace and never produces empty strings.
tokens = text.split()

In [32]:
# Show the full list of word tokens.
print(tokens)

['new', 'york', 'jerome', 'powell', 'could', 'deliver', 'a', 'hawkish', 'surprise', 'on', 'wednesday', 'even', 'after', 'effectively', 'pre', 'announcing', '50', 'basis', 'point', 'interest', 'rate', 'increases', 'at', 'the', 'federal', 'reserve’s', 'fed', 'meeting', 'this', 'week', 'and', 'in', 'july', 'may’s', 'red', 'hot', 'inflation', 'print', 'hardened', 'expectations', 'the', 'fed', 'would', 'keep', 'raising', 'borrowing', 'costs', 'at', 'that', 'pace', 'through', 'september', 'with', 'some', 'investors', 'betting', 'the', 'fed', 'chair', 'will', 'deliver', 'a', 'super', 'sized', '75', 'basis', 'point', 'move', 'unless', 'price', 'pressures', 'cool', 'powell', 'could', 'reinforce', 'that', 'speculation', 'during', 'his', 'post', 'meeting', 'press', 'conference', 'by', 'declining', 'to', 'take', '75', 'basis', 'points', 'off', 'the', 'table', '–', 'as', 'he', 'explicitly', 'did', 'last', 'month', 'by', 'stating', 'such', 'a', 'move', 'wasn’t', 'being', 'actively', 'considered', '–

## Removing words that carry no context (stop words)

In [33]:
from collections import Counter
# Count how often each token occurs and show the 50 most frequent words.
print(Counter(tokens).most_common(50))

[('the', 49), ('a', 20), ('to', 18), ('of', 18), ('in', 16), ('and', 14), ('is', 10), ('on', 9), ('at', 9), ('will', 9), ('with', 7), ('fed', 6), ('as', 6), ('data', 6), ('central', 6), ('likely', 6), ('point', 5), ('rate', 5), ('inflation', 5), ('bank', 5), ('deliver', 4), ('even', 4), ('this', 4), ('week', 4), ('that', 4), ('by', 4), ('–', 4), ('rates', 4), ('hike', 4), ('show', 4), ('could', 3), ('basis', 3), ('move', 3), ('price', 3), ('for', 3), ('prices', 3), ('friday', 3), ('has', 3), ('may', 3), ('year', 3), ('saw', 3), ('bank’s', 3), ('expected', 3), ('officials', 3), ('boe', 3), ('own', 3), ('decision', 3), ('yen', 3), ('economy', 3), ('new', 2)]


Most of the top words above (e.g. "the", "a", "to", "of") carry no contextual meaning on their own. Such words are called stop words, and we remove them before counting again.

In [34]:
# stopwords

# NOTE: `url` and `response` are rebound here, replacing the article URL and
# response assigned in the earlier cells.
url = "https://raw.githubusercontent.com/theleadio/datascience_demo/master/stopwords.txt"

# Fail loudly on an HTTP error; otherwise the error page itself would be split
# into lines and silently used as the stop-word list. The timeout keeps a
# stalled connection from hanging the notebook.
response = requests.get(url, timeout=30)
response.raise_for_status()

stopwords = response.text.splitlines()  # one list entry per line of the file
print(stopwords)

['a', 'about', 'above', 'across', 'after', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'among', 'an', 'and', 'another', 'any', 'anybody', 'anyone', 'anything', 'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask', 'asked', 'asking', 'asks', 'at', 'away', 'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because', 'become', 'becomes', 'been', 'before', 'began', 'behind', 'being', 'beings', 'best', 'better', 'between', 'big', 'both', 'but', 'by', 'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly', 'clear', 'clearly', 'come', 'could', 'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done', 'down', 'down', 'downed', 'downing', 'downs', 'during', 'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends', 'enough', 'even', 'evenly', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt', 'few', 'find', 'finds', 'first

In [35]:
# Drop stop words and tokens of length 0 or 1 (e.g. a stray "–").
# Built as a list rather than a generator so `tokens` can be iterated more
# than once; a generator would be exhausted by the first Counter(tokens) call
# and every later use would see no tokens at all.
stopword_set = set(stopwords)  # set membership avoids scanning the list per token
tokens = [t for t in tokens if t not in stopword_set and len(t) > 1]

In [36]:
# Show the 50 most frequent tokens that remain after stop-word filtering.
print(Counter(tokens).most_common(50))

[('fed', 6), ('data', 6), ('central', 6), ('rate', 5), ('inflation', 5), ('bank', 5), ('deliver', 4), ('week', 4), ('rates', 4), ('hike', 4), ('basis', 3), ('move', 3), ('price', 3), ('prices', 3), ('friday', 3), ('bank’s', 3), ('expected', 3), ('officials', 3), ('boe', 3), ('decision', 3), ('yen', 3), ('economy', 3), ('powell', 2), ('wednesday', 2), ('increases', 2), ('meeting', 2), ('july', 2), ('may’s', 2), ('raising', 2), ('borrowing', 2), ('costs', 2), ('75', 2), ('pressures', 2), ('cool', 2), ('policy', 2), ('surging', 2), ('consumer', 2), ('release', 2), ('economists', 2), ('projections', 2), ('hikes', 2), ('march', 2), ('survey', 2), ('day', 2), ('debate', 2), ('half', 2), ('boj', 2), ('time', 2), ('increasingly', 2), ('coming', 2)]
