# Import Data

In [171]:
import pandas as pd
import plotly.express as px
from newspaper import Article
from textblob import TextBlob, Word
from textblob.wordnet import VERB

In [31]:
# Point newspaper's Article at the blog post and download it.
# Parsing (`article.parse()`) and text extraction (`article.text`) happen in the following cells.
url = 'https://towardsdatascience.com/how-to-learn-data-science-when-life-does-not-give-you-a-break-a26a6ea328fd'
article = Article(url)
article.download()

In [32]:
article.parse()

In [34]:
text = article.text
text

'How to Learn Data Science when Life does not Give You a Break\n\nI Struggled to Dedicate Time for Data Science. But Finding new Strategies Enables me to Boost my Learning Rate and Accomplish More\n\nWith Every Struggle, there is a Way\n\nWith the goal of getting into the data science field, you decide on what sets of skills you want to obtain. Now you just need to have the time to sit down to obtain those skills. Eventually, you will become a competitive candidate and get the job that you want.\n\nBut life does not let you sit down. It demands you to keep going and does your normal stuff while still expects you to get 2+ years of experience in your resume. You feel stuck and frustrated. You know that you need to learn and get some projects done but find it so difficult to stick with your commitment. So should you give it up?\n\nPhoto by Steve Halama on Unsplash\n\nI experienced the same things 2 months ago. As a mathematics student with 5 classes and a job, I found it extremely diffic

# Tokenization

In [181]:
blob = TextBlob('When I was about to give up, I told myself to keep going. It is not about working harder; it is about working smarter.')

In [183]:
blob.words 

WordList(['When', 'I', 'was', 'about', 'to', 'give', 'up', 'I', 'told', 'myself', 'to', 'keep', 'going', 'It', 'is', 'not', 'about', 'working', 'harder', 'it', 'is', 'about', 'working', 'smarter'])

In [184]:
blob.words[0]

'When'

# Part-of-speech Tagging

In [60]:
# Download NLTK's 'averaged_perceptron_tagger' data into the local nltk_data directory.
# NOTE(review): presumably this is what `blob.tags` in the next cell relies on — confirm.
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/user/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [185]:
blob.tags 

[('When', 'WRB'),
 ('I', 'PRP'),
 ('was', 'VBD'),
 ('about', 'RB'),
 ('to', 'TO'),
 ('give', 'VB'),
 ('up', 'RP'),
 ('I', 'PRP'),
 ('told', 'VBD'),
 ('myself', 'PRP'),
 ('to', 'TO'),
 ('keep', 'VB'),
 ('going', 'VBG'),
 ('It', 'PRP'),
 ('is', 'VBZ'),
 ('not', 'RB'),
 ('about', 'IN'),
 ('working', 'VBG'),
 ('harder', 'NN'),
 ('it', 'PRP'),
 ('is', 'VBZ'),
 ('about', 'IN'),
 ('working', 'VBG'),
 ('smarter', 'NN')]

# Noun Phrase Extraction

In [63]:
nltk.download('brown')

[nltk_data] Downloading package brown to /home/user/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


True

In [65]:
blob = TextBlob('I just needed to switch my learning strategies.')

In [66]:
blob.noun_phrases

WordList(['learning strategies'])

# Sentiment Analysis

In [69]:
# Rebind `blob` to a single sentence and display its sentiment;
# the result carries a `polarity` and a `subjectivity` field.
blob = TextBlob('Knowing that I can complete these things in a short amount of time makes me excited and be in the flow.')

blob.sentiment 

Sentiment(polarity=0.15833333333333333, subjectivity=0.48333333333333334)

In [68]:
blob.sentiment.polarity

0.15833333333333333

# Lemmatization

In [110]:
blob = TextBlob('Knowing that I can complete these things in a short amount of time makes me excited and be in the flow.')

In [111]:
blob.words 

WordList(['Knowing', 'that', 'I', 'can', 'complete', 'these', 'things', 'in', 'a', 'short', 'amount', 'of', 'time', 'makes', 'me', 'excited', 'and', 'be', 'in', 'the', 'flow'])

In [114]:
# Lower-case each token and print its lemma, treating every token as a verb
# (so e.g. "makes" is printed as "make"); VERB is the WordNet verb POS tag.
for token in blob.words:
    print(Word(token.lower()).lemmatize(VERB))

know
that
i
can
complete
these
things
in
a
short
amount
of
time
make
me
excite
and
be
in
the
flow


# Word Definitions

In [118]:
word = Word('browser')

In [119]:
word.synsets

[Synset('browser.n.01'), Synset('browser.n.02')]

In [120]:
word.definitions

['a viewer who looks around casually without seeking anything in particular',
 'a program used to view HTML documents']

# Spelling Correction

In [132]:
blob = TextBlob('I just neded to swicth my learnin strategies.')

In [133]:
blob.correct()

TextBlob("I just need to switch my learning strategics.")

In [136]:
Word('larnin').spellcheck()

[('learning', 0.25384615384615383),
 ('margin', 0.2153846153846154),
 ('latin', 0.13076923076923078),
 ('martin', 0.05384615384615385),
 ('lain', 0.046153846153846156),
 ('earning', 0.038461538461538464),
 ('marin', 0.007692307692307693),
 ('darwin', 0.007692307692307693)]

# Word and Noun Phrase Frequencies

In [190]:
blob = TextBlob(text)

In [191]:
blob.word_counts['i']

81

In [192]:
# Turn TextBlob's word -> count mapping into a one-column frame (index = word,
# column = 'count'), keep the 30 most frequent words, and chart them as bars.
frequency = pd.DataFrame.from_dict(blob.word_counts, orient='index', columns=['count'])
top_words = frequency.sort_values(by='count', ascending=False).head(30)
# A title lets the figure stand on its own when the notebook is skimmed.
plot = px.bar(top_words, title='30 most frequent words in the article')
plot 