# Import Data

In [None]:
pip install textblob 

In [None]:
!python -m textblob.download_corpora

# Tokenization

In [57]:
text = 'Today, the weather is nice' 

In [58]:
from textblob import TextBlob

blob = TextBlob(text)

In [70]:
blob.words 

WordList(['Today', 'the', 'weather', 'is', 'nice'])

In [71]:
blob.words[0]

'Today'

# Part-of-speech Tagging

In [73]:
blob.tags 

[('Today', 'NN'),
 ('the', 'DT'),
 ('weather', 'NN'),
 ('is', 'VBZ'),
 ('nice', 'JJ')]

# Noun phrase extraction

In [75]:
blob = TextBlob('The data science field is very broad, and there is a tremendous amount of available information')

In [77]:
blob.noun_phrases

WordList(['data science field', 'tremendous amount', 'available information'])

# Sentiment Analysis

In [None]:
blob = TextBlob('I am excited about our camping trip this weekend')

blob.sentiment 

In [None]:
blob.sentiment.polarity

# Lemmatization

In [81]:
from textblob import Word

Word('ate').lemmatize(pos='v')

'eat'

In [82]:
blob = TextBlob('When was the last time a project you were working on challenged you?')

In [83]:
blob.words 

WordList(['When', 'was', 'the', 'last', 'time', 'a', 'project', 'you', 'were', 'working', 'on', 'challenged', 'you'])

In [84]:
for word in blob.words:
    w = Word(word.lower())
    print(w.lemmatize('v'))

when
be
the
last
time
a
project
you
be
work
on
challenge
you


# Word Definitions

In [85]:
word = Word('browser')

In [88]:
word.definitions

['a viewer who looks around casually without seeking anything in particular',
 'a program used to view HTML documents']

# Spelling Correction

In [89]:
blob = TextBlob('When was the lats time a poject you were working on chalenged you?')

In [90]:
blob.correct()

TextBlob("When was the last time a project you were working on challenged you?")

In [91]:
Word('lats').spellcheck()

[('last', 0.525092936802974),
 ('laws', 0.21654275092936803),
 ('late', 0.1533457249070632),
 ('lads', 0.03531598513011153),
 ('oats', 0.013940520446096654),
 ('lasts', 0.00929368029739777),
 ('hats', 0.00929368029739777),
 ('lots', 0.007434944237918215),
 ('eats', 0.007434944237918215),
 ('lets', 0.006505576208178439),
 ('lays', 0.0037174721189591076),
 ('lata', 0.0037174721189591076),
 ('rats', 0.0018587360594795538),
 ('mats', 0.0018587360594795538),
 ('cats', 0.0018587360594795538),
 ('las', 0.0009293680297397769),
 ('flats', 0.0009293680297397769),
 ('fats', 0.0009293680297397769)]

# Word and Noun Phrase Frequencies

In [None]:
pip install newspaper3k  

In [100]:
from newspaper import Article

url = 'https://www.dataquest.io/blog/learn-data-science/'
article = Article(url)
article.download()

In [101]:
article.parse()

In [108]:
text = article.text
text

'April 27, 2022\n\nHow to Learn Data Science in 2022 (A CEO’s In-Depth Guide)\n\nThe demand for data scientists is at an all time high. If you’re considering a career in data science, now’s the best time to get started.\n\nBut what’s the best way to learn data science?\n\nThat’s a complicated question — I know from experience. A few years ago, I decided to pursue a data science career, but when I researched what I needed to learn, all I could find were long lists of data science courses to take and books to read.\n\nHowever, studies show that most people learn best by doing, not by watching videos or memorizing textbooks.\n\nSo what’s the most effective way to learn data science? I’ve broken it down into five easy steps.\n\n1. Find a reason to learn\n\nThe data science field is very broad, and there’s a tremendous amount of available information. That means it can be difficult to determine what you should focus on. The secret to navigating all this information is a reason to learn. Ide

In [109]:
blob = TextBlob(text)

In [110]:
blob.word_counts

defaultdict(int,
            {'april': 1,
             '27': 1,
             '2022': 2,
             'how': 8,
             'to': 68,
             'learn': 22,
             'data': 48,
             'science': 30,
             'in': 14,
             'a': 55,
             'ceo': 1,
             '’': 44,
             's': 15,
             'in-depth': 1,
             'guide': 2,
             'the': 46,
             'demand': 1,
             'for': 10,
             'scientists': 5,
             'is': 11,
             'at': 10,
             'an': 1,
             'all': 7,
             'time': 6,
             'high': 1,
             'if': 15,
             'you': 69,
             're': 11,
             'considering': 1,
             'career': 2,
             'now': 3,
             'best': 3,
             'get': 6,
             'started': 4,
             'but': 6,
             'what': 10,
             'way': 5,
             'that': 14,
             'complicated': 1,
             'question': 1,


In [111]:
blob.word_counts['i']

19

In [105]:
import pandas as pd  
import plotly.express as px 

In [112]:
frequency = pd.DataFrame.from_dict(
    blob.word_counts, orient='index', columns=['count']
)
plot = px.bar(
    frequency.sort_values(by='count', ascending=False)[:30]
)
plot 