In [None]:
!pip install textblob
!pip freeze > requirements.txt

In [2]:
from textblob import TextBlob

In [3]:
# Sample passage that the TextBlob examples below operate on.
# NOTE(review): "Phython" looks like a typo for "Python"; it is left untouched
# here because the recorded outputs (lengths, word lists) depend on this exact string.
text = "In this course we will learn how to perform NLP with TextBlob. TextBlob is a Phython package that's built on top of the famous Python package called NLTK."

In [5]:
# Wrap the sample passage in a TextBlob, then display the object's type and length.
blob = TextBlob(text)
type(blob), len(blob)

(textblob.blob.TextBlob, 154)

In [7]:
# Fetch the NLTK "punkt" package (a no-op when it is already up to date).
# Presumably this is the data the word/sentence splitting below relies on — TODO confirm.
import nltk
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\한국IT비즈니스진흥협회\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Word processing

In [8]:
# `blob.words` gives a WordList; display its type, item count, and contents.
result = blob.words
type(result), len(result), result

(textblob.blob.WordList,
 29,
 WordList(['In', 'this', 'course', 'we', 'will', 'learn', 'how', 'to', 'perform', 'NLP', 'with', 'TextBlob', 'TextBlob', 'is', 'a', 'Phython', 'package', 'that', "'s", 'built', 'on', 'top', 'of', 'the', 'famous', 'Python', 'package', 'called', 'NLTK']))

In [10]:
# A single item of the WordList is a textblob Word; inspect the one at index 5.
type(result[5]), len(result[5]), result[5]

(textblob.blob.Word, 5, 'learn')

In [12]:
# `.string` exposes the same word as a plain built-in str.
type(result[5].string), len(result[5].string), result[5].string

(str, 5, 'learn')

In [11]:
# A Word can be concatenated with a regular str; the recorded output shows
# that the result is a plain str.
new_string = "Test " + result[0]
type(new_string), len(new_string), new_string

(str, 7, 'Test In')

In [13]:
# Sentence processing

In [14]:
# `blob.sentences` gives a list of Sentence objects; display its type, size, and contents.
sentences = blob.sentences
type(sentences), len(sentences), sentences

(list,
 2,
 [Sentence("In this course we will learn how to perform NLP with TextBlob."),
  Sentence("TextBlob is a Phython package that's built on top of the famous Python package called NLTK.")])

In [16]:
# Print the type, length, and text of every sentence, one per line.
for sentence in sentences:
    print(type(sentence), len(sentence), sentence)

<class 'textblob.blob.Sentence'> 62 In this course we will learn how to perform NLP with TextBlob.
<class 'textblob.blob.Sentence'> 91 TextBlob is a Phython package that's built on top of the famous Python package called NLTK.


In [19]:
# Inspect the first sentence on its own.
type(sentences[0]), len(sentences[0]), sentences[0] 

(textblob.blob.Sentence,
 62,
 Sentence("In this course we will learn how to perform NLP with TextBlob."))

In [20]:
# Keep the second sentence in `sentence` for the cells that follow.
sentence = sentences[1]
type(sentence), len(sentence), sentence

(textblob.blob.Sentence,
 91,
 Sentence("TextBlob is a Phython package that's built on top of the famous Python package called NLTK."))

In [26]:
# NOTE(review): without call parentheses this only displays the bound method
# object (see the recorded output); `sentence.title()` would be needed to
# actually invoke it.
sentence.title

<bound method StringlikeMixin.title of Sentence("TextBlob is a Phython package that's built on top of the famous Python package called NLTK.")>

In [22]:
# `.string` exposes the sentence as a plain built-in str.
type(sentence.string), len(sentence.string), sentence.string

(str,
 91,
 "TextBlob is a Phython package that's built on top of the famous Python package called NLTK.")

In [27]:
# Custom tokenizer

In [36]:
# Create an NLTK WordPunctTokenizer instance; it is handed to TextBlob below
# as a custom tokenizer.
from nltk.tokenize import WordPunctTokenizer
tokenizer = WordPunctTokenizer()

In [37]:
# Sample text whose words are separated by a space, a newline, and a tab.
text_2 = "test test\ntest\ttest"

In [38]:
# Build a TextBlob that is given the custom tokenizer, then display its
# type, length, and repr.
blob_2 = TextBlob(text_2, tokenizer = tokenizer)
type(blob_2), len(blob_2), blob_2

(textblob.blob.TextBlob,
 19,
 TextBlob("test test
 test	test"))

In [39]:
# Display the blob's tokens; per the recorded output, the space, newline, and
# tab all act as separators, giving four tokens.
blob_2.tokens

WordList(['test', 'test', 'test', 'test'])

In [None]:
# Part-Of-Speech Tagging

In [40]:
# Short example sentence used for the part-of-speech tagging cells below.
blob_3 = TextBlob("I ran a race in Ottawa")
type(blob_3), len(blob_3), blob_3

(textblob.blob.TextBlob, 22, TextBlob("I ran a race in Ottawa"))

In [42]:
# Fetch the NLTK "averaged_perceptron_tagger" package (a no-op when it is
# already up to date).
# Presumably this is the model behind `.tags` in the next cell — TODO confirm.
import nltk
nltk.download("averaged_perceptron_tagger")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\한국IT비즈니스진흥협회\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [44]:
# `blob_3.tags` gives a list of (word, POS tag) tuples, one per word.
pos = blob_3.tags
type(pos), len(pos), pos

(list,
 6,
 [('I', 'PRP'),
  ('ran', 'VBD'),
  ('a', 'DT'),
  ('race', 'NN'),
  ('in', 'IN'),
  ('Ottawa', 'NNP')])

In [45]:
# Take the first (word, tag) pair and inspect it as a tuple.
first = pos[0]
type(first), len(first), first

(tuple, 2, ('I', 'PRP'))

In [46]:
# The word half of the pair is a textblob Word rather than a plain str.
type(first[0]), len(first[0]), first[0]

(textblob.blob.Word, 1, 'I')

In [None]:
# Challenge problem:
# Write a program that takes user input (e.g. Americans live in the USA) and prints the list of proper nouns (['Americans', 'USA']).
# Use POS tagging!
# singular proper noun tag : NNP
# plural proper noun tag : NNPS

# Input  : Americans live in the USA
# Output : ['Americans', 'USA']

In [48]:
# The two proper-noun POS tags (singular and plural).
tags = ["NNP", "NNPS"]

# Quick membership check: "MP" is not one of the tags, so this evaluates to False.
"MP" in tags

False

In [52]:
# Challenge solution: read one line of text and print the proper nouns in it.
user_input = input()
user_blob = TextBlob(user_input)

# POS tags that mark proper nouns: NNP (singular) and NNPS (plural).
tags = ["NNP", "NNPS"]

# Walk the (word, tag) pairs and keep the plain-str form of every word
# whose tag is one of the proper-noun tags.
result = [word.string for word, pos_tag in user_blob.tags if pos_tag in tags]

print(result)


['Americans', 'USA']
