# Basics

In [None]:
# Import necessary libraries
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

# Download the NLTK data files used by the cells below:
#   punkt      - pickled Punkt tokenizer models (looked up by older NLTK releases)
#   punkt_tab  - Punkt tables that NLTK 3.9+ loads instead of the pickled models;
#                without it word_tokenize/sent_tokenize raise LookupError there
#   stopwords  - stop-word lists, including the English one
#   wordnet    - lexical database backing WordNetLemmatizer
for resource in ("punkt", "punkt_tab", "stopwords", "wordnet"):
    nltk.download(resource)

# Demonstration input: a two-sentence description of NLP.
# The value intentionally begins and ends with a newline character.
sample_text = (
    "\n"
    "Natural language processing (NLP) is a field of artificial intelligence "
    "in which computers analyze, understand, and derive meaning from human "
    "language in a smart and useful way. "
    "By utilizing NLP, developers can organize and structure knowledge to "
    "perform tasks such as automatic summarization, translation, named entity "
    "recognition, relationship extraction, sentiment analysis, speech "
    "recognition, and topic segmentation."
    "\n"
)

In [2]:
# Split the sample text into word-level tokens. Punctuation marks come back
# as separate tokens, as the output below shows.
word_tokens = word_tokenize(sample_text, language="english")
word_tokens

['Natural',
 'language',
 'processing',
 '(',
 'NLP',
 ')',
 'is',
 'a',
 'field',
 'of',
 'artificial',
 'intelligence',
 'in',
 'which',
 'computers',
 'analyze',
 ',',
 'understand',
 ',',
 'and',
 'derive',
 'meaning',
 'from',
 'human',
 'language',
 'in',
 'a',
 'smart',
 'and',
 'useful',
 'way',
 '.',
 'By',
 'utilizing',
 'NLP',
 ',',
 'developers',
 'can',
 'organize',
 'and',
 'structure',
 'knowledge',
 'to',
 'perform',
 'tasks',
 'such',
 'as',
 'automatic',
 'summarization',
 ',',
 'translation',
 ',',
 'named',
 'entity',
 'recognition',
 ',',
 'relationship',
 'extraction',
 ',',
 'sentiment',
 'analysis',
 ',',
 'speech',
 'recognition',
 ',',
 'and',
 'topic',
 'segmentation',
 '.']

In [3]:
# Split the sample text into sentences. The leading newline of sample_text
# stays attached to the first sentence, as the output below shows.
sent_tokens = sent_tokenize(sample_text, language="english")
sent_tokens

['\nNatural language processing (NLP) is a field of artificial intelligence in which computers analyze, understand, and derive meaning from human language in a smart and useful way.',
 'By utilizing NLP, developers can organize and structure knowledge to perform tasks such as automatic summarization, translation, named entity recognition, relationship extraction, sentiment analysis, speech recognition, and topic segmentation.']

In [4]:
# Build the English stop-word lookup as a set for fast membership tests.
stop_words = set(stopwords.words("english"))

# Keep every token whose lowercase form is not a stop word. Punctuation
# tokens are not stop words, so they are still present after this step.
useful_words = list(
    filter(lambda token: token.lower() not in stop_words, word_tokens)
)
useful_words

['Natural',
 'language',
 'processing',
 '(',
 'NLP',
 ')',
 'field',
 'artificial',
 'intelligence',
 'computers',
 'analyze',
 ',',
 'understand',
 ',',
 'derive',
 'meaning',
 'human',
 'language',
 'smart',
 'useful',
 'way',
 '.',
 'utilizing',
 'NLP',
 ',',
 'developers',
 'organize',
 'structure',
 'knowledge',
 'perform',
 'tasks',
 'automatic',
 'summarization',
 ',',
 'translation',
 ',',
 'named',
 'entity',
 'recognition',
 ',',
 'relationship',
 'extraction',
 ',',
 'sentiment',
 'analysis',
 ',',
 'speech',
 'recognition',
 ',',
 'topic',
 'segmentation',
 '.']

In [5]:
# Punctuation tokens to discard together with the stop words.
punctuation_marks = {
    ".", ",", "(", ")",
    "``", "''", "'",
    "!", "?", ":", ";",
    "-", "--",
}

# A single combined lookup lets one pass drop both stop words and punctuation.
extended_stop_words = stop_words | punctuation_marks
useful_words = list(
    filter(lambda token: token.lower() not in extended_stop_words, word_tokens)
)
useful_words

['Natural',
 'language',
 'processing',
 'NLP',
 'field',
 'artificial',
 'intelligence',
 'computers',
 'analyze',
 'understand',
 'derive',
 'meaning',
 'human',
 'language',
 'smart',
 'useful',
 'way',
 'utilizing',
 'NLP',
 'developers',
 'organize',
 'structure',
 'knowledge',
 'perform',
 'tasks',
 'automatic',
 'summarization',
 'translation',
 'named',
 'entity',
 'recognition',
 'relationship',
 'extraction',
 'sentiment',
 'analysis',
 'speech',
 'recognition',
 'topic',
 'segmentation']

In [6]:
# Token counts before and after removing stop words and punctuation.
tuple(map(len, (word_tokens, useful_words)))

(69, 39)

In [7]:
# Reduce each remaining token to its Porter stem and show the stems as one
# space-separated line.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, useful_words))
print(" ".join(stemmed_words))

natur languag process nlp field artifici intellig comput analyz understand deriv mean human languag smart use way util nlp develop organ structur knowledg perform task automat summar translat name entiti recognit relationship extract sentiment analysi speech recognit topic segment


In [8]:
# Lemmatize each remaining token with WordNet and show the result as one
# space-separated line. lemmatize() is called without a part-of-speech
# argument; in the output below plural nouns are singularized
# ("computers" -> "computer") while forms such as "utilizing" and "named"
# are left unchanged.
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, useful_words))
print(" ".join(lemmatized_words))

Natural language processing NLP field artificial intelligence computer analyze understand derive meaning human language smart useful way utilizing NLP developer organize structure knowledge perform task automatic summarization translation named entity recognition relationship extraction sentiment analysis speech recognition topic segmentation


# Part of Speech (PoS) Tagging and Named-Entity Recognition (NER)

In [1]:
import spacy

# Demonstration input for the spaCy section: a two-sentence description of NLP.
# The value intentionally begins and ends with a newline character.
sample_text = (
    "\n"
    "Natural language processing (NLP) is a field of artificial intelligence "
    "in which computers analyze, understand, and derive meaning from human "
    "language in a smart and useful way. "
    "By utilizing NLP, developers can organize and structure knowledge to "
    "perform tasks such as automatic summarization, translation, named entity "
    "recognition, relationship extraction, sentiment analysis, speech "
    "recognition, and topic segmentation."
    "\n"
)

In [4]:
# Load spaCy's "en_core_web_sm" pipeline. The model package has to be
# installed in the kernel's environment before this cell can run.
spacy_nlp = spacy.load(name="en_core_web_sm")

In [6]:
# Run the loaded pipeline over the sample text.
doc = spacy_nlp(sample_text)

# Part of Speech (PoS) Tagging: one numbered line per token, counting from 1.
for idx, token in enumerate(doc, start=1):
    print(f"{idx}.) {token.text}: {token.pos_}")

# Blank separator between the two listings.
print("\n")

# Named Entity Recognition (NER): one numbered line per entity, counting from 1.
for idx, ent in enumerate(doc.ents, start=1):
    print(f"{idx}.) {ent.text}: {ent.label_}")

1.) 
: SPACE
2.) Natural: ADJ
3.) language: NOUN
4.) processing: NOUN
5.) (: PUNCT
6.) NLP: PROPN
7.) ): PUNCT
8.) is: AUX
9.) a: DET
10.) field: NOUN
11.) of: ADP
12.) artificial: ADJ
13.) intelligence: NOUN
14.) in: ADP
15.) which: PRON
16.) computers: NOUN
17.) analyze: VERB
18.) ,: PUNCT
19.) understand: VERB
20.) ,: PUNCT
21.) and: CCONJ
22.) derive: VERB
23.) meaning: NOUN
24.) from: ADP
25.) human: ADJ
26.) language: NOUN
27.) in: ADP
28.) a: DET
29.) smart: ADJ
30.) and: CCONJ
31.) useful: ADJ
32.) way: NOUN
33.) .: PUNCT
34.) By: ADP
35.) utilizing: VERB
36.) NLP: PROPN
37.) ,: PUNCT
38.) developers: NOUN
39.) can: AUX
40.) organize: VERB
41.) and: CCONJ
42.) structure: VERB
43.) knowledge: NOUN
44.) to: PART
45.) perform: VERB
46.) tasks: NOUN
47.) such: ADJ
48.) as: ADP
49.) automatic: ADJ
50.) summarization: NOUN
51.) ,: PUNCT
52.) translation: NOUN
53.) ,: PUNCT
54.) named: VERB
55.) entity: NOUN
56.) recognition: NOUN
57.) ,: PUNCT
58.) relationship: NOUN
59.) extraction: