# NLP Uygulamaları

## N-Gram

In [1]:
from warnings import filterwarnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/future warnings emitted by the
# libraries used below - confirm that silencing them is intended.
filterwarnings('ignore')

In [2]:
import pandas as pd

In [3]:
from textblob import TextBlob

In [4]:
# Short Turkish sample text used as the input of the n-gram example.
# NOTE(review): "kombinasyolarını" looks like a typo for "kombinasyonlarını";
# left unchanged because the recorded output depends on the exact text.
a = """Bu örneği anlaşılabilmesi için daha uzun bir metin üzerinden göstereceğim.
N-gram'lar birlikte kullanılan kelimelerin kombinasyolarını gösterir"""

In [5]:
# Slide a window of three consecutive words over the sample text (trigrams).
TextBlob(a).ngrams(n=3)

[WordList(['Bu', 'örneği', 'anlaşılabilmesi']),
 WordList(['örneği', 'anlaşılabilmesi', 'için']),
 WordList(['anlaşılabilmesi', 'için', 'daha']),
 WordList(['için', 'daha', 'uzun']),
 WordList(['daha', 'uzun', 'bir']),
 WordList(['uzun', 'bir', 'metin']),
 WordList(['bir', 'metin', 'üzerinden']),
 WordList(['metin', 'üzerinden', 'göstereceğim']),
 WordList(['üzerinden', 'göstereceğim', "N-gram'lar"]),
 WordList(['göstereceğim', "N-gram'lar", 'birlikte']),
 WordList(["N-gram'lar", 'birlikte', 'kullanılan']),
 WordList(['birlikte', 'kullanılan', 'kelimelerin']),
 WordList(['kullanılan', 'kelimelerin', 'kombinasyolarını']),
 WordList(['kelimelerin', 'kombinasyolarını', 'gösterir'])]

## Part-of-speech tagging (POS)

In [6]:
import nltk

In [7]:
# Download the NLTK "averaged_perceptron_tagger" resource
# (presumably required by the POS tagging cells below - TODO confirm).
nltk.download("averaged_perceptron_tagger")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/sametsengun/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [8]:
# Raw story titles, one per line. The stray punctuation and digits are part of
# the sample data and are removed by the cleaning step that follows.
metin = """
A Scandal in Bohemia! 01
The Red-headed League,2
A Case, of Identity 33
The Boscombe Valley Mystery4
The Five Orange Pips1
The Man with? the Twisted Lip
The Adventure of the Blue Carbuncle
The Adventure of the Speckled Band
The Adventure of the Engineer's Thumb
The Adventure of the Noble Bachelor
The Adventure of the Beryl Coronet
The Adventure of the Copper Beeches"""

# One list entry per line; entry 0 is the empty string that precedes the
# first newline of the literal.
v_metin = metin.split("\n")
v = pd.Series(v_metin)

# Positional slice that skips the empty first entry; the remaining rows keep
# their original index labels (1..12).
metin_vektoru = v.iloc[1:]

In [9]:
# Wrap the raw titles in a single-column frame; `mdf` is kept untouched as the
# raw reference so this cell can be re-run safely.
mdf = pd.DataFrame(metin_vektoru, columns=["hikayeler"])

# Normalise the titles in one chain that always starts from `mdf`:
#   1. lower-case every word and collapse runs of whitespace,
#   2. drop punctuation (anything that is neither a word character nor whitespace),
#   3. drop digits.
# regex=True is required: since pandas 2.0 `Series.str.replace` treats the
# pattern as a literal string by default, so without it steps 2 and 3 would
# silently leave the text unchanged. Raw strings avoid the invalid-escape
# warning that "\w" / "\d" trigger in ordinary string literals.
d_mdf = pd.DataFrame(
    mdf["hikayeler"]
    .apply(lambda baslik: " ".join(kelime.lower() for kelime in baslik.split()))
    .str.replace(r"[^\w\s]", "", regex=True)
    .str.replace(r"\d", "", regex=True)
)

In [10]:
# POS-tag a single cleaned title: the row whose index label is 2.
TextBlob(d_mdf.loc[2, "hikayeler"]).tags

[('the', 'DT'), ('redheaded', 'JJ'), ('league', 'NN')]

In [11]:
# POS-tag every cleaned title; each row becomes a list of (word, tag) pairs.
d_mdf["hikayeler"].apply(lambda baslik: TextBlob(baslik).tags)

1     [(a, DT), (scandal, NN), (in, IN), (bohemia, NN)]
2            [(the, DT), (redheaded, JJ), (league, NN)]
3       [(a, DT), (case, NN), (of, IN), (identity, NN)]
4     [(the, DT), (boscombe, NN), (valley, NN), (mys...
5     [(the, DT), (five, CD), (orange, NN), (pips, N...
6     [(the, DT), (man, NN), (with, IN), (the, DT), ...
7     [(the, DT), (adventure, NN), (of, IN), (the, D...
8     [(the, DT), (adventure, NN), (of, IN), (the, D...
9     [(the, DT), (adventure, NN), (of, IN), (the, D...
10    [(the, DT), (adventure, NN), (of, IN), (the, D...
11    [(the, DT), (adventure, NN), (of, IN), (the, D...
12    [(the, DT), (adventure, NN), (of, IN), (the, D...
Name: hikayeler, dtype: object

## Chunking (shallow parsing)

In [12]:
# Keep the per-title (word, tag) lists in `pos`: one list per row of the frame.
pos = d_mdf["hikayeler"].apply(lambda baslik: TextBlob(baslik).tags)

In [13]:
# Display the Series of per-title tag lists.
pos

1     [(a, DT), (scandal, NN), (in, IN), (bohemia, NN)]
2            [(the, DT), (redheaded, JJ), (league, NN)]
3       [(a, DT), (case, NN), (of, IN), (identity, NN)]
4     [(the, DT), (boscombe, NN), (valley, NN), (mys...
5     [(the, DT), (five, CD), (orange, NN), (pips, N...
6     [(the, DT), (man, NN), (with, IN), (the, DT), ...
7     [(the, DT), (adventure, NN), (of, IN), (the, D...
8     [(the, DT), (adventure, NN), (of, IN), (the, D...
9     [(the, DT), (adventure, NN), (of, IN), (the, D...
10    [(the, DT), (adventure, NN), (of, IN), (the, D...
11    [(the, DT), (adventure, NN), (of, IN), (the, D...
12    [(the, DT), (adventure, NN), (of, IN), (the, D...
Name: hikayeler, dtype: object

In [14]:
# English example sentence used for the chunking demonstration.
# NOTE(review): the text contains "efficent" (sic); it is left unchanged because
# the recorded tagger output depends on the exact wording.
cumle = "R and Python are useful data science tools for the new or old data scientists who eager to do efficent data science task"


In [15]:
# Rebinds `pos`: it previously held a Series of tag lists (one per title) and
# now holds the flat list of (word, tag) tuples for the single sentence `cumle`.
pos = TextBlob(cumle).tags

In [16]:
# Display the (word, tag) tuples of the example sentence.
pos

[('R', 'NNP'),
 ('and', 'CC'),
 ('Python', 'NNP'),
 ('are', 'VBP'),
 ('useful', 'JJ'),
 ('data', 'NNS'),
 ('science', 'NN'),
 ('tools', 'NNS'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('new', 'JJ'),
 ('or', 'CC'),
 ('old', 'JJ'),
 ('data', 'NNS'),
 ('scientists', 'NNS'),
 ('who', 'WP'),
 ('eager', 'VBP'),
 ('to', 'TO'),
 ('do', 'VB'),
 ('efficent', 'JJ'),
 ('data', 'NNS'),
 ('science', 'NN'),
 ('task', 'NN')]

In [17]:
# Chunk grammar with a single rule named NP: an optional determiner (<DT>?),
# any number of adjectives (<JJ>*), followed by exactly one <NN> tag.
# The rule ends in <NN> only, so tokens tagged NNS are not part of any NP chunk.
reg_exp = "NP: {<DT>?<JJ>*<NN>}"
# Build a regular-expression chunk parser from that grammar.
rp = nltk.RegexpParser(reg_exp)

In [18]:
# Apply the chunk grammar to the tagged sentence; the result is a tree whose
# root is labelled S and whose matched spans are wrapped in NP subtrees.
sonuclar = rp.parse(pos)

In [19]:
# Display the parse tree. In the recorded run the rich display reported that the
# Ghostscript executable was not found (LookupError) and only the plain
# Tree(...) repr was shown; print(sonuclar) gives a readable text form instead.
sonuclar

The Ghostscript executable isn't found.
See http://web.mit.edu/ghostscript/www/Install.htm
If you're using a Mac, you can try installing
https://docs.brew.sh/Installation then `brew install ghostscript`


LookupError: 

Tree('S', [('R', 'NNP'), ('and', 'CC'), ('Python', 'NNP'), ('are', 'VBP'), ('useful', 'JJ'), ('data', 'NNS'), Tree('NP', [('science', 'NN')]), ('tools', 'NNS'), ('for', 'IN'), ('the', 'DT'), ('new', 'JJ'), ('or', 'CC'), ('old', 'JJ'), ('data', 'NNS'), ('scientists', 'NNS'), ('who', 'WP'), ('eager', 'VBP'), ('to', 'TO'), ('do', 'VB'), ('efficent', 'JJ'), ('data', 'NNS'), Tree('NP', [('science', 'NN')]), Tree('NP', [('task', 'NN')])])

In [20]:
# Plain-text rendering of the tree: one token/tag per line, NP chunks in parentheses.
print(sonuclar)

(S
  R/NNP
  and/CC
  Python/NNP
  are/VBP
  useful/JJ
  data/NNS
  (NP science/NN)
  tools/NNS
  for/IN
  the/DT
  new/JJ
  or/CC
  old/JJ
  data/NNS
  scientists/NNS
  who/WP
  eager/VBP
  to/TO
  do/VB
  efficent/JJ
  data/NNS
  (NP science/NN)
  (NP task/NN))


In [21]:
#sonuclar.draw()

## Named Entity Recognition

In [22]:
from nltk import word_tokenize, pos_tag, ne_chunk
# Download the 'maxent_ne_chunker' and 'words' NLTK resources
# (presumably the data ne_chunk relies on - TODO confirm).
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /Users/sametsengun/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     /Users/sametsengun/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [23]:
# Example sentence for named-entity recognition (rebinds `cumle`).
# NOTE(review): the text contains "attented" and "Newyork" (sic); in the recorded
# output "Hadley" is labelled GPE and "Newyork" ORGANIZATION - confirm whether the
# misspellings are intentional before relying on these labels.
cumle = "Hadley is creative people who work for R Studio AND he attented conference at Newyork last year"
# Pipeline, innermost call first: tokenize -> POS-tag -> named-entity chunk, then print the tree.
print(ne_chunk(pos_tag(word_tokenize(cumle))))

(S
  (GPE Hadley/NNP)
  is/VBZ
  creative/JJ
  people/NNS
  who/WP
  work/VBP
  for/IN
  (ORGANIZATION R/NNP Studio/NNP)
  AND/CC
  he/PRP
  attented/VBD
  conference/NN
  at/IN
  (ORGANIZATION Newyork/NNP)
  last/JJ
  year/NN)
