## 3.2 Development of a (Noun - Adjective) Pair Ranker 

#### Import necessary libraries

In [1]:
import pandas as pd
import spacy
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import en_core_web_sm
from spacy.lang.en import English
nlp = en_core_web_sm.load()
from collections import Counter 
from itertools import chain

#### Co-referencing was done on the original text file and the result was saved as 'resolved_data.csv', as shown in 'Assignment_1_3.2_coreferencing.py'

#### Read the csv file (one saved after co-referencing) and create the appropriate DataFrame

In [2]:
# Load the co-reference-resolved reviews and join them into a single string.
df = pd.read_csv('resolved_data.csv')
# Keep only the second column, which must be labelled 'resolved' (the first
# column is presumably the index written when the file was saved -- TODO confirm).
# The earlier rename of column 0 to 'text' was removed: read_csv yields string
# labels, so the integer key never matched and the call changed nothing.
df = df.iloc[:, [1]]
# Drop missing rows and coerce to str so that an empty entry (read back as
# NaN) cannot make str.join raise a TypeError.
text_doc = ' '.join(df['resolved'].dropna().astype(str))

#### (1) Preparing the DataFrame to be used by NLP libraries. 
#### (2) Identifying the <noun, adjective> pairs using the POS tags

In [14]:
# Run the loaded spaCy pipeline over the whole concatenated text; the
# resulting tokens expose the `pos_` tags that the pairing loop relies on.

text_doc_nlp = nlp(text_doc)

def quote(string):
    """Return ``string`` rendered as text, exactly as ``"{}".format`` would."""
    # format() with no spec applies the same default formatting as "{}".
    return format(string)

# Pair every noun with the first adjective that follows it anywhere later in
# the text (the search is not restricted to the noun's own sentence).
noun_adj_pairs2 = []
for i, token in enumerate(text_doc_nlp):
    # Exact comparison: ('NOUN') is the plain string 'NOUN', not a tuple, so
    # the previous `not in` was a substring test that would also have treated
    # an empty or partial tag such as '' or 'N' as a noun.
    if token.pos_ != 'NOUN':
        continue
    next_adj = next(
        (text_doc_nlp[j] for j in range(i + 1, len(text_doc_nlp))
         if text_doc_nlp[j].pos_ == 'ADJ'),
        None,
    )
    # A noun with no adjective after it contributes no pair.
    if next_adj is not None:
        noun_adj_pairs2.append((quote(token), quote(next_adj)))

#### Using the Counter library to find the most frequent pairs and their counts

In [27]:
# Tally how often each (noun, adjective) tuple occurs and print the five
# most frequent pairs together with their counts.

c = Counter(noun_adj_pairs2)
print(c.most_common(5))

[(('food', 'good'), 6), (('food', 'great'), 6), (('view', 'great'), 4), (('place', 'good'), 4), (('year', 'old'), 4)]


In [28]:
#counting the frequency of the most common pairs

def CountFrequency(my_list):
    """Return a dict mapping each item of ``my_list`` to how often it occurs.

    Keys appear in order of first occurrence; items must be hashable.
    """
    occurrences = {}
    for entry in my_list:
        # Missing entries start from zero, so the first sighting yields 1.
        occurrences[entry] = occurrences.get(entry, 0) + 1
    return occurrences

In [29]:
# Rebuild the pair counts and print the 30 most frequent (noun, adjective)
# pairs with their counts.
c = Counter(noun_adj_pairs2) 

print(c.most_common(30))

#### Preparing, from the 30 most frequent pairs, a dictionary with the nouns as keys and a list of adjectives as the value. 
#### For each noun the list holds only the adjectives whose <noun, adjective> pair occurs as often as that noun's most frequent pair

In [31]:
# fdict: noun -> adjectives tied at that noun's highest pair count.
# freq_dcit: noun -> that highest pair count.
fdict = {}
freq_dcit = {}
for (noun, adjective), count in c.most_common(30):
    if noun in fdict:
        # Noun already seen: keep the adjective only if this pair is exactly
        # as frequent as the first (most frequent) pair recorded for the noun.
        if freq_dcit[noun] == count:
            fdict[noun].append(adjective)
        continue
    # most_common() yields pairs by descending count, so the first pair seen
    # for a noun fixes the count the later ones must match.
    freq_dcit[noun] = count
    fdict[noun] = [adjective]

In [32]:
#Showing the output of the nouns and the respective adjectives used to describe the nouns with the same frequency value

freq_dcit, fdict

({'food': 6,
  'view': 4,
  'place': 4,
  'year': 4,
  'drinks': 3,
  'kids': 3,
  'sandwich': 3,
  'friend': 2,
  'sister': 2,
  'mother': 2,
  'omelet': 2,
  'family': 2,
  'service': 2,
  'times': 2,
  'meal': 2,
  'hour': 2,
  'menu': 2},
 {'food': ['good', 'great'],
  'view': ['great'],
  'place': ['good'],
  'year': ['old'],
  'drinks': ['great'],
  'kids': ['fresh', 'good'],
  'sandwich': ['good'],
  'friend': ['first'],
  'sister': ['confusing'],
  'mother': ['confusing'],
  'omelet': ['disappointed'],
  'family': ['regular', 'nice'],
  'service': ['friendly'],
  'times': ['latest'],
  'meal': ['simple'],
  'hour': ['good'],
  'menu': ['good']})

#### Now, to rank the pairs of each unique noun whose adjectives appear with equal frequency, we use TextBlob's 
#### polarity scorer. Example: ranking 'good food', 'great food' and 'fresh food' to decide which is the most appropriate <noun, adj> 
#### pair to choose

In [33]:
# One list of "<adjective> <noun>" phrases per noun, in fdict's key order.
outdict = [
    [adjective + " " + noun for adjective in adjectives]
    for noun, adjectives in fdict.items()
]

In [34]:
# Score every phrase with TextBlob's sentiment polarity, keeping the same
# per-noun grouping and order as outdict.
polArr = [
    [TextBlob(phrase).sentiment.polarity for phrase in phrases]
    for phrases in outdict
]

In [35]:
#output of the polarities of each (noun, adjective) phrase for each unique noun

polArr

[[0.7, 0.8],
 [0.8],
 [0.7],
 [0.1],
 [0.8],
 [0.3, 0.7],
 [0.7],
 [0.25],
 [-0.3],
 [-0.3],
 [-0.75],
 [0.0, 0.6],
 [0.375],
 [0.5],
 [0.0],
 [0.7],
 [0.7]]