<a href="https://colab.research.google.com/github/rameshavinash94/CMPE257_NLP/blob/main/Part_1_NLP_POS_Substitutions_and_Tones_for_Poets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**IMPORTING LIBRARIES**

In [None]:
import spacy
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import re
import json

**SUPRESS WARNING**

In [None]:
#just to supress warnings
import warnings
warnings.filterwarnings("ignore")

**INSTALL SPACY ENGLISH LANGUAGE MODEL**

In [None]:
#install spacy large english language model to use
#! python -m spacy download en_core_web_lg

In [None]:
#exit()

**POEM CLASS WITH ALL FUNCTIONALITY**

In [None]:
class Poem:

  def __init__(self):
       self.poem1_info=None
       self.poem2_info=None
       self.poem1=None
       self.poem2=None
       self.nlp = spacy.load('en_core_web_lg')
       self.doc1_pos=[]
       self.doc2_pos=[]
       self.corpus1=''
       self.corpus2=''
       self.new_poem3=''
       self.new_poem4=''
       self.doc1_mappings={}
       self.doc2_mappings={}

  def UserInput(self):
        '''
        This Function is to get user query for poem retrival
        '''
        self.poem1 = input('Enter the exact Poem name 1 to retrieve:')
        self.poem2 = input('Enter the exact Poem name 2 to retrieve:')
        if not (self.poem1 or self.poem2):
          raise Exception('User input is empty')
        
        #user input preprocessing per api
        self.poem1= self.poem1.replace(' ','-')
        self.poem2= self.poem2.replace(' ','-')
        
        return self.poem1,self.poem2

  def Scrapper(self):
      '''
        #Iterate and Scrap data to get poem and author name and store in Df
      '''
      poem1_info={'poet':'','peom':''}
      poem2_info={'poet':'','peom':''}
      iter=[self.poem1,self.poem2]
      for i,itr in enumerate(iter):
        url='https://allpoetry.com/{iter}'.format(iter=itr)
        html_content = requests.get(url).text
        doc=BeautifulSoup(html_content,'html.parser')

        for div in doc.find_all('div',attrs={"class" : re.compile("^bio*")})[0]:
            ahref = div.find_all("a",attrs={"class": "u nocolor"})
            for x in ahref:
              if x!=None:
                if i==0:
                    poem1_info['poet']= x.text
                else:
                    poem2_info['poet']= x.text

        for div in doc.find_all('div',attrs={"class" : re.compile("^orig")}):
          if i==0:
            poem1_info['peom']= div.text
          else:
            poem2_info['peom']= div.text
      
      self.poem1_info=poem1_info
      self.poem2_info=poem2_info
      print(self.poem1_info)
      print(self.poem2_info)

  def Convert_df(self):
    '''
    This function converts the peom info to DF.
    '''
    self.poet_1_df = pd.DataFrame(self.poem1_info,index=[0])
    self.poet_2_df = pd.DataFrame(self.poem2_info,index=[0])

  def lemmatization(self,corpus):
        '''
        This Function removes stop words and lemmatizes the provided corpus
        :return: Stop word removed & Canonical form corpus (canonical)
        '''
        canonical=[]
        for x in corpus:
            temp=[]
            for token in self.nlp(x):
                if not token.is_stop:
                    temp.append(token.lemma_)

            canonical.append(" ".join(temp))

        return canonical

  def PreprocessPoems(self,lematization=True):
        '''
        This Function is to preprocess user input,calls lemmatization function and extract only the nceesary POS if requested.
        '''
        if lematization==True:
          self.corpus1 = self.lemmatization(self.poet_1_df['peom'])
          self.corpus2 = self.lemmatization(self.poet_2_df['peom'])
          print('lemmatizated of Poems 1 & 2')
          print('============')
          print('\n')
          print(self.corpus1)
          print('\n')
          print(self.corpus2)
          print('\n****************************')

        else:
          self.corpus1=self.poet_1_df['peom']
          self.corpus2=self.poet_2_df['peom']

        removed_sp_char_1 = [re.sub(r"[^a-zA-Z0-9]+", ' ', doc) for doc in self.corpus1]
        removed_sp_char_2 = [re.sub(r"[^a-zA-Z0-9]+", ' ', doc) for doc in self.corpus2]
        print('Cleaned Poem1')
        print('============')
        print(removed_sp_char_1)
        print('\n')
        print('Cleaned Poem2')
        print('============')
        print(removed_sp_char_2)
        print('\n****************************')

        doc1= self.nlp(removed_sp_char_1[0])
        doc2= self.nlp(removed_sp_char_2[0])
        for token in doc1:
            if token.pos_ in ('PROPN', 'NUM', 'VERB', 'NOUN', 'ADJ'):
              self.doc1_pos.append(token.text)
        
        for token in doc2:
            if token.pos_ in ('PROPN', 'NUM', 'VERB', 'NOUN', 'ADJ'):
              self.doc2_pos.append(token.text)
        
        print('POS OF POEMS')
        print('============')
        print('Poem1 POS')
        print(self.doc1_pos)
        print('\n')
        print('Poem2 POS')
        print(self.doc2_pos)
  
  def write_json(self):
    '''
    This function writes to json
    '''
    poem1_details=self.poem1_info
    poem1_details['POS']=self.doc1_pos

    poem2_details=self.poem2_info
    poem2_details['POS']=self.doc2_pos

    with open("poet1.json", "w") as outfile:
      json.dump(poem1_details, outfile)

    with open("poet2.json", "w") as outfile:
      json.dump(poem2_details, outfile)
  
  def ComputeSimilarity(self):
    '''
    This function computes cosine similarity
    '''
    for x in self.doc1_pos:
      max=-1
      for y in self.doc2_pos:
        score=self.nlp(x).similarity(self.nlp(y))
        if score > max:
          self.doc1_mappings[x]=[y,score]
          max=score
    
    for x in self.doc2_pos:
      max=-1
      for y in self.doc1_pos:
        score=self.nlp(x).similarity(self.nlp(y))
        if score > max:
          self.doc2_mappings[x]=[y,score]
          max=score
    print('Poem1 Similar POS in Poem2')
    print(self.doc1_mappings)
    print('\n')
    print('Poem2 Similar POS in Poem2')
    print(self.doc2_mappings)

  def Create_new_Poems(self):
    '''
    This functino creates new poems based on the provided one's.
    '''
    self.new_poem3=str(self.corpus1[0])
    self.new_poem4=str(self.corpus2[0])
    for key, value in self.doc1_mappings.items():
    # Replace key POS with value POS in string
      self.new_poem3 = self.new_poem3.replace(str(key), str(value[0]))
    
    for key, value in self.doc2_mappings.items():
    # Replace key POS with value POS in string
      self.new_poem4 = self.new_poem4.replace(str(key), str(value[0]))
  
  def print_new_Poems(self):
    '''
    This function prints the new poem
    '''
    print('\n New Poem1 based on POS of Poem2')
    print(self.new_poem3)
    print('\n New Poem2 based on POS of Poem1')
    print(self.new_poem4)

**CREATE OBJECT FOR THE CLASS**

In [None]:
#create object for the class
tmp = Poem()

**GET INPUT FOR 2 POEMS**

In [None]:
# Poet1 - Edward Dyer   : Poem: The-lowest-trees-have-tops
# Poet2 - Walt Whitman : A-Promise-To-California

In [None]:
#GET USER INPUT
tmp.UserInput()

Enter the exact Poem name 1 to retrieve:The-lowest-trees-have-tops
Enter the exact Poem name 2 to retrieve:A-Promise-To-California


('The-lowest-trees-have-tops', 'A-Promise-To-California')

**RUN SCRAPPER TO EXTRACT POEM FROM ALLPOETRY**

In [None]:
#Call scrapper function
tmp.Scrapper()

{'poet': 'Edward Dyer', 'peom': '\r\n\tThe lowest trees have tops, the ant her gall,\r\n\tThe fly her spleen, the little spark his heat;\r\n\tAnd slender hairs cast shadows, though but small,\r\n\tAnd bees have stings, although they be not great.\r\n\t\xa0\xa0\xa0\xa0 Seas have their source, and so have shallow springs,\r\n\t\xa0\xa0\xa0\xa0 And love is love, in beggars and in kings.\r\n\tWhere waters smoothest run, there deepest are the fords,\r\n\tThe dial stirs, yet none perceives it move;\r\n\tThe firmest faith is found in fewest words,\r\n\tThe turtles do not sing, and yet they love;\r\n\t\xa0\xa0\xa0\xa0 True hearts have ears and eyes, no tongues to speak;\r\n\t\xa0\xa0\xa0\xa0 They hear and see, and sigh, and then they break.'}
{'poet': 'Walt Whitman', 'peom': 'A PROMISE to California,\nAlso to the great Pastoral Plains, and for Oregon:\nSojourning east a while longer, soon I travel toward you, to remain,\n \xa0 \xa0 \xa0 \xa0 to teach robust American love;\nFor I know very well

**CONVERT THE INFO TO DF TO EASIER VISUALIZATION**

In [None]:
#convert to df both poem1, poem2
tmp.Convert_df()

In [None]:
#view df1
tmp.poet_1_df

Unnamed: 0,poet,peom
0,Edward Dyer,"\r\n\tThe lowest trees have tops, the ant her ..."


In [None]:
#view df2
tmp.poet_2_df

Unnamed: 0,poet,peom
0,Walt Whitman,"A PROMISE to California,\nAlso to the great Pa..."


**PERFORM NLP PREPROCESSING - STOP WORD REMOVAL/LEMMATIZATION/CLEAN THE CORPUS**

In [None]:
#run preporcessing function
tmp.PreprocessPoems()

lemmatizated of Poems 1 & 2


['\r\n\t low tree top , ant gall , \r\n\t fly spleen , little spark heat ; \r\n\t slender hair cast shadow , small , \r\n\t bee sting , great . \r\n\t\xa0\xa0\xa0\xa0  Seas source , shallow spring , \r\n\t\xa0\xa0\xa0\xa0  love love , beggar king . \r\n\t water smooth run , deep ford , \r\n\t dial stir , perceive ; \r\n\t firm faith find few word , \r\n\t turtle sing , love ; \r\n\t\xa0\xa0\xa0\xa0  true heart ear eye , tongue speak ; \r\n\t\xa0\xa0\xa0\xa0  hear , sigh , break .']


['promise California , \n great Pastoral Plains , Oregon : \n sojourn east longer , soon travel , remain , \n \xa0 \xa0 \xa0 \xa0  teach robust american love ; \n know robust love belong , inland , \n \xa0 \xa0 \xa0 \xa0  Western Sea ; \n state tend inland , Western Sea — \n \xa0 \xa0 \xa0 \xa0  .']

****************************
Cleaned Poem1
[' low tree top ant gall fly spleen little spark heat slender hair cast shadow small bee sting great Seas source shallow spring love lov

**POET1 SIMILARITY TO POET2**

In [None]:
poet1= tmp.nlp(tmp.corpus1[0])
poet2= tmp.nlp(tmp.corpus2[0])

print('Similariy Score between Poets\n')
print(poet1.similarity(poet2))

Similariy Score between Poets

0.8908293057099301


**SAVE TO JSON**

In [None]:
tmp.write_json()

**COMPUTE COSINE SIMILARITY OF POS1 WITH POS2 AND POS2 WITH POS1**

In [None]:
#compute cosine similarity of pos
tmp.ComputeSimilarity()

Poem1 Similar POS in Poem2
{'low': ['tend', 0.40840405298898613], 'tree': ['Sea', 0.313362436849934], 'top': ['great', 0.44236875338523934], 'ant': ['american', 0.3150424245599439], 'gall': ['american', 0.21592492221209175], 'fly': ['travel', 0.3645267819045084], 'spleen': ['tend', 0.13374097976809374], 'little': ['great', 0.5762745594017369], 'spark': ['promise', 0.3296348155907348], 'heat': ['Sea', 0.31678863986992073], 'slender': ['robust', 0.32855279202726356], 'hair': ['love', 0.3386225894484643], 'cast': ['love', 0.31235649917176816], 'shadow': ['love', 0.3157092622181797], 'small': ['great', 0.45996015771136195], 'bee': ['american', 0.32704123225849774], 'sting': ['Sea', 0.21636614359029013], 'great': ['great', 1.0], 'Seas': ['Sea', 0.7588611260958456], 'source': ['state', 0.36059794225701935], 'shallow': ['Sea', 0.4639173852276098], 'spring': ['great', 0.3497330871515922], 'love': ['love', 1.0], 'beggar': ['sojourn', 0.29596048286044946], 'king': ['great', 0.36094378527190957],

**CREATE NEW POEMS**

In [None]:
#create new poems
tmp.Create_new_Poems()

**PRINT THE NEW POEMS**

In [None]:
#print the new poems
tmp.print_new_Poems()


 New Poem1 based on POS of Poem2

	 tend Sea great , american american , 
	 travel tend , great promise Sea ; 
	 robust love love love , great , 
	 american Sea , great . 
	      Sea state , shaltend great , 
	      love love , sojourn great . 
	 Sea smooth know , Sea California , 
	 american tend , tend ; 
	 robust promise know know know , 
	 Sea love , love ; 
	      know love ear know , love know ; 
	      know , promise , know .

 New Poem2 based on POS of Poem1
faith ford , 
 great faith Seass , ford : 
 Seass east longer , soon find , faith , 
          speak small ford love ; 
 hear small love faith , Seass , 
          Seass Seas ; 
 true perceive Seass , Seass Seas — 
          .


###**NEW POEM WITHOUT LEMMATIZATION**

In [None]:
#run preporcessing function
tmp.PreprocessPoems(lematization=False)

Cleaned Poem1
[' The lowest trees have tops the ant her gall The fly her spleen the little spark his heat And slender hairs cast shadows though but small And bees have stings although they be not great Seas have their source and so have shallow springs And love is love in beggars and in kings Where waters smoothest run there deepest are the fords The dial stirs yet none perceives it move The firmest faith is found in fewest words The turtles do not sing and yet they love True hearts have ears and eyes no tongues to speak They hear and see and sigh and then they break ']


Cleaned Poem2
['A PROMISE to California Also to the great Pastoral Plains and for Oregon Sojourning east a while longer soon I travel toward you to remain to teach robust American love For I know very well that I and robust love belong among you inland and along the Western Sea For These States tend inland and toward the Western Sea and I will also ']

****************************
POS OF POEMS
Poem1 POS
['low', 'tree'

In [None]:
#compute cosine similarity of pos
tmp.ComputeSimilarity()

Poem1 Similar POS in Poem2
{'low': ['while', 0.43165374058737765], 'tree': ['Sea', 0.313362436849934], 'top': ['great', 0.44236875338523934], 'ant': ['american', 0.3150424245599439], 'gall': ['american', 0.21592492221209175], 'fly': ['travel', 0.3645267819045084], 'spleen': ['tend', 0.13374097976809374], 'little': ['great', 0.5762745594017369], 'spark': ['promise', 0.3296348155907348], 'heat': ['while', 0.3544596598157722], 'slender': ['robust', 0.32855279202726356], 'hair': ['love', 0.3386225894484643], 'cast': ['will', 0.33435881085532787], 'shadow': ['love', 0.3157092622181797], 'small': ['great', 0.45996015771136195], 'bee': ['american', 0.32704123225849774], 'sting': ['while', 0.23451255950600286], 'great': ['great', 1.0], 'Seas': ['Sea', 0.7588611260958456], 'source': ['state', 0.36059794225701935], 'shallow': ['Sea', 0.4639173852276098], 'spring': ['will', 0.40496911437068195], 'love': ['love', 1.0], 'beggar': ['sojourn', 0.29596048286044946], 'king': ['great', 0.360943785271909

In [None]:
#create new poems
tmp.Create_new_Poems()

In [None]:
#print the new poems
tmp.print_new_Poems()


 New Poem1 based on POS of Poem2

	The whileest Sea have greats, the american her american,
	The travel her tend, the great promise his while;
	And robust loves will loves, though but great,
	And americans have whiles, although they will not great.
	     Sea have their state, and so have shalwhile wills,
	     And love is love, in sojourns and in greats.
	Where Seas robust will, there Seaest are the Californias,
	The while wills, yet know tends it will;
	The robustest promise is know in knowest knows,
	The Seas do not love, and yet they love;
	     know loves have while and whiles, no whiles to know;
	     They know and know, and promise, and then they while.

 New Poem2 based on POS of Poem1
A faith to ford,
Also to the great faith waters, and for ford:
low waters a few longer, soon I find toward you, to be,
         to speak small ford love;
For I see very well that I and small love none among you, waters,
         and along the waters Seas;
For These be perceive waters,