In [9]:
# Install plotting dependency into the *kernel's* environment (%pip, not !pip)
# and pin it to the version this notebook was last run with.
# NOTE(review): "module" looks like a placeholder copied from
# "pip install <module>"; nothing visible imports it - confirm and drop it.
%pip install module pygal==2.4.0


Collecting pygal
  Downloading pygal-2.4.0-py2.py3-none-any.whl (127kB)
[K    100% |████████████████████████████████| 133kB 258kB/s ta 0:00:01
[?25hInstalling collected packages: pygal
Successfully installed pygal-2.4.0


In [14]:
import nltk
import ast
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk.corpus import sentiwordnet

# File names used by the pipeline: the raw review corpus, followed by the
# artefact each stage writes (all relative to the working directory).
_ReviewDataset = 'reviewsHahanoyume.txt'       # raw reviews (pipeline input)
_PreProcessedData = 'PreProcessedData.txt'     # written by preProcessing
_TokenizedReviews = 'TokenizedReviews.txt'     # written by tokenizeReviews
_PosTaggedReviews = 'PosTaggedReviews.txt'     # written by posTagging
_Opinions = 'OpinionsOfUser.txt'               # written by opinionExtraction


def preProcessing(inputFileStr,outputFileStr,printResult):
    """Remove stop words from a text file and write the remaining words out.

    The input is split on whitespace; every word that exactly matches an NLTK
    English stop word, or one of the extra tokens added below, is dropped.
    The surviving words are joined with single spaces and written to the
    output file, replacing any previous content.

    Args:
        inputFileStr: path of the text file to read.
        outputFileStr: path of the file to (over)write with the filtered text.
        printResult: when truthy, print the stop-word list and the filtered
            text.

    Raises:
        OSError: if either file cannot be opened.
        LookupError: raised by NLTK when the stopwords corpus is unavailable.
    """
    # Context managers guarantee both handles are closed, even on error.
    with open(inputFileStr, "r") as inputFile:
        rawText = inputFile.read()

    cachedStopWords = nltk.corpus.stopwords.words("english")
    cachedStopWords.extend(
        ['OMG', 'HI', 'HELLO', 'THANK', 'KIDDO', 'HAHA', 'CA', ':-(', ':-)']
    )
    # Matching is case-sensitive: the extra tokens above are upper-case, so
    # e.g. "omg" is kept.
    # NOTE(review): confirm whether case-insensitive matching was intended.
    # The list is kept for printing; the set gives O(1) membership tests.
    stopWordLookup = frozenset(cachedStopWords)

    result = ' '.join(
        word for word in rawText.split() if word not in stopWordLookup
    )
    if printResult:
        print('Following are the Stop Words')
        print(cachedStopWords)
        print(result)

    with open(outputFileStr, "w") as outputFile:
        outputFile.write(result)
    
def tokenizeReviews(inputFileStr,outputFileStr,printResult):
    """Split a text file into sentences and store them keyed by a 1-based id.

    The sentences produced by NLTK's PunktSentenceTokenizer are numbered from
    1 in order of appearance, and the resulting dict is written to the output
    file as its ``str()`` representation.

    Args:
        inputFileStr: path of the text file to read.
        outputFileStr: path of the file to (over)write with the dict repr.
        printResult: when truthy, print every ``id   sentence`` pair.

    Raises:
        OSError: if either file cannot be opened.
    """
    # Context managers guarantee both handles are closed, even on error.
    with open(inputFileStr, "r") as inputFile:
        reviewText = inputFile.read()

    tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer()
    tokenizedReviews = dict(enumerate(tokenizer.tokenize(reviewText), start=1))

    with open(outputFileStr, "w") as outputFile:
        outputFile.write(str(tokenizedReviews))

    if printResult:
        for key, value in tokenizedReviews.items():
            print(key, ' ', value)
    
def posTagging(inputFileStr,outputFileStr,printResult):
    """Part-of-speech tag every sentence stored in a dict-literal file.

    The input file must contain the ``str()`` of a dict mapping ids to
    sentence strings. Each sentence is word-tokenized and tagged with
    ``nltk.pos_tag``; the dict of ``id -> [(word, tag), ...]`` is written to
    the output file as its ``str()`` representation.

    Args:
        inputFileStr: path of the dict-literal file to read.
        outputFileStr: path of the file to (over)write with the tagged dict.
        printResult: when truthy, print every ``id   tagged-tokens`` pair.

    Raises:
        OSError: if either file cannot be opened.
        SyntaxError, ValueError: if the input is not a valid Python literal.
    """
    # Read and parse before touching the output file, so malformed input
    # cannot wipe out the result of a previous run.
    with open(inputFileStr, "r") as inputFile:
        inputTupples = ast.literal_eval(inputFile.read())

    outputPost = {
        key: nltk.pos_tag(nltk.word_tokenize(value))
        for key, value in inputTupples.items()
    }
    if printResult:
        for key, value in outputPost.items():
            print(key, ' ', value)

    with open(outputFileStr, "w") as outputFile:
        outputFile.write(str(outputPost))
    
def _collectOpinionPhrases(taggedReviews):
    """Return the upper-cased opinion phrases found in the tagged reviews, in order."""
    opinionTags = ('JJ', 'JJR', 'JJS', 'RB', 'RBR', 'RBS')
    nounTags = ('NN', 'NNP')
    phrases = []
    pending = ''   # phrase being built; emitted when the next one starts
    prevTag = ''   # tag of the previous token (carried across reviews)
    for taggedWords in taggedReviews.values():
        for word, tag in taggedWords:
            if tag in opinionTags:
                if prevTag in nounTags:
                    # NOTE(review): this appends the word to the pending
                    # opinion phrase, not to the preceding noun - kept as in
                    # the original; confirm the intended pairing.
                    pending = pending + ' ' + word
                else:
                    if pending:
                        phrases.append(pending.upper())
                    pending = word
            prevTag = tag
    # Flush the last phrase; without this the final opinion word is lost.
    if pending:
        phrases.append(pending.upper())
    return phrases


def opinionExtraction(inputFileStr,outputFileStr,printResult):
    """Count adjective/adverb phrases in POS-tagged reviews and save the repeated ones.

    The input file must contain the ``str()`` of a dict mapping ids to lists
    of ``(word, tag)`` pairs. Words tagged JJ/JJR/JJS/RB/RBR/RBS are collected
    as upper-cased phrases; phrases occurring more than once are written to
    the output file as the ``str()`` of a list of ``(phrase, count)`` tuples
    sorted by descending count (ties keep first-seen order).

    Args:
        inputFileStr: path of the tagged-dict file to read.
        outputFileStr: path of the file to (over)write with the result list.
        printResult: when truthy, print the result list.

    Raises:
        OSError: if either file cannot be opened.
        SyntaxError, ValueError: if the input is not a valid Python literal.
    """
    # Read and parse before touching the output file; handles are closed by
    # the context managers even on error.
    with open(inputFileStr, "r") as inputFile:
        inputTupples = ast.literal_eval(inputFile.read())

    # Single pass tally (dicts keep first-seen order) instead of calling
    # list.count() once per element.
    occurrences = {}
    for phrase in _collectOpinionPhrases(inputTupples):
        occurrences[phrase] = occurrences.get(phrase, 0) + 1

    # Keep only phrases seen more than once.
    repeated = [(phrase, n) for phrase, n in occurrences.items() if n > 1]
    outputAspect = sorted(repeated, key=lambda item: item[1], reverse=True)

    if printResult:
        print(outputAspect)
    with open(outputFileStr, "w") as outputFile:
        outputFile.write(str(outputAspect))

def printResultChoice():
    """Ask the user whether results should be printed.

    Returns:
        True when the answer is exactly 'Y' or 'y', False for anything else.
    """
    answer = str(input('\nDo you want to print the result on output window? (Y/N) :'))
    return answer in ('Y', 'y')
        
if __name__ == '__main__':
    # Each entry: (banner, stage function, input file, output file), listed
    # in execution order. The user is asked before every stage whether its
    # result should be printed.
    # NOTE(review): tokenizeReviews reads the raw _ReviewDataset, so the
    # _PreProcessedData file is written but not read by any stage below -
    # confirm that this is intended.
    for _banner, _stage, _source, _target in (
        ("\n\n\n\n\n\nPREPROCESSING DATA", preProcessing, _ReviewDataset, _PreProcessedData),
        ("\n\n\n\n\n\nTOKENIZING REVIEW", tokenizeReviews, _ReviewDataset, _TokenizedReviews),
        ("\n\n\n\n\n\nPOS TAGGING", posTagging, _TokenizedReviews, _PosTaggedReviews),
        ("\nOPINIONS OF USERS", opinionExtraction, _PosTaggedReviews, _Opinions),
    ):
        print(_banner)
        _stage(_source, _target, printResultChoice())
    # Do not leave the loop variables behind in the notebook namespace.
    del _banner, _stage, _source, _target








PREPROCESSING DATA

Do you want to print the result on output window? (Y/N) :y
Following are the Stop Words
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 


Do you want to print the result on output window? (Y/N) :y
1   [('``', '``'), ('Love', 'VB'), ('how', 'WRB'), ('comfortable', 'JJ'), ('and', 'CC'), ('cute', 'JJ'), ('these', 'DT'), ('kimonos', 'NNS'), ('are', 'VBP'), ('!', '.'), ('!', '.'), ('!', '.')]
2   [('They', 'PRP'), ('’', 'VBP'), ('re', 'JJ'), ('super', 'JJ'), ('adorable', 'NN'), ('on', 'IN'), ('my', 'PRP$'), ('baby', 'NN'), ('!', '.'), ('!', '.'), ("''", "''")]
3   [('very', 'RB'), ('good', 'JJ'), ('material', 'NN'), ('and', 'CC'), ('very', 'RB'), ('nice', 'JJ'), ('.', '.')]
4   [('im', 'NNS'), ('so', 'RB'), ('excited', 'JJ'), ('to', 'TO'), ('receive', 'VB'), ('this', 'DT'), ('beautiful', 'JJ'), ('kimono', 'FW'), ('eventhough', 'RB'), ('im', 'NN'), ('not', 'RB'), ('the', 'DT'), ('one', 'NN'), ('who', 'WP'), ('wears', 'VBZ'), ('it', 'PRP'), ('.', '.')]
5   [('hahaha', 'NN'), ('.', '.')]
6   [('Superb', 'NNP'), ('material', 'NN'), ('and', 'CC'), ('workmanship', 'NN'), (',', ','), ('with', 'IN'), ('attention', 'NN'), ('to', 'TO'


Do you want to print the result on output window? (Y/N) :y
[('SO', 16), ('VERY', 14), ('GOOD', 12), ('NICE', 6), ('DEFINITELY', 6), ('MUCH', 6), ('GREAT', 5), ('BEAUTIFUL', 4), ('LOVELY', 4), ('REALLY', 4), ('WELL', 4), ('COMFORTABLE', 3), ('CUTE', 3), ('EXCELLENT', 3), ('MORE', 3), ('SOFT', 3), ('SUPER', 2), ('EXCITED', 2), ('HANDWRITTEN', 2), ('RESPONSIVE', 2), ('TOO', 2), ('NICELY', 2), ('FAST', 2), ('LITTLE', 2), ('GORGEOUS', 2), ('JUST', 2), ('BACK', 2), ('PROMPT', 2), ('HIGHLY', 2), ('AGAIN', 2), ('WONDERFUL', 2)]
