# Project:
# WhatsApp Chat Sentiment Analyzer

# Author : Pranshu Sharma



# Installing various dependencies

In [2]:
!pip install emoji

Collecting emoji
  Downloading emoji-1.2.0-py3-none-any.whl (131 kB)
Installing collected packages: emoji
Successfully installed emoji-1.2.0


In [5]:
!pip install wordcloud

Collecting wordcloud
  Downloading wordcloud-1.8.1-cp38-cp38-win_amd64.whl (155 kB)
Installing collected packages: wordcloud
Successfully installed wordcloud-1.8.1


In [10]:
!pip install nltk



In [12]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [15]:
import nltk
# Download the 'vader_lexicon' resource through NLTK's downloader; the call
# returns True (see the cell output) and is a no-op when already up to date.
# Presumably this is the lexicon used by the VADER analyzer later on.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to C:\Users\PRANSHU
[nltk_data]     SHARMA\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Importing Libraries

In [37]:
import re
import pandas as pd
import numpy as np
import emoji
from collections import Counter
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

import warnings
warnings.filterwarnings('ignore')

# Designing functions to extract information such as date, time, author and message

In [7]:
# Detect lines that start with a date/time stamp
def date_time(s):
    """Return True when ``s`` begins with a chat-export timestamp.

    The accepted prefix is ``<digits>/<digits>/<digits>, <digits>:<digits>``,
    optionally followed by a space and ``AM``/``PM``/``am``/``pm``, and then
    `` -``. Nothing is extracted here; the function only answers whether the
    line opens a new message.

    Parameters
    ----------
    s : str
        One line of the exported chat.

    Returns
    -------
    bool
        True if the line starts with a timestamp, False otherwise.
    """
    # Raw string: "\/" is not a valid Python string escape, so the non-raw
    # form triggers an invalid-escape warning. The regex itself is unchanged.
    pattern = r'^([0-9]+)(\/)([0-9]+)(\/)([0-9]+), ([0-9]+):([0-9]+)[ ]?(AM|PM|am|pm)? -'
    return re.match(pattern, s) is not None

# Detect whether a message body carries an "Author: " prefix
def find_author(s):
    """Return True when ``s`` contains the ``": "`` author separator.

    The previous check split on every ``":"`` and required exactly two parts,
    so any message containing a second colon (a URL, "note: ...") was
    reported as having no author.

    Parameters
    ----------
    s : str
        The part of a chat line that follows the timestamp.

    Returns
    -------
    bool
    """
    return ": " in s

# Split one timestamped line into its fields
def getDatapoint(line):
    """Split a timestamped chat line into ``(date, time, author, message)``.

    Parameters
    ----------
    line : str
        A line of the form ``"<date>, <time> - <author>: <message>"``.

    Returns
    -------
    tuple
        ``(date, time, author, message)`` as strings. ``author`` is ``None``
        when the text after the timestamp has no ``": "`` separator.

    Raises
    ------
    ValueError
        If the part before the first ``" - "`` does not split into exactly
        two pieces on ``", "``.
    """
    # Split only on the FIRST " - " so dashes inside the message survive.
    dateTime, _, message = line.partition(' - ')
    date, time = dateTime.split(", ")
    if find_author(message):
        # Split only on the FIRST ": " so colons inside the message survive.
        author, message = message.split(": ", 1)
    else:
        author = None
    return date, time, author, message

# Loading the chat .txt file

In [8]:
# Parse the exported chat into rows of [date, time, author, message].
data = []
conversation = 'WhatsApp Chat with debashish.txt'

with open(conversation, encoding="utf-8") as fp:
    # The first line of the file is read and discarded.
    # NOTE(review): presumably the export's header/notice line - confirm.
    fp.readline()
    messageBuffer = []
    date, time, author = None, None, None
    for line in fp:
        line = line.strip()
        if date_time(line):
            # A new timestamp closes the previous message: store it first.
            if messageBuffer:
                data.append([date, time, author, ' '.join(messageBuffer)])
            messageBuffer.clear()
            date, time, author, message = getDatapoint(line)
            messageBuffer.append(message)
        else:
            # No timestamp: continuation of the current (multi-line) message.
            messageBuffer.append(line)
    # Flush the final message; without this the last message in the file
    # would never be stored, since nothing follows it to trigger the append.
    if messageBuffer:
        data.append([date, time, author, ' '.join(messageBuffer)])

# Analyzing Text (Natural Language Processing)

In [18]:
# Build a frame from the parsed rows and attach VADER scores to each message.
df = pd.DataFrame(data, columns=["Date", 'Time', 'Author', 'Message'])
df['Date'] = pd.to_datetime(df['Date'])

# Rows with any missing field (e.g. author None) are dropped. .copy() makes
# the result an independent frame, so the column assignments below do not
# write into a possible view of df (SettingWithCopyWarning).
data = df.dropna().copy()
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sentiments = SentimentIntensityAnalyzer()
# Score every message once and reuse the result for all three columns.
scores = [sentiments.polarity_scores(i) for i in data["Message"]]
data["Positive"] = [s["pos"] for s in scores]
data["Negative"] = [s["neg"] for s in scores]
data["Neutral"] = [s["neu"] for s in scores]
print(data.head())

        Date      Time               Author            Message  Positive  \
0 2020-11-12  10:12 pm            debashish    <Media omitted>     0.000   
1 2020-11-12  10:15 pm  Pranshu ਗਬਰੂ शर्मा💫  thanks a lot bhai     0.592   
2 2020-11-12  10:15 pm            debashish              🚾 Bro     0.000   
3 2020-11-12  10:15 pm  Pranshu ਗਬਰੂ शर्मा💫                 😊😊     0.000   
4 2020-11-12  10:16 pm            debashish                  👍     0.000   

   Negative  Neutral  
0       0.0    1.000  
1       0.0    0.408  
2       0.0    1.000  
3       0.0    1.000  
4       0.0    0.000  


# Sentiment score

In [19]:
# Chat-wide totals of the positive, negative and neutral score columns.
x, y, z = (sum(data[column]) for column in ("Positive", "Negative", "Neutral"))

def sentiment_score(a, b, c):
    """Print the label of whichever total is strictly the largest.

    ``a``, ``b`` and ``c`` are the positive, negative and neutral totals.
    "Positive" is printed when ``a`` strictly exceeds both others,
    "Negative" when ``b`` does; every other case, ties included, prints
    "Neutral". Nothing is returned.
    """
    if b < a and c < a:
        verdict = "Positive 😊 "
    elif a < b and c < b:
        verdict = "Negative 😠 "
    else:
        verdict = "Neutral 🙂 "
    print(verdict)
# Print the overall label for the chat from the three totals computed above in this cell.
sentiment_score(x, y, z)

Neutral 🙂 


# Therefore, we can analyze WhatsApp chats as shown above

## Designing a function that takes a WhatsApp chat .txt file as input and runs the whole analysis

In [38]:
def _read_chat_rows(path):
    """Parse a chat export into ``[date, time, author, message]`` rows.

    The first line of the file is read and discarded. Each following line
    that ``date_time`` recognises starts a new row; any other line is treated
    as a continuation and joined onto the current message with a space.
    """
    rows = []
    with open(path, encoding="utf-8") as fp:
        fp.readline()
        messageBuffer = []
        date, time, author = None, None, None
        for line in fp:
            line = line.strip()
            if date_time(line):
                # A new timestamp closes the previous message: store it first.
                if messageBuffer:
                    rows.append([date, time, author, ' '.join(messageBuffer)])
                messageBuffer.clear()
                date, time, author, message = getDatapoint(line)
                messageBuffer.append(message)
            else:
                # Continuation line. This branch used to be attached to the
                # `while` loop (a while-else that never ran), so multi-line
                # messages lost everything after their first line.
                messageBuffer.append(line)
        # Flush the final message, which no later timestamp would trigger.
        if messageBuffer:
            rows.append([date, time, author, ' '.join(messageBuffer)])
    return rows


def predict_chat(a):
    """Score a chat export with VADER and print the overall sentiment.

    Parameters
    ----------
    a : str
        Path to the exported chat ``.txt`` file (opened as UTF-8).

    Returns
    -------
    None
        Prints the first rows of the scored frame, a blank line, the heading
        "The Sentiment Analyzer:" and the label chosen by ``sentiment_score``.
    """
    rows = _read_chat_rows(a)

    chat_df = pd.DataFrame(rows, columns=["Date", 'Time', 'Author', 'Message'])
    chat_df['Date'] = pd.to_datetime(chat_df['Date'])

    # Rows with any missing field (e.g. author None) are dropped. .copy()
    # keeps the column assignments below from writing into a possible view.
    scored = chat_df.dropna().copy()
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    sentiments = SentimentIntensityAnalyzer()
    # Score every message once and reuse the result for all three columns.
    scores = [sentiments.polarity_scores(text) for text in scored["Message"]]
    scored["Positive"] = [s["pos"] for s in scores]
    scored["Negative"] = [s["neg"] for s in scores]
    scored["Neutral"] = [s["neu"] for s in scores]
    print(scored.head())
    x = sum(scored["Positive"])
    y = sum(scored["Negative"])
    z = sum(scored["Neutral"])

    print()
    print("The Sentiment Analyzer:")

    # Reuse the notebook-level sentiment_score rather than redefining an
    # identical nested copy here.
    sentiment_score(x, y, z)

In [39]:
# Path of the chat export to analyze (relative to the notebook's working directory).
a ='WhatsApp Chat with debashish.txt'

In [40]:
# Run the full pipeline on the export named in `a`.
predict_chat(a)

        Date      Time               Author            Message  Positive  \
0 2020-11-12  10:12 pm            debashish    <Media omitted>     0.000   
1 2020-11-12  10:15 pm  Pranshu ਗਬਰੂ शर्मा💫  thanks a lot bhai     0.592   
2 2020-11-12  10:15 pm            debashish              🚾 Bro     0.000   
3 2020-11-12  10:15 pm  Pranshu ਗਬਰੂ शर्मा💫                 😊😊     0.000   
4 2020-11-12  10:16 pm            debashish                  👍     0.000   

   Negative  Neutral  
0       0.0    1.000  
1       0.0    0.408  
2       0.0    1.000  
3       0.0    1.000  
4       0.0    0.000  

The Sentiment Analyzer:
Neutral 🙂 


In [41]:
# Run the same pipeline on a second chat export.
b = 'WhatsApp Chat with Aunty.txt'
predict_chat(b)

        Date     Time               Author  \
0 2018-11-24  4:55 pm  Pranshu ਗਬਰੂ शर्मा💫   
1 2018-11-24  4:55 pm  Pranshu ਗਬਰੂ शर्मा💫   
2 2018-11-24  4:55 pm  Pranshu ਗਬਰੂ शर्मा💫   
3 2018-11-24  5:14 pm  Pranshu ਗਬਰੂ शर्मा💫   
4 2018-11-24  5:14 pm  Pranshu ਗਬਰੂ शर्मा💫   

                                             Message  Positive  Negative  \
0                                        Hi aunty 😃🙂     0.000     0.000   
1                                  It's pranshu here     0.000     0.000   
2                                                  🙂     0.000     0.000   
3  I wanted to have a sort of conversation with u...     0.081     0.000   
4                              Sorry for bothering u     0.000     0.796   

   Neutral  
0    1.000  
1    1.000  
2    0.000  
3    0.919  
4    0.204  

The Sentiment Analyzer:
Neutral 🙂 
