In [1]:
from textblob import TextBlob
import nltk
import json
from nltk.tokenize import word_tokenize, sent_tokenize
import itertools
import matplotlib.pyplot as plt
from nltk.sentiment import SentimentIntensityAnalyzer
import os
import glob
import re
import math

## initialize user_data dictionary
{username:[name, {maxscore:  , message:  },  {minscore: , message: },  [all_scores]]

In [2]:
def get_user_data():
    # open users file and get user_id and user_name
    users_data = {}
    with open('../data/slack/users.json') as f:
        users = json.load(f)
    for i in range(len(users)):
        user_id = users[i]['id']
        user_name = users[i]['name']
        all_scores = []
        scores = []
        max_score_dict = {}
        max_score_dict['maxscorevalue'] = -2
        max_score_dict['message'] = ''
        min_score_dict = {}
        min_score_dict['minscorevalue']  = 2
        min_score_dict['message'] = ''
        scores.append(user_name)
        scores.append(max_score_dict)
        scores.append(min_score_dict)
        scores.append(all_scores)
        if user_name not in users_data:
            users_data[user_id] = scores
#     print(users_data)
    return users_data

## Loop all chat histories data in files

In [3]:
def loop_all_data_in_files():
#   get all users from get_user_data() function
    users_data = get_user_data()
    
#   get immediate subdirectories under data/slack
    slack_dir = '../data/slack'
    list_subfolders_with_paths = [f.path for f in os.scandir(slack_dir) if f.is_dir()]
    
#   save all files path into an array
    list_files_with_paths = []
    for subfolder in list_subfolders_with_paths:
        for file in os.listdir(subfolder):
            list_files_with_paths.append(os.path.join(subfolder, file))
            
#   loop files path array
    for path in list_files_with_paths: 
#       call get_main_data_from_chat_history
        get_main_data_from_chat_history(path, users_data)
    return users_data

In [4]:
def clean_tag(sentence):
    tag = re.compile('<.*?>')
    sentence_without_tag = re.sub(tag, '', sentence)
    return sentence_without_tag

## Calculate each sentence in a text. Return currentMaxScore and its message

In [5]:
def calculate_sentence_score_by_model(sentences):
#   separate_sentances, separate_words = get_main_data_from_chat_history()
    nltk.download('vader_lexicon')
    scores = []
    min_score = 2
    min_score_message = ''
    max_score = -2
    max_score_message = ''
    for i in range(len(sentences)):
        sentence = sentences[i]
        sentence = clean_tag(sentence)
        sentence_score = SentimentIntensityAnalyzer().polarity_scores(sentence)["compound"]
        if sentence_score < min_score:
            min_score = sentence_score
            min_score_message = sentence
        if sentence_score > max_score:
            max_score = sentence_score
            max_score_message = sentence
        scores.append(sentence_score)
#         print(max_score,min_score)
    return [[max_score, max_score_message], [min_score, min_score_message], scores]

## Get messages from each file, split messages into sentences and calculate it. 

In [6]:
def get_main_data_from_chat_history(path, users_data):
#   open file with path
    with open(path) as f:
        data = json.load(f)
#   download nltk to tokenize the messages into sentences   
    nltk.download('punkt')
    for i in range(len(data)):
        if 'user' in data[i]:
            text = data[i]['text']
            user = data[i]['user']
            scores = calculate_sentence_score_by_model(sent_tokenize(text))
            max_score_in_dict = users_data[user][1]['maxscorevalue']
            max_score_in_last_calcu = scores[0][0]
            min_score_in_dict = users_data[user][2]['minscorevalue']
            min_score_in_last_calcu = scores[1][0]
            if max_score_in_last_calcu > max_score_in_dict:
#                 print("max shoud change")
                users_data[user][1]['message'] = scores[0][1]
                users_data[user][1]['maxscorevalue'] = max_score_in_last_calcu
            if min_score_in_last_calcu < min_score_in_dict:
                users_data[user][2]['message'] = scores[1][1]
                users_data[user][2]['minscorevalue'] = min_score_in_last_calcu
            users_data[user][3].extend(scores[2])
#     print(users_data)
    return

## Calculate avg, median score and return result in required format
now : {username:[name, {maxscore:  , message:  },  {minscore: , message: },  [all_scores]]
required: 
{"data":
    [
       {"name":
        "avgscore":
        "medianscore":
        tmaxscore: {maxscorevalue:  , message. }
        minScore: {minscorevalue:  , message  }
       },
       
       {},
    ]
}


In [7]:
import numpy

In [8]:
def calculate_avg_var_median(all_scores):
#     all_scores = users_data[3]
    avg_score = numpy.mean(all_scores)
    median_score = numpy.median(all_scores)
    variance_score = numpy.var(all_scores)
    if math.isnan(avg_score):
        avg_score = -2
    if math.isnan(median_score):
        median_score = -2
    if math.isnan(variance_score):
        variance_score = -2
    return [avg_score, median_score, variance_score]


In [9]:
def modify_result_format():
    users_data = loop_all_data_in_files()
    result_users_data_list = []
    result_users_data = {}
    for user_id in users_data:
        scores = calculate_avg_var_median(users_data[user_id][3])
        avg_score = scores[0]
        median_score = scores[1]
        variance_score = scores[2]
        user_name = users_data[user_id][0]
        maxscore_dict = users_data[user_id][1]
        minscore_dict = users_data[user_id][2]
        user = {}
        user['name'] = user_name
        user['avgscore'] = avg_score
        user['variancescore'] = variance_score
        user['medianscore'] = median_score
        user['maxscore'] = maxscore_dict
        user['minscore'] = minscore_dict
        result_users_data_list.append(user)
    result_users_data['data'] = result_users_data_list
#     print(result_users_data)
    return result_users_data

## Change result format to json

In [10]:
def get_user_data_in_json():
    users_data = modify_result_format()
    data_in_json = json.dumps(users_data, indent=4)
    print(data_in_json)
    return 
get_user_data_in_json()

[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Packag

[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/n

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data] 

[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon i

{
    "data": [
        {
            "name": "simon.speth",
            "avgscore": 0.0,
            "variancescore": 0.0,
            "medianscore": 0.0,
            "maxscore": {
                "maxscorevalue": 0.0,
                "message": " has joined the channel"
            },
            "minscore": {
                "minscorevalue": 0.0,
                "message": " has joined the channel"
            }
        },
        {
            "name": "guyiyu",
            "avgscore": 0.033025000000000006,
            "variancescore": 0.072668091875,
            "medianscore": 0.0,
            "maxscore": {
                "maxscorevalue": 0.6486,
                "message": "Hi nice to meet you all :wave::skin-tone-2:\nWould like to ask, when you guys prefer an icebreaker video chat?"
            },
            "minscore": {
                "minscorevalue": -0.9027,
                "message": "2021/04/25 13:18:56 handlers.go:373: Get \"\": dial tcp 172.21.0.3:5430: connect: connect

[nltk_data] Downloading package punkt to /Users/zhiyiniu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/zhiyiniu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
