# 1. Importing libraries and loading Data

### 1.1 Installing necessary libraries


In [3]:
#Tejas Model 
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import emoji
import re
import string
from transformers import TFBertModel, BertTokenizerFast, BertConfig
import tensorflow as tf
from keras.layers import Input, Dropout, Dense, BatchNormalization
from keras.models import Model
from sklearn.metrics import f1_score, recall_score, precision_score
from tensorflow.keras.utils import plot_model
from keras.initializers import TruncatedNormal
import keras.backend as K



In [None]:
from huggingface_hub import HfApi

hf_api = HfApi()
models = hf_api.list_models()

### 1.3 Helper Functions

In [4]:
def idx2class(idx_list):
    """
    This function converts a list of class indices to a list of class labels.

    Parameters
    ----------
    idx_list : list
        List of class indices.
    
    Returns
    -------
    class_list : list
        List of class labels.
    """
    arr = []
    for i in idx_list:
        arr.append(labels[int(i)])
    return arr

def EmotionMapping(list_of_emotions):
    list = []  
    for i in list_of_emotions:
        if i in ekman_map['anger']:
            list.append('anger')
        if i in ekman_map['fear']:
            list.append('fear')
        if i in ekman_map['joy']:
            list.append('joy')
        if i in ekman_map['sadness']:
            list.append('sadness')
        if i in ekman_map['surprise']:
            list.append('surprise')
        if i == 'neutral':
            list.append('neutral')      
    return list

def SentimentMapping(list_of_emotions):
    list = []  
    for i in list_of_emotions:
        if i in sentiment_map['positive']:
            list.append('positive')
        if i in sentiment_map['negative']:
            list.append('negative')
        if i in sentiment_map['ambiguous']:
            list.append('ambiguous')
    return list

### 1.4 Loading data

In [5]:
train_url = 'https://raw.githubusercontent.com/google-research/google-research/master/goemotions/data/train.tsv'
valid_url = 'https://github.com/google-research/google-research/raw/master/goemotions/data/dev.tsv'
test_url = 'https://raw.githubusercontent.com/google-research/google-research/master/goemotions/data/train.tsv'

In [6]:
train_df = pd.read_csv(train_url, sep='\t', encoding='utf-8',
                       names=['text', 'emotion', 'annotator'], header=None)
valid_df = pd.read_csv(valid_url, sep='\t', encoding='utf-8',
                       names=['text', 'emotion', 'annotator'], header=None)
test_df = pd.read_csv(test_url, sep='\t', encoding='utf-8',
                      names=['text', 'emotion', 'annotator'], header=None)

In [7]:
train_df.head(10)


Unnamed: 0,text,emotion,annotator
0,My favourite food is anything I didn't have to...,27,eebbqej
1,"Now if he does off himself, everyone will thin...",27,ed00q6i
2,WHY THE FUCK IS BAYLESS ISOING,2,eezlygj
3,To make her feel threatened,14,ed7ypvh
4,Dirty Southern Wankers,3,ed0bdzj
5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe...,26,edvnz26
6,Yes I heard abt the f bombs! That has to be wh...,15,ee3b6wu
7,We need more boards and to create a bit more s...,820,ef4qmod
8,Damn youtube and outrage drama is super lucrat...,0,ed8wbdn
9,It might be linked to the trust factor of your...,27,eczgv1o


### 1.5 Preprocessing

Column 2 "annotator" is unnecessary, so we can drop it.


In [8]:
train_df.drop('annotator', axis=1, inplace=True)
valid_df.drop('annotator', axis=1, inplace=True)
test_df.drop('annotator', axis=1, inplace=True)

Dictionaries for mapping emotions to indices and vice versa. 

The variable `ekman_map` is used to map 27 emotions to 7 emotions. This is done to reduce the number of classes.

The 27 emotions can also be mapped to the 3 emotions using the `sentiment_map` dictionary for sentiment analysis tasks.


In [9]:
labels = {
    0: 'admiration',
    1: 'amusement',
    2: 'anger',
    3: 'annoyance',
    4: 'approval',
    5: 'caring',
    6: 'confusion',
    7: 'curiosity',
    8: 'desire',
    9: 'disappointment',
    10: 'disapproval',
    11: 'disgust',
    12: 'embarrassment',
    13: 'excitement',
    14: 'fear',
    15: 'gratitude',
    16: 'grief',
    17: 'joy',
    18: 'love',
    19: 'nervousness',
    20: 'optimism',
    21: 'pride',
    22: 'realization',
    23: 'relief',
    24: 'remorse',
    25: 'sadness',
    26: 'surprise',
    27: 'neutral'
}

ekman_map = {
    'anger': ['anger', 'annoyance', 'disapproval', 'confusion','disgust'],
    'fear': ['fear', 'nervousness'],
    'joy': ['joy', 'amusement', 'approval', 'excitement', 'gratitude',  'love', 'optimism', 'relief', 'pride', 'admiration', 'desire', 'caring'],
    'sadness': ['sadness', 'disappointment', 'embarrassment', 'grief',  'remorse'],
    'surprise': ['surprise', 'realization', 'confusion', 'curiosity'],
    'neutral': ['neutral']
}

sentiment_map = {
    "positive": ["amusement", "excitement", "joy", "love", "desire", "optimism", "caring", "pride", "admiration", "gratitude", "relief", "approval"],
    "negative": ["fear", "nervousness", "remorse", "embarrassment", "disappointment", "sadness", "grief", "disgust", "anger", "annoyance", "disapproval"],
    "ambiguous": ["realization", "surprise", "curiosity", "confusion", "neutral"]
}

First, let's extract all emotions from the each example and store them in a list.

In [10]:
train_df['list of emotions'] = train_df['emotion'].apply(lambda x: x.split(','))
test_df['list of emotions'] = test_df['emotion'].apply(lambda x: x.split(','))
valid_df['list of emotions'] = valid_df['emotion'].apply(lambda x: x.split(','))

We can then apply index to class mapping to get the class labels for each row

In [11]:
train_df['emotion'] = train_df['list of emotions'].apply(lambda x: idx2class(x))
test_df['emotion'] = test_df['list of emotions'].apply(lambda x: idx2class(x))
valid_df['emotion'] = valid_df['list of emotions'].apply(lambda x: idx2class(x))

Finally, we can reduce the number of classes to 7 by using the EmotionMapping function.

In [12]:
train_df['ekman_emotion'] = train_df['emotion'].apply(lambda x: EmotionMapping(x))
test_df['ekman_emotion'] = test_df['emotion'].apply(lambda x: EmotionMapping(x))
valid_df['ekman_emotion'] = valid_df['emotion'].apply(lambda x: EmotionMapping(x))

In [13]:
train_df.head(10)

Unnamed: 0,text,emotion,list of emotions,ekman_emotion
0,My favourite food is anything I didn't have to...,[neutral],[27],[neutral]
1,"Now if he does off himself, everyone will thin...",[neutral],[27],[neutral]
2,WHY THE FUCK IS BAYLESS ISOING,[anger],[2],[anger]
3,To make her feel threatened,[fear],[14],[fear]
4,Dirty Southern Wankers,[annoyance],[3],[anger]
5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe...,[surprise],[26],[surprise]
6,Yes I heard abt the f bombs! That has to be wh...,[gratitude],[15],[joy]
7,We need more boards and to create a bit more s...,"[desire, optimism]","[8, 20]","[joy, joy]"
8,Damn youtube and outrage drama is super lucrat...,[admiration],[0],[joy]
9,It might be linked to the trust factor of your...,[neutral],[27],[neutral]


In [14]:
def clean_text(text):
    """
    This function cleans the text in the dataframe and returns a list of cleaned text.
    text: a string

    return: modified initial string
    """
    # Removing Emojis
    text = emoji.demojize(text)  # remove emojis
    text = str(text).lower()  # text to lower case
    text = re.sub(r'[%s]' % re.escape(string.punctuation), ' ', text)  # remove punctuation
    return text

One hot encoding of emotions 

In [15]:
for i in ekman_map:
    train_df[i] = train_df['ekman_emotion'].apply(lambda x: 1 if i in x else 0)
    test_df[i] = test_df['ekman_emotion'].apply(lambda x: 1 if i in x else 0)
    valid_df[i] = valid_df['ekman_emotion'].apply(lambda x: 1 if i in x else 0)

In [16]:
train_df.head(10)

Unnamed: 0,text,emotion,list of emotions,ekman_emotion,anger,fear,joy,sadness,surprise,neutral
0,My favourite food is anything I didn't have to...,[neutral],[27],[neutral],0,0,0,0,0,1
1,"Now if he does off himself, everyone will thin...",[neutral],[27],[neutral],0,0,0,0,0,1
2,WHY THE FUCK IS BAYLESS ISOING,[anger],[2],[anger],1,0,0,0,0,0
3,To make her feel threatened,[fear],[14],[fear],0,1,0,0,0,0
4,Dirty Southern Wankers,[annoyance],[3],[anger],1,0,0,0,0,0
5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe...,[surprise],[26],[surprise],0,0,0,0,1,0
6,Yes I heard abt the f bombs! That has to be wh...,[gratitude],[15],[joy],0,0,1,0,0,0
7,We need more boards and to create a bit more s...,"[desire, optimism]","[8, 20]","[joy, joy]",0,0,1,0,0,0
8,Damn youtube and outrage drama is super lucrat...,[admiration],[0],[joy],0,0,1,0,0,0
9,It might be linked to the trust factor of your...,[neutral],[27],[neutral],0,0,0,0,0,1


### 2.1 Base model config

#### Computing max length of samples

`max_length` variable is used to limit the length of the input text that is fed to the model. The sequence will be padded with the `<PAD>` token if the length of the sequence is less than `max_length` and the sequence will be truncated if the length of the sequence is more than `max_length`. This is done to ensure that the model can handle any size of input text.

In [17]:
full_text = pd.concat([train_df['text'], valid_df['text'], test_df['text']])
max_length = full_text.apply(lambda x: len(x.split())).max()
max_length

33

I am going to use Google's BERT base model which contains 110M parameters.

In [18]:
model_name = 'bert-base-uncased'
config = BertConfig.from_pretrained(model_name, output_hidden_states=False)
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
transformer_model = TFBertModel.from_pretrained(model_name, config = config)

Metal device set to: Apple M2


2022-11-23 15:28:46.520113: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-11-23 15:28:46.520544: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identi

### 2.2 Model architecture

model takes three inputs that result from tokenization:

- `input_ids`: indices of input sequence tokens in the vocabulary
- `token_type_ids`: Segment token indices to indicate first and second portions of the inputs. 0 for sentence A and 1 for sentence B
- `attention mask`: Mask to avoid performing attention on padding token indices. 0 for masked and 1 for not masked

I have a sigmoided output layer in the model because it is more appropriate than a softmax layer. This is because I are trying to predict the probability of each label and not the label itself.


In [19]:
def my_model(n_labels):

  # Load the MainLayer
  bert = transformer_model.layers[0]

  ## INPUTS
  input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
  attention_mask = Input(shape=(max_length,), name='attention_mask', dtype='int32')
  token_type_ids = Input(shape=(max_length,), name='token_type_ids', dtype='int32')
  inputs = {'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}

  ## LAYERS
  bert_model = bert(inputs)[1]
  dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
  pooled_output = dropout(bert_model, training=False)

  ## OUTPUT
  emotion = Dense(units=n_labels, activation='sigmoid', kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='emotion')(pooled_output)
  outputs = emotion

  model = Model(inputs=inputs, outputs=outputs, name='BERT_Emotion_Classifier')

  return model

In [20]:
model = my_model(len(ekman_map))
model.summary()

Model: "BERT_Emotion_Classifier"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask (InputLayer)    [(None, 33)]         0           []                               
                                                                                                  
 input_ids (InputLayer)         [(None, 33)]         0           []                               
                                                                                                  
 token_type_ids (InputLayer)    [(None, 33)]         0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  109482240   ['attention_mask[0][0]',         
                                thPoolingAndCrossAt               'input_ids

In [21]:
plot_model(model, show_shapes=True, dpi=300)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


### 2.3 Data tokenization

In [22]:
## Train 
x_train = train_df['text']
y_train = train_df.loc[:, ekman_map.keys()].values

train_tokenized = tokenizer(
    text = list(x_train),
    add_special_tokens = True,
    max_length = max_length,
    padding = 'max_length',
    truncation = True,
    return_tensors = 'tf',
    return_attention_mask = True,
    return_token_type_ids = True
)

# pickle.dump(train_tokenized, open("/content/drive/MyDrive/Variables/train_tokenized.pkl", 'wb'))



In [23]:
# with open("/content/drive/MyDrive/Variables/train_tokenized.pkl", 'rb') as train_file:
#     train_tokenized = pickle.load(train_file)
 

In [24]:
## Test
x_test = test_df['text']
y_test = test_df.loc[:, ekman_map.keys()].values

test_tokenized = tokenizer(
    text = list(x_test),
    add_special_tokens = True,
    max_length = max_length,
    padding = 'max_length',
    truncation = True,
    return_tensors = 'tf',
    return_attention_mask = True,
    return_token_type_ids = True
)

# pickle.dump(test_tokenized, open("/content/drive/MyDrive/Variables/test_tokenized.pkl", 'wb'))

In [25]:
# with open("/content/drive/MyDrive/Variables/train_tokenized.pkl", 'rb') as test_file:
#     test_tokenized = pickle.load(test_file)

In [26]:
## Validation
x_valid = valid_df['text']
y_valid = valid_df.loc[:, ekman_map.keys()].values

valid_tokenized = tokenizer(
    text = list(x_valid),
    add_special_tokens = True,
    max_length = max_length,
    padding = 'max_length',
    truncation = True,
    return_tensors = 'tf',
    return_attention_mask = True,
    return_token_type_ids = True
)

#pickle.dump(valid_tokenized, open("/content/drive/MyDrive/Variables/valid_tokenized.pkl", 'wb'))

In [27]:
# with open("/content/drive/MyDrive/Variables/valid_tokenized.pkl", 'rb') as valid_file:
#     valid_tokenized = pickle.load(valid_file)

### 2.4 Creating BERT compatible inputs

In [28]:
tf_train = {'input_ids': train_tokenized['input_ids'], 'attention_mask': train_tokenized['attention_mask'], 'token_type_ids': train_tokenized['token_type_ids']}
tf_test = {'input_ids': test_tokenized['input_ids'], 'attention_mask': test_tokenized['attention_mask'], 'token_type_ids': test_tokenized['token_type_ids']}
tf_valid = {'input_ids': valid_tokenized['input_ids'], 'attention_mask': valid_tokenized['attention_mask'], 'token_type_ids': valid_tokenized['token_type_ids']}

In [29]:
train = tf.data.Dataset.from_tensor_slices((tf_train, y_train)).batch(80)
valid = tf.data.Dataset.from_tensor_slices((tf_valid, y_valid)).batch(80)
test = tf.data.Dataset.from_tensor_slices((tf_test, y_test)).batch(80)

In [30]:
from tensorflow.keras import backend as K
K.clear_session()

Prior experiments with BERT showed that the model starts to overfit after ~2 epochs and Tanh performed significantly worse than sigmoid.

# 3. Evaluation

When dealing with unbalanced data, it is essential to mini-batch train the model instead of training it on all the data. This helps to prevent the model from overfitting the minority class. It is also essential to be thoughtful about what metric is being used for model evaluation. When dealing with unbalanced data, accuracy is not a good metric, as the model can predict the majority class every time and still have high accuracy. Instead, it is crucial to use the precision/recall or the F1 score, as these metrics consider false positives and false negatives.


In [31]:
model = my_model(len(ekman_map))
model.load_weights('Hapiness_Meter_one.h5')

In [32]:
THRESHOLD = 0.83

# y_pred = model.predict(test)
# import pickle
# pickle.dump(y_pred, open("y_pred.pkl", 'wb'))
# model.save_weights('/content/drive/MyDrive/model/threshold.h5')
# import pickle
# # pickle.dump(y_pred, open("y_pred.pkl", 'wb'))
with open("y_pred.pkl", 'rb') as ypred_file:
    y_pred = pickle.load(ypred_file)

In [33]:
probabilities = y_pred

probabilities = pd.DataFrame(probabilities, columns=ekman_map.keys())
probabilities.index = x_test
probabilities.reset_index(inplace=True)
probabilities.head(10)

Unnamed: 0,text,anger,fear,joy,sadness,surprise,neutral
0,My favourite food is anything I didn't have to...,0.001167,0.001536,0.525426,0.001361,0.033383,0.660853
1,"Now if he does off himself, everyone will thin...",0.006564,0.001458,0.014804,0.003272,0.008545,0.984385
2,WHY THE FUCK IS BAYLESS ISOING,0.97438,0.00317,0.011093,0.013022,0.011833,0.036757
3,To make her feel threatened,0.090632,0.928057,0.089613,0.039748,0.012046,0.042805
4,Dirty Southern Wankers,0.976233,0.004245,0.015103,0.014472,0.008927,0.034423
5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe...,0.097964,0.005073,0.002139,0.387786,0.786229,0.039062
6,Yes I heard abt the f bombs! That has to be wh...,0.015234,0.016906,0.996662,0.006778,0.034883,0.004226
7,We need more boards and to create a bit more s...,0.003835,0.000937,0.961643,0.001522,0.010248,0.097953
8,Damn youtube and outrage drama is super lucrat...,0.011543,0.001208,0.990842,0.00304,0.0038,0.013122
9,It might be linked to the trust factor of your...,0.008172,0.001908,0.016478,0.001939,0.029351,0.97825


In [34]:
y_pred = np.where(y_pred > THRESHOLD, 1, 0)

recall = []
f1 = []
precision = []
emotions = ekman_map.keys()

for i in range(len(emotions)):
    f1.append(f1_score(y_test[:, i], y_pred[:, i], average='macro'))
    precision.append(precision_score(y_test[:, i], y_pred[:, i], average='macro'))

results = pd.DataFrame({'precision': precision, 'f1': f1})
results.index = emotions

means = {'precision': np.mean(precision), 'f1': np.mean(f1)}
means = pd.DataFrame(means, index=['mean'])


In [35]:
pd.concat([results, means], axis=0)

Unnamed: 0,precision,f1
anger,0.966605,0.932673
fear,0.984553,0.9526
joy,0.971919,0.966772
sadness,0.974092,0.945795
surprise,0.966318,0.919316
neutral,0.936971,0.883788
mean,0.966743,0.933491


### 3.1 Optimization

Finding the best value of Threshold. I chose f1-score as the main metric because it is more robust than precision and recall alone.

In [36]:
best_threshold = 0
best_precision = 0
# pred = model.predict(test)
# pickle.dump(pred, open("pred.pkl", 'wb'))
with open("pred.pkl", 'rb') as pred_file:
    pred = pickle.load(pred_file)

for threshold in np.arange(0.30, 0.99, 0.01):
    preds = np.where(pred > threshold, 1, 0)

    precision = precision_score(y_test, preds, average='macro', zero_division=0)

    if precision > best_precision:
        best_threshold = threshold
        best_precision = precision
    else:
        continue

In [37]:
print(f'Best threshold: {best_threshold}\nBest precision: {best_precision}')

Best threshold: 0.9700000000000006
Best precision: 0.9954029021444736


In [38]:
THRESHOLD = 0.39

## 4. Make Predictions

In [50]:
def pred(text, model, THRESHOLD):

    text = [clean_text(text) for text in text]
    
    tokenized = tokenizer(
        text = text,
        add_special_tokens = True,
        max_length = max_length,
        padding = 'max_length',
        truncation = True,
        return_tensors = 'tf',
        return_attention_mask = True,
        return_token_type_ids = True
    )

    tf_test = {'input_ids': tokenized['input_ids'], 'attention_mask': tokenized['attention_mask'], 'token_type_ids': tokenized['token_type_ids']}

    pred = model.predict(tf_test)

    probabilities = pred
    probabilities = pd.DataFrame(probabilities*100, columns=ekman_map.keys())
    probabilities.index = text
    probabilities.reset_index(inplace=True)

    pred = np.where(pred > THRESHOLD, 1, 0)

    pred = pd.DataFrame(pred, columns=ekman_map.keys())
    pred['emotion'] = pred.iloc[:, 1:].idxmax(axis=1)
    pred.drop(columns=emotions, inplace=True)
    pred.index = text
    pred.reset_index(inplace=True)

    return pred, probabilities

In [48]:
result, probabilities = pred(["i am not happy with the services you provided"], model, THRESHOLD)



In [49]:
result

Unnamed: 0,index,emotion
0,i am not happy with the services you provided,sadness


In [41]:
probabilities


Unnamed: 0,index,anger,fear,joy,sadness,surprise,neutral
0,very disappointed with the construction quality,2.077125,0.572663,5.636095,96.720848,1.459113,2.370996


In [102]:
data = pd.read_excel("Customer_Mails.xlsx")
data

Unnamed: 0,Subject,Sender.Name,Sender.Address,ToRecipients.Name,ToRecipients.Address,DateTimeSent,DateTimeReceived,Importance,Body.TextBody
0,Fwd: PAYMENT OF RS.450000/-,HANAMANTHRAO CHOUDRY,hrchoudry@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-09-10 14:08:06,2022-09-10 14:10:41,Normal,"Dear Sir/Madam,_x000D_\nFYI,_x000D_\nAs per th..."
1,Credentials for Gera world of joy application ...,nilesh hajare,hajarenilesh@gmail.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:19:19,2022-09-10 13:19:37,Normal,"Dear Gera Team,_x000D_\n_x000D_\nI am holding ..."
2,RE: [EXT] Fw: Important Notification - Usage o...,Subramanian Balaji Lakshmi,balaji.subramanian@michelin.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:10:29,2022-09-10 13:11:07,Normal,"Hello Sanjeev & Ajinkya ,_x000D_\n_x000D_\nAs ..."
3,Reimbursement of Sep EMI for G-1510 and G-1511,sunil singh,sunilsingh13@gmail.com,Varsha Vats @ Gera Customer Relations,customersupportpojrm2@gera.in,2022-09-10 12:36:45,2022-09-10 12:37:22,Normal,"For my units G-1510 and G-1511, please do the ..."
4,Re: Intent of Termination Letter for Mr. Ramra...,Ramraj Pandey,pramraj@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-09-10 12:34:33,2022-09-10 12:34:53,Normal,"Hi Mr. Adatkaran,_x000D_\n_x000D_\nI have tran..."
...,...,...,...,...,...,...,...,...,...
2961,C116_Original agreement copy,Subhash Patil,smpatil255@gmail.com,Pallavi Ladkat @ Gera Customer Relations,pallavi.ladkat@gera.in,2022-08-05 16:40:45,2022-08-05 16:41:03,Normal,"Dear sir,_x000D_\n_x000D_\nMyself Subhash Pati..."
2962,Change of contact details,Vartika,vartika134@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-08-05 15:57:26,2022-08-05 15:57:44,Normal,"Hi Shalini,_x000D_\n_x000D_\nAs discussed, can..."
2963,,Sayali Gujrathi,sayaligujrathi7@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-08-05 13:56:16,2022-08-05 13:56:37,Normal,"Dear sir ,_x000D_\n_x000D_\n_x000D_\n_x000D_\n..."
2964,Re: Slot for Project L - Confirmation,Meher Komal,m.komal27@gmail.com,Stephen Natkar @ Gera Customer Relations,wojcustomersupport1@gera.in,2022-08-05 13:00:45,2022-08-05 13:01:08,Normal,"Ok, thank you._x000D_\n_x000D_\nOn Fri, Aug 5,..."


In [103]:
data['Body.TextBody'] = data['Body.TextBody'].astype(str)
data['Body.TextBody'] = data['Body.TextBody'].replace("_x000D_\n", " ", regex=True)

In [104]:
result, probabilities = pred(data.iloc[:,8], model, THRESHOLD)

2022-09-12 12:03:19.775943: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [75]:
probabilities.drop(columns="index")

Unnamed: 0,anger,fear,joy,sadness,surprise,neutral,emotion
0,0.200572,0.250053,4.765276,0.421653,0.559019,97.954773,neutral
1,0.239305,0.121922,97.771385,0.164696,0.642805,4.157209,joy
2,69.368088,0.154405,0.776611,2.402454,63.111626,53.569763,anger
3,0.146886,0.199758,25.413540,0.177049,0.810602,88.503799,neutral
4,0.169846,0.122111,74.816536,0.128957,1.485788,47.680176,joy
...,...,...,...,...,...,...,...
2961,0.141272,0.110589,41.940178,0.214269,0.608693,83.128555,neutral
2962,80.817909,0.079174,3.561758,0.128391,39.027832,68.069847,anger
2963,0.281418,0.142284,4.197897,0.294429,0.616587,97.440353,neutral
2964,0.404224,0.128871,99.310287,0.234794,0.810185,1.004376,joy


In [76]:
data = data.join(probabilities)
data

Unnamed: 0,Subject,Sender.Name,Sender.Address,ToRecipients.Name,ToRecipients.Address,DateTimeSent,DateTimeReceived,Importance,Body.TextBody,index,anger,fear,joy,sadness,surprise,neutral,emotion
0,Fwd: PAYMENT OF RS.450000/-,HANAMANTHRAO CHOUDRY,hrchoudry@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-09-10 14:08:06,2022-09-10 14:10:41,Normal,"Dear Sir/Madam, FYI, As per the Telephonic con...",dear sir madam fyi as per the telephonic con...,0.200572,0.250053,4.765276,0.421653,0.559019,97.954773,neutral
1,Credentials for Gera world of joy application ...,nilesh hajare,hajarenilesh@gmail.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:19:19,2022-09-10 13:19:37,Normal,"Dear Gera Team, I am holding B1G01 & B1G02 in...",dear gera team i am holding b1g01 b1g02 in...,0.239305,0.121922,97.771385,0.164696,0.642805,4.157209,joy
2,RE: [EXT] Fw: Important Notification - Usage o...,Subramanian Balaji Lakshmi,balaji.subramanian@michelin.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:10:29,2022-09-10 13:11:07,Normal,"Hello Sanjeev & Ajinkya , As I discussed with...",hello sanjeev ajinkya as i discussed with...,69.368088,0.154405,0.776611,2.402454,63.111626,53.569763,anger
3,Reimbursement of Sep EMI for G-1510 and G-1511,sunil singh,sunilsingh13@gmail.com,Varsha Vats @ Gera Customer Relations,customersupportpojrm2@gera.in,2022-09-10 12:36:45,2022-09-10 12:37:22,Normal,"For my units G-1510 and G-1511, please do the ...",for my units g 1510 and g 1511 please do the ...,0.146886,0.199758,25.413540,0.177049,0.810602,88.503799,neutral
4,Re: Intent of Termination Letter for Mr. Ramra...,Ramraj Pandey,pramraj@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-09-10 12:34:33,2022-09-10 12:34:53,Normal,"Hi Mr. Adatkaran, I have transferred Rs 300,0...",hi mr adatkaran i have transferred rs 300 0...,0.169846,0.122111,74.816536,0.128957,1.485788,47.680176,joy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2961,C116_Original agreement copy,Subhash Patil,smpatil255@gmail.com,Pallavi Ladkat @ Gera Customer Relations,pallavi.ladkat@gera.in,2022-08-05 16:40:45,2022-08-05 16:41:03,Normal,"Dear sir, Myself Subhash Patil having a comme...",dear sir myself subhash patil having a comme...,0.141272,0.110589,41.940178,0.214269,0.608693,83.128555,neutral
2962,Change of contact details,Vartika,vartika134@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-08-05 15:57:26,2022-08-05 15:57:44,Normal,"Hi Shalini, As discussed, can you please chan...",hi shalini as discussed can you please chan...,80.817909,0.079174,3.561758,0.128391,39.027832,68.069847,anger
2963,,Sayali Gujrathi,sayaligujrathi7@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-08-05 13:56:16,2022-08-05 13:56:37,Normal,"Dear sir , Exhibit 1 @ exhibit 5 All cases ...",dear sir exhibit 1 exhibit 5 all cases ...,0.281418,0.142284,4.197897,0.294429,0.616587,97.440353,neutral
2964,Re: Slot for Project L - Confirmation,Meher Komal,m.komal27@gmail.com,Stephen Natkar @ Gera Customer Relations,wojcustomersupport1@gera.in,2022-08-05 13:00:45,2022-08-05 13:01:08,Normal,"Ok, thank you. On Fri, Aug 5, 2022, 12:54 PM ...",ok thank you on fri aug 5 2022 12 54 pm ...,0.404224,0.128871,99.310287,0.234794,0.810185,1.004376,joy


In [77]:
##Model 2
from keras_preprocessing import text, sequence
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Activation, Dropout, Dense
from sklearn.model_selection import train_test_split
from numpy.random import seed
seed(1) 
tf.random.set_seed(2)

In [78]:
train_df = pd.read_csv( "newtrain.csv" , encoding='windows-1252')

In [79]:
y_pandas_df = pd.get_dummies(train_df['label'])
print(type(y_pandas_df))
print(y_pandas_df.shape)
print(y_pandas_df.ndim)
y_pandas_df.head()

<class 'pandas.core.frame.DataFrame'>
(483, 5)
2


Unnamed: 0,Construction Quality,Escalation/Brand,Legal,Others,Payments/Interest waiver
0,0,1,0,0,0
1,0,1,0,0,0
2,0,1,0,0,0
3,0,1,0,0,0
4,0,0,0,0,1


In [80]:
y = y_pandas_df.values
print("properties of y")
print("type : {}, dimensions : {}, shape : {}, total no. of elements : {}, data type of each element: {}, size of each element {} bytes".format(type(y), y.ndim, y.shape, y.size, y.dtype, y.itemsize))

properties of y
type : <class 'numpy.ndarray'>, dimensions : 2, shape : (483, 5), total no. of elements : 2415, data type of each element: uint8, size of each element 1 bytes


In [81]:
train_df = train_df.drop( ['label' ] , axis=1)

In [82]:
train_df.shape
x = train_df.values.flatten()

In [83]:
list_of_classes = ['Construction Quality',	'Escalation/Brand',	'Legal',	'Others/Social Media','Payments/Interest waiver' ]
max_features = 20000
max_text_length = 500
embedding_dims = 50
batch_size = 20
epochs = 14
num_filters_1 = 250
num_filters_2 = 250
filter_size = 1 

In [84]:
x_tokenizer = text.Tokenizer(num_words=max_features)
x_tokenizer.fit_on_texts(list(x))
x_tokenized = x_tokenizer.texts_to_sequences(x)
x_train_val = sequence.pad_sequences(x_tokenized, maxlen=max_text_length)

In [85]:
test_df = pd.read_excel('Customer_Mails.xlsx')
test_df['Body.TextBody']=test_df['Body.TextBody'].astype(str) #dimensions and headers Need to be udated accordingly
test_df['Body.TextBody'] = test_df['Body.TextBody'].replace("_x000D_\n", "", regex=True)
test_df

Unnamed: 0,Subject,Sender.Name,Sender.Address,ToRecipients.Name,ToRecipients.Address,DateTimeSent,DateTimeReceived,Importance,Body.TextBody
0,Fwd: PAYMENT OF RS.450000/-,HANAMANTHRAO CHOUDRY,hrchoudry@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-09-10 14:08:06,2022-09-10 14:10:41,Normal,"Dear Sir/Madam,FYI,As per the Telephonic conve..."
1,Credentials for Gera world of joy application ...,nilesh hajare,hajarenilesh@gmail.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:19:19,2022-09-10 13:19:37,Normal,"Dear Gera Team,I am holding B1G01 & B1G02 in G..."
2,RE: [EXT] Fw: Important Notification - Usage o...,Subramanian Balaji Lakshmi,balaji.subramanian@michelin.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:10:29,2022-09-10 13:11:07,Normal,"Hello Sanjeev & Ajinkya ,As I discussed with y..."
3,Reimbursement of Sep EMI for G-1510 and G-1511,sunil singh,sunilsingh13@gmail.com,Varsha Vats @ Gera Customer Relations,customersupportpojrm2@gera.in,2022-09-10 12:36:45,2022-09-10 12:37:22,Normal,"For my units G-1510 and G-1511, please do the ..."
4,Re: Intent of Termination Letter for Mr. Ramra...,Ramraj Pandey,pramraj@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-09-10 12:34:33,2022-09-10 12:34:53,Normal,"Hi Mr. Adatkaran,I have transferred Rs 300,000..."
...,...,...,...,...,...,...,...,...,...
2961,C116_Original agreement copy,Subhash Patil,smpatil255@gmail.com,Pallavi Ladkat @ Gera Customer Relations,pallavi.ladkat@gera.in,2022-08-05 16:40:45,2022-08-05 16:41:03,Normal,"Dear sir,Myself Subhash Patil having a commerc..."
2962,Change of contact details,Vartika,vartika134@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-08-05 15:57:26,2022-08-05 15:57:44,Normal,"Hi Shalini,As discussed, can you please change..."
2963,,Sayali Gujrathi,sayaligujrathi7@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-08-05 13:56:16,2022-08-05 13:56:37,Normal,"Dear sir ,Exhibit 1 @ exhibit 5All casesAnd co..."
2964,Re: Slot for Project L - Confirmation,Meher Komal,m.komal27@gmail.com,Stephen Natkar @ Gera Customer Relations,wojcustomersupport1@gera.in,2022-08-05 13:00:45,2022-08-05 13:01:08,Normal,"Ok, thank you.On Fri, Aug 5, 2022, 12:54 PM St..."


In [86]:
x_test = test_df['Body.TextBody'].values 
x_test_tokenized = x_tokenizer.texts_to_sequences(x_test)
x_testing = sequence.pad_sequences(x_test_tokenized, maxlen=max_text_length)

In [87]:
from tensorflow.python.keras.models import load_model

model2 = load_model( "Abhishek_Model.h5" )
y_testing = model2.predict(x_testing)
max_idx = y_testing.argmax(axis=1) #get the indexes for the max probabilities
out_labels = [list_of_classes[i] for i in max_idx]
df = pd.DataFrame(y_testing, 
                  columns = list_of_classes) 

df1 = pd.DataFrame(data=out_labels)

 
C = df.join(df1) 
     



2022-09-12 12:00:00.977570: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [88]:
dashboard = data.join(C)
dashboard = dashboard.drop(columns="index")
dashboard

Unnamed: 0,Subject,Sender.Name,Sender.Address,ToRecipients.Name,ToRecipients.Address,DateTimeSent,DateTimeReceived,Importance,Body.TextBody,anger,...,sadness,surprise,neutral,emotion,Construction Quality,Escalation/Brand,Legal,Others/Social Media,Payments/Interest waiver,0
0,Fwd: PAYMENT OF RS.450000/-,HANAMANTHRAO CHOUDRY,hrchoudry@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-09-10 14:08:06,2022-09-10 14:10:41,Normal,"Dear Sir/Madam, FYI, As per the Telephonic con...",0.200572,...,0.421653,0.559019,97.954773,neutral,0.046078,0.035592,0.016477,0.983923,0.962282,Others/Social Media
1,Credentials for Gera world of joy application ...,nilesh hajare,hajarenilesh@gmail.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:19:19,2022-09-10 13:19:37,Normal,"Dear Gera Team, I am holding B1G01 & B1G02 in...",0.239305,...,0.164696,0.642805,4.157209,joy,0.199082,0.295249,0.115872,0.959542,0.224868,Others/Social Media
2,RE: [EXT] Fw: Important Notification - Usage o...,Subramanian Balaji Lakshmi,balaji.subramanian@michelin.com,Sanjeev Yadav @ Gera Customer Relations,customersupportpojrm3@gera.in,2022-09-10 13:10:29,2022-09-10 13:11:07,Normal,"Hello Sanjeev & Ajinkya , As I discussed with...",69.368088,...,2.402454,63.111626,53.569763,anger,0.716294,0.115962,0.507782,0.270905,0.225192,Construction Quality
3,Reimbursement of Sep EMI for G-1510 and G-1511,sunil singh,sunilsingh13@gmail.com,Varsha Vats @ Gera Customer Relations,customersupportpojrm2@gera.in,2022-09-10 12:36:45,2022-09-10 12:37:22,Normal,"For my units G-1510 and G-1511, please do the ...",0.146886,...,0.177049,0.810602,88.503799,neutral,0.005894,0.011599,0.968564,0.653609,0.961196,Legal
4,Re: Intent of Termination Letter for Mr. Ramra...,Ramraj Pandey,pramraj@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-09-10 12:34:33,2022-09-10 12:34:53,Normal,"Hi Mr. Adatkaran, I have transferred Rs 300,0...",0.169846,...,0.128957,1.485788,47.680176,joy,0.343968,0.019784,0.270116,0.867088,0.691359,Others/Social Media
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2961,C116_Original agreement copy,Subhash Patil,smpatil255@gmail.com,Pallavi Ladkat @ Gera Customer Relations,pallavi.ladkat@gera.in,2022-08-05 16:40:45,2022-08-05 16:41:03,Normal,"Dear sir, Myself Subhash Patil having a comme...",0.141272,...,0.214269,0.608693,83.128555,neutral,0.063932,0.425064,0.621551,0.538240,0.453915,Legal
2962,Change of contact details,Vartika,vartika134@gmail.com,Shalini Shah @ Gera Customer Relations,shalini.shah@gera.in,2022-08-05 15:57:26,2022-08-05 15:57:44,Normal,"Hi Shalini, As discussed, can you please chan...",80.817909,...,0.128391,39.027832,68.069847,anger,0.195913,0.860689,0.003778,0.975466,0.167527,Others/Social Media
2963,,Sayali Gujrathi,sayaligujrathi7@gmail.com,Paulson Adatkaran @ Gera Customer Relations,paulson.julies@gera.in,2022-08-05 13:56:16,2022-08-05 13:56:37,Normal,"Dear sir , Exhibit 1 @ exhibit 5 All cases ...",0.281418,...,0.294429,0.616587,97.440353,neutral,0.142617,0.098364,0.014060,0.995061,0.786745,Others/Social Media
2964,Re: Slot for Project L - Confirmation,Meher Komal,m.komal27@gmail.com,Stephen Natkar @ Gera Customer Relations,wojcustomersupport1@gera.in,2022-08-05 13:00:45,2022-08-05 13:01:08,Normal,"Ok, thank you. On Fri, Aug 5, 2022, 12:54 PM ...",0.404224,...,0.234794,0.810185,1.004376,joy,0.183204,0.202949,0.028425,0.936650,0.538806,Others/Social Media


In [101]:
dashboard["emotion"].value_counts()

neutral     1366
joy          953
anger        388
sadness      198
surprise      50
fear          11
Name: emotion, dtype: int64

In [135]:
dashboard.to_csv("Dashboard.csv")

In [90]:
#FTP Connection

In [91]:
import ftplib
#connection parameters
ftpHost='115.124.113.164'
ftpPort=21
ftpUsername='cce'
ftpPassword='Outdo2022'

In [92]:
#create an FTP client instance, use the timeout(seconds) only if connectio is slow
ftp =ftplib.FTP(timeout=1200)

In [93]:
#connecting to ftp server
ftp.connect(ftpHost, ftpPort)

'220 Welcome message'

In [94]:
#log in to ftp server
ftp.login(ftpUsername, ftpPassword)

'230 Login successful.'

In [95]:
ftp.nlst()

['Abhishek_Model.csv', 'Customer Additional Details', 'Dashboard']

In [96]:
ftp.cwd("Dashboard/")

'250 Directory successfully changed.'

In [97]:
ftp.nlst()

['Abhishek_Model.csv', 'Dashboard.csv']

In [98]:
ftp.sendcmd('PASV')

'227 Entering Passive Mode (115,124,113,164,117,142).'

In [99]:
localfile = open("Dashboard.csv",'rb')
ftp.storbinary('APPE Dashboard.csv', localfile)
localfile.close()

In [100]:
print(ftp.close())

None
