In [1]:
# Importing Libraries
# The imports are intentionally not wrapped in try/except: a missing package
# must stop the notebook here with its own ImportError. Printing the error and
# carrying on would skip every import after the failing one and make later
# cells fail with an unrelated NameError.
import os
import pickle
import re
import sys

import numpy as np
import pandas as pd

# importing algorithms
from sklearn import svm
from sklearn.preprocessing import StandardScaler

# Current working directory; the dataset and model paths used further down are
# built relative to it.
pwd = os.getcwd()

In [2]:
# Human-readable label for each predicted class id. The keys are the class ids
# as strings, because the lookup further down converts each prediction with str().
comment_type = {
    '1': 'Gratitude',
    '2': 'About Recipe',
    '3': 'About Video',
    '4': 'Praising Chef',
    '5': 'Hybrid',
    '6': 'Undefined',
    '7': 'Suggestion/Query',
}

In [3]:
# Build the path with os.path.join rather than hand-concatenated "//" separators
# so the separator handling is left to the standard library.
test_data_path = os.path.join(pwd, "Datasets", "Kabita", "Input", "Test_Data.csv")
data_df = pd.read_csv(test_data_path)
# NOTE(review): the "Unnamed: 0" column in the output is presumably a row index
# that was saved into the CSV; only the "comments" column is used further down.
data_df

Unnamed: 0,comments
0,Thankyou for the video bilkul pasand karta hun...
1,Mai try kiya aaj delicious recipe
2,video ki clarity bahot achi h
3,bahot beautiful dikh rahe ho
4,video achi h aur aap bhi
5,yeh saal mein who is watching this video comme...
6,kaun si company chilli powder use kar rahe ho


In [4]:
def preprocess_text(sen):
    """Clean one raw comment and return the cleaned string.

    Steps, applied in order:
      1. strip HTML tags via ``remove_tags``;
      2. replace every character that is not an ASCII letter with a space
         (this drops punctuation, digits and non-ASCII characters);
      3. replace a single ASCII letter that has whitespace on both sides
         with one space;
      4. collapse every run of whitespace into a single space.

    Note: step 3 needs whitespace on both sides, so a single letter at the very
    start or end of the text is kept, and leading/trailing spaces are not stripped.
    """
    cleaned = remove_tags(sen)

    # (pattern, replacement) pairs, applied one after another.
    substitutions = (
        ('[^a-zA-Z]', ' '),          # punctuation, digits, anything non-letter
        (r"\s+[a-zA-Z]\s+", ' '),    # isolated single letters
        (r'\s+', ' '),               # runs of whitespace
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned

In [5]:
# Matches one HTML-style tag: "<", then one or more characters other than ">", then ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return ``text`` with every substring matched by ``TAG_RE`` deleted."""
    return re.sub(TAG_RE, '', text)

In [6]:
# Clean every raw comment; `comments` keeps the same order as data_df['comments'].
sentences = list(data_df['comments'])
comments = [preprocess_text(sentence) for sentence in sentences]

In [7]:
# Show the cleaned comments that will be passed to the sentence encoder.
comments

['Thankyou for the video bilkul pasand karta hun aapki videos',
 'Mai try kiya aaj delicious recipe',
 'video ki clarity bahot achi h',
 'bahot beautiful dikh rahe ho',
 'video achi aur aap bhi',
 'yeh saal mein who is watching this video comment like karo',
 'kaun si company chilli powder use kar rahe ho']

In [8]:
# Converting comments to vectors through Verloop BERT Hinglish

from sentence_transformers import SentenceTransformer

# Loads the 'verloop/Hinglish-Bert' checkpoint. The "Some weights of the model
# checkpoint ... were not used" message in the output is described by the
# library itself as expected when a BertModel is initialised from a checkpoint
# trained with another architecture.
model = SentenceTransformer('verloop/Hinglish-Bert')

# Encode the cleaned comments. The result is printed here and is standardised
# in the next step before being passed to the SVM models.
embeddings = model.encode(comments)
print(embeddings)

Some weights of the model checkpoint at C:\Users\murth/.cache\torch\sentence_transformers\verloop_Hinglish-Bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[[-0.12100051 -0.30702236  0.17778549 ... -0.03112733 -0.43440607
  -0.15938774]
 [-0.00626889 -0.21276009  0.3372688  ...  0.42898044 -0.36675984
  -0.26373821]
 [-0.21520971 -0.30725047  0.37495995 ... -0.06397274  0.07015433
  -0.249461  ]
 ...
 [-0.0346365  -0.41711336  0.30021864 ... -0.04054578  0.17144695
  -0.22335693]
 [-0.43569008 -0.302707    0.17271754 ...  0.220048   -0.24916384
  -0.14671196]
 [-0.50334793 -0.11135657  0.17930578 ... -0.00080812 -0.12539728
  -0.09804695]]


In [9]:
# Standardizing the data
# NOTE(review): the scaler is fitted on the embeddings of this test batch, so
# the scaled values depend on which comments happen to be in the batch.
# Presumably the SVM models loaded further down were trained on features scaled
# with a scaler fitted on the training data; if so, that fitted scaler should
# be reused here instead -- confirm against the training notebook.
standard_model = StandardScaler().fit(embeddings)
scaled_data = standard_model.transform(embeddings)
scaled_data

array([[ 0.692131  , -0.35720742, -0.71820533, ..., -0.6008675 ,
        -1.3717283 ,  0.5539496 ],
       [ 1.2260015 ,  0.719132  ,  1.1104636 , ...,  2.0965326 ,
        -1.0393343 , -1.3354238 ],
       [ 0.25375557, -0.35981208,  1.5426382 , ..., -0.7934251 ,
         1.1075345 , -1.0769199 ],
       ...,
       [ 1.0940009 , -1.6142881 ,  0.6856387 , ..., -0.6560836 ,
         1.6052569 , -0.60427886],
       [-0.7721863 , -0.30793202, -0.77631545, ...,  0.8716578 ,
        -0.4615019 ,  0.7834577 ],
       [-1.0870126 ,  1.8770142 , -0.7007734 , ..., -0.42311996,
         0.14665093,  1.6645881 ]], dtype=float32)

### Kabita's Model Prediction

In [10]:
# Load the pickled SVM model for Kabita's comments.
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load model
# files that come from a trusted source.
kabita_model_path = os.path.join(pwd, "Final_Models", "Kabita_Standard_SVM_Model.sav")
with open(kabita_model_path, 'rb') as model_file:
    kabita_final_model = pickle.load(model_file)

In [11]:
# Predict one class id per scaled embedding with Kabita's model.
# NOTE: `pred` is reassigned by the Nisha prediction cell further down, so the
# cell that prints the labels must run right after this one.
pred = kabita_final_model.predict(scaled_data)
print(pred)

[1 2 4 4 6 6 7]


In [12]:
# Print each original (unprocessed) comment next to its predicted label.
# Comments and predictions are paired by position with zip, so this no longer
# relies on data_df having the default 0..n-1 index labels.
for comment, label_id in zip(data_df['comments'], pred):
    # comment_type is keyed by the class id as a string.
    print(f"{comment} : {comment_type[str(label_id)]}")
    print("*" * 50)

Thankyou for the video bilkul pasand karta hun aapki videos : Gratitude
**************************************************
Mai try kiya aaj delicious recipe : About Recipe
**************************************************
video ki clarity bahot achi h : Praising Chef
**************************************************
bahot beautiful dikh rahe ho : Praising Chef
**************************************************
video achi h aur aap bhi : Undefined
**************************************************
yeh saal mein who is watching this video comment like karo : Undefined
**************************************************
kaun si company chilli powder use kar rahe ho : Suggestion/Query
**************************************************


### Nisha's Model Prediction

In [13]:
# Load the pickled SVM model for Nisha's comments.
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load model
# files that come from a trusted source.
nisha_model_path = os.path.join(pwd, "Final_Models", "Nisha_Standard_SVM_Model.sav")
with open(nisha_model_path, 'rb') as model_file:
    nisha_final_model = pickle.load(model_file)

In [14]:
# Predict one class id per scaled embedding with Nisha's model.
# NOTE: this overwrites the `pred` produced by Kabita's model above.
pred = nisha_final_model.predict(scaled_data)
print(pred)

[1 2 4 4 4 6 7]


In [15]:
# Print each original (unprocessed) comment next to its predicted label.
# Comments and predictions are paired by position with zip, so this no longer
# relies on data_df having the default 0..n-1 index labels.
for comment, label_id in zip(data_df['comments'], pred):
    # comment_type is keyed by the class id as a string.
    print(f"{comment} : {comment_type[str(label_id)]}")
    print("*" * 50)

Thankyou for the video bilkul pasand karta hun aapki videos : Gratitude
**************************************************
Mai try kiya aaj delicious recipe : About Recipe
**************************************************
video ki clarity bahot achi h : Praising Chef
**************************************************
bahot beautiful dikh rahe ho : Praising Chef
**************************************************
video achi h aur aap bhi : Praising Chef
**************************************************
yeh saal mein who is watching this video comment like karo : Undefined
**************************************************
kaun si company chilli powder use kar rahe ho : Suggestion/Query
**************************************************
