In [24]:
import pandas as pd
# Load the labelled feedback (used for training) and the unlabelled feedback
# (classified further down in the notebook).
tagged_raw = pd.read_csv("tagged_and_feedback.csv")
non_tagged_df = pd.read_csv("data_translated.csv")

# Keep only the text and label columns. The explicit .copy() makes tagged_df an
# independent frame, so the column assignment below does not write into a slice
# of the raw frame (which triggers pandas' SettingWithCopyWarning).
tagged_df = tagged_raw[["Verbatim Translated", "Tagged Touchpoint"]].copy()

# Each "Tagged Touchpoint" cell is a comma-separated string; turn it into a
# list of labels so it can be used as a multi-label target.
tagged_df["Tagged Touchpoint"] = tagged_df["Tagged Touchpoint"].str.split(",")
tagged_df.head()

Unnamed: 0,Verbatim Translated,Tagged Touchpoint
0,the hard -to -answer careline line needs to be...,[call center]
1,there is no atm or rhb in jasin melaka,[atm]
2,up grade your apps,[mobile banking]
3,hope can improve mobile app factions or link b...,"[branch, internet banking, mobile banking]"
4,leave a notification short message for each wi...,[atm]


### Model Training

In [5]:
from sklearn.model_selection import train_test_split

# First column holds the feedback text, second column the list of labels.
X, y = (tagged_df.iloc[:, position].values for position in (0, 1))

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [6]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.base import TransformerMixin
from sklearn.ensemble import GradientBoostingClassifier

# Learn the label vocabulary from the training labels, then encode those same
# labels as a binary indicator matrix for the classifier.
mlb = MultiLabelBinarizer().fit(y_train)
Y = mlb.transform(y_train)

class DenseTransformer(TransformerMixin):
    """Pipeline step that converts a sparse feature matrix into a dense array.

    The step is stateless: ``fit`` learns nothing and ``transform`` only
    changes the container type of ``X``.
    """

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self`` so the step can sit in a Pipeline."""
        return self

    def transform(self, X, y=None, **fit_params):
        """Return ``X`` as a dense ``numpy.ndarray``.

        ``toarray()`` is used instead of ``todense()`` because the latter
        returns ``numpy.matrix``, which recent scikit-learn estimators reject.
        Input without a ``toarray`` method (already dense) is passed through
        ``np.asarray`` instead of raising ``AttributeError``.
        """
        if hasattr(X, "toarray"):
            return X.toarray()
        return np.asarray(X)

# Text -> token counts -> TF-IDF weights -> dense array -> one gradient-boosted
# model per label (one-vs-rest), fitted on the binarized training labels.
classifier = Pipeline(
    steps=[
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('to_dense', DenseTransformer()),
        (
            'clf',
            OneVsRestClassifier(
                GradientBoostingClassifier(
                    n_estimators=100,
                    learning_rate=1.0,
                    max_depth=1,
                    random_state=0,
                )
            ),
        ),
    ]
).fit(X_train, Y)

# Predict the held-out texts and map the indicator rows back to label tuples.
predicted = classifier.predict(X_test)
all_labels = mlb.inverse_transform(predicted)


In [27]:
# Classify the untagged feedback and print each text with its predicted labels.
# Rows containing any missing value are dropped first; rebinding the name
# (instead of inplace=True) keeps the cell a plain "new value from old value"
# step.
non_tagged_df = non_tagged_df.dropna()
predicted2 = classifier.predict(non_tagged_df["Verbatim Translated"])
all_label2 = mlb.inverse_transform(predicted2)

# No ground truth exists for this data, so there is nothing to score here; the
# unused index / correct_score counters were removed.
for item, labels in zip(non_tagged_df["Verbatim Translated"], all_label2):
    print(item)
    print(f"Predicted : {np.asarray(labels)}")

the hard -to -answer careline line needs to be fixed
Predicted : ['call center']
there is no atm or rhb in jasin melaka
Predicted : ['atm']
up grade your apps
Predicted : ['mobile banking']
maintain the service as it is now and gift something good for using rhb
Predicted : []
hope can improve mobile app factions or link branch account immediate 
Predicted : ['branch' 'internet banking' 'mobile banking']
leave a notification short message for each withdrawal and entry transaction for users to know each transaction performed
Predicted : ['atm']
online connection server and request for thumb print login for rhb apps
Predicted : ['internet banking' 'mobile banking']
suddenly want wait long time if can improve service 
Predicted : ['branch']
for mobileinternet banking if key in the wrong password for few times suggest to have alternative method to unlock the account and not only can be unlocked via call to customer service unlock without password change it is quite inconvenience is the acco

In [7]:
def common_elements(list1, list2):
    """Return the fraction of the distinct items in ``list2`` found in ``list1``.

    In the scoring loop ``list1`` is the predicted label collection and
    ``list2`` the actual one, so the value is a per-sample recall: 1.0 means
    every actual label was predicted. Extra items in ``list1`` are not
    penalised.

    NOTE(review): the original carried a "# wrong values" comment with no
    explanation; it may refer to over-prediction not lowering the score.
    Confirm with the author before relying on this as an accuracy figure.

    Parameters
    ----------
    list1 : iterable of hashable
        Candidate items (e.g. predicted labels).
    list2 : iterable of hashable
        Reference items (e.g. actual labels).

    Returns
    -------
    float
        Share of distinct reference items present in ``list1``. When
        ``list2`` is empty, returns 1.0 if ``list1`` is empty too, else 0.0,
        instead of raising ``ZeroDivisionError``.
    """
    # Deduplicate the reference so repeated labels cannot push the score of a
    # perfect prediction below 1.0.
    expected = set(list2)
    if not expected:
        return 1.0 if len(set(list1)) == 0 else 0.0
    return len(expected.intersection(list1)) / len(expected)


### Computing Scores

In [26]:
# Print every held-out text with its predicted and actual labels, collecting
# the per-sample overlap score as we go.
sample_scores = []
for item, labels, actual in zip(X_test, all_labels, y_test):
    print(item)
    print(f"Predicted : {np.asarray(labels)}")
    print(f'Actual : {actual}')
    sample_scores.append(common_elements(labels, actual))

# Mean overlap score over the test set, expressed as a percentage.
index = len(sample_scores)
correct_score = sum(sample_scores)
print(correct_score/index*100)

reduce waiting timeu go and survey how pbb response timeit is so fastbut rhb is extremely slow
Predicted : ['branch']
Actual : ['branch']
lower interest rate
Predicted : ['relationship manager']
Actual : ['relationship manager']
replying to the customer inquiry asap i have requested to change my due date as loan disbursement and payment date was  weeks different only
Predicted : ['branch' 'relationship manager']
Actual : ['branch', 'relationship manager']
customer friendly
Predicted : ['branch' 'call center' 'relationship manager']
Actual : ['branch', 'call center', 'relationship manager']
please update your online banking and mobile banking systems bcoz theres seems some glitches especially the mobile banking app
Predicted : ['internet banking' 'mobile banking']
Actual : ['internet banking', 'mobile banking']
answer your calls from the call center when you ask for your customers number saying you will call back please call within the  or  hours  have yet to receive the call check your

In [18]:
# Ad-hoc check: run one hand-written complaint through the trained pipeline.
X_test2 = ["i send a complaint through sms, but nobody picked up on it"]

predicted2 = classifier.predict(X_test2)
all_labels2 = mlb.inverse_transform(predicted2)

# Show each text next to its comma-joined predicted labels (empty if none).
for text, touchpoints in zip(X_test2, all_labels2):
    joined_labels = ', '.join(touchpoints)
    print('{0} => {1}'.format(text, joined_labels))



i send a complaint through sms, but nobody picked up on it => 


In [75]:
import pickle

# Persist the fitted pipeline. The context manager guarantees the file handle
# is flushed and closed even if pickling fails.
# Note: only `classifier` is saved; the MultiLabelBinarizer `mlb` used to
# decode predictions is not part of this file.
with open("DavesModel", 'wb') as model_file:
    pickle.dump(classifier, model_file)

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
with open("DavesModel", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

result = loaded_model




In [77]:
# Sanity-check the model reloaded from disk on a trivial input.
X_test2 = ["something atm"]

predicted2 = result.predict(X_test2)
all_labels2 = mlb.inverse_transform(predicted2)

# Show each text next to its comma-joined predicted labels.
for text, touchpoints in zip(X_test2, all_labels2):
    joined_labels = ', '.join(touchpoints)
    print('{0} => {1}'.format(text, joined_labels))

something atm => atm
