### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Data Loading

In [2]:
# Load the bank FAQ dataset (Question / Answer / Class columns) from the working directory
df = pd.read_csv("BankFAQs.csv")

### Printing First Five Rows of Data

In [3]:
# Preview the first five rows to check the Question / Answer / Class layout
df.head()

Unnamed: 0,Question,Answer,Class
0,Do I need to enter ‘#’ after keying in my Card...,Please listen to the recorded message and foll...,security
1,What details are required when I want to perfo...,"To perform a secure IVR transaction, you will ...",security
2,How should I get the IVR Password if I hold a...,An IVR password can be requested only from the...,security
3,How do I register my Mobile number for IVR Pas...,Please call our Customer Service Centre and en...,security
4,How can I obtain an IVR Password,By Sending SMS request: Send an SMS 'PWD<space...,security


### Data Information

In [4]:
# Summarise column dtypes and non-null counts to check for missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1764 entries, 0 to 1763
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Question  1764 non-null   object
 1   Answer    1764 non-null   object
 2   Class     1764 non-null   object
dtypes: object(3)
memory usage: 41.5+ KB


### Basic Stats About Data

In [5]:
# For text (object) columns describe() reports count / unique / top / freq;
# transposed so each dataset column becomes one row of the summary.
# NOTE(review): the recorded output shows 1334 unique questions for 1764 rows,
# i.e. the dataset contains repeated questions.
df.describe().T

Unnamed: 0,count,unique,top,freq
Question,1764,1334,How can I change the mode of repayment/ accoun...,18
Answer,1764,1440,Post Dated Cheques(PDCs)/Security Cheques subm...,17
Class,1764,7,insurance,469


### Number of Samples in Each Class

In [6]:
# Count rows per class, most frequent first, to expose class imbalance
# (in the recorded output the smallest class has 14 rows, the largest 469).
df['Class'].value_counts()

insurance        469
cards            403
loans            375
accounts         306
investments      140
security          57
fundstransfer     14
Name: Class, dtype: int64

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words token-count vectorizer with default settings; it is fitted
# further down on the training questions only.
vectorizer = CountVectorizer()

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### Splitting Data into Train and Test

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, so rare classes can be unevenly
# divided between train and test; passing stratify=df['Class'] would keep the
# class proportions, but it changes every metric recorded below.
# NOTE(review): the dataset contains repeated questions, so the same question
# text may land in both partitions and inflate the test scores.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

### Converting Questions to Bag-of-Words Count Vectors

In [11]:
# Transform the text data into feature vectors.
# The vocabulary is learned from the training questions only (fit_transform);
# the test questions are then encoded with that same vocabulary (transform),
# so no information from the test set leaks into the features.
X_train = vectorizer.fit_transform(train_data['Question'])
X_test = vectorizer.transform(test_data['Question'])

### Target

In [12]:
# Define the labels: the 'Class' column is the classification target,
# taken from the same train / test partitions as the features.
y_train = train_data['Class']
y_test = test_data['Class']

### Applying SVC Model

In [13]:
from sklearn.svm import SVC

In [14]:
# Train the SVM model on the bag-of-words training features.
# kernel='rbf' and decision_function_shape='ovr' are spelled out for clarity;
# the recorded repr, SVC(C=10, gamma=0.1), lists only C and gamma as non-default.
# NOTE(review): C=10 and gamma=0.1 are hard-coded; no tuning or validation
# step that produced them is visible in this notebook.
svm_model = SVC(C=10, kernel='rbf', gamma=0.1, decision_function_shape='ovr')
svm_model.fit(X_train, y_train)

SVC(C=10, gamma=0.1)

In [19]:
# Make predictions on the test set: one predicted class label per test question
svm_predictions = svm_model.predict(X_test)

In [20]:
# Fraction of test questions whose predicted class equals the true class
svm_accuracy = accuracy_score(y_test, svm_predictions)

In [21]:
# Report the overall test accuracy of the SVM
print("SVM accuracy:", svm_accuracy)

SVM accuracy: 0.9206798866855525


In [22]:
# Per-class precision / recall / F1 and support on the test set.
# Overall accuracy alone hides per-class weaknesses: in the recorded output
# the small classes (fundstransfer, security) have much lower recall.
print("SVM classification report:")
print(classification_report(y_test, svm_predictions))


SVM classification report:
               precision    recall  f1-score   support

     accounts       0.87      0.95      0.91        57
        cards       0.92      0.97      0.94        88
fundstransfer       1.00      0.17      0.29         6
    insurance       0.93      0.98      0.95        86
  investments       0.88      0.78      0.82        27
        loans       0.95      0.93      0.94        81
     security       1.00      0.62      0.77         8

     accuracy                           0.92       353
    macro avg       0.94      0.77      0.80       353
 weighted avg       0.92      0.92      0.91       353



In [23]:
# Confusion matrix: rows are the true classes and columns the predicted
# classes, both in sorted label order (the order used by the classification report).
print("SVM confusion matrix:")
print(confusion_matrix(y_test, svm_predictions))

SVM confusion matrix:
[[54  0  0  3  0  0  0]
 [ 0 85  0  1  0  2  0]
 [ 1  1  1  1  1  1  0]
 [ 0  0  0 84  1  1  0]
 [ 5  1  0  0 21  0  0]
 [ 2  2  0  1  1 75  0]
 [ 0  3  0  0  0  0  5]]


### Applying Naive Bayes Algorithm

In [24]:
from sklearn.naive_bayes import MultinomialNB

In [25]:
# Train the Naive Bayes model on the same count features used for the SVM.
# alpha=1 applies add-one (Laplace) smoothing to the per-class word counts.
nb_model = MultinomialNB(alpha=1)
nb_model.fit(X_train, y_train)

MultinomialNB(alpha=1)

In [26]:
# Predict a class label for every test question with the Naive Bayes model
nb_predictions = nb_model.predict(X_test)

In [27]:
# Fraction of test questions the Naive Bayes model classified correctly
nb_accuracy = accuracy_score(y_test, nb_predictions)

In [28]:
# Report the overall test accuracy of the Naive Bayes model
print("Naive Bayes accuracy:", nb_accuracy)

Naive Bayes accuracy: 0.8753541076487252


In [29]:
# Print the classification report for the Naive Bayes model
# (the confusion matrix is printed in a separate cell below).
# In the recorded run the model never predicts 'fundstransfer', so precision
# for that class is undefined and reported as 0.00; this is what triggers the
# _warn_prf warnings shown in the output.
# NOTE(review): classification_report accepts a zero_division argument to
# make that choice explicit and silence the warning — confirm desired value.
print("Naive Bayes classification report:")
print(classification_report(y_test, nb_predictions))

Naive Bayes classification report:
               precision    recall  f1-score   support

     accounts       0.91      0.89      0.90        57
        cards       0.89      0.98      0.93        88
fundstransfer       0.00      0.00      0.00         6
    insurance       0.87      0.87      0.87        86
  investments       0.86      0.70      0.78        27
        loans       0.85      0.93      0.89        81
     security       0.75      0.38      0.50         8

     accuracy                           0.88       353
    macro avg       0.73      0.68      0.70       353
 weighted avg       0.86      0.88      0.86       353



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
# Confusion matrix: rows are the true classes and columns the predicted
# classes, both in sorted label order. An all-zero column means the model
# never predicted that class.
print("Naive Bayes confusion matrix:")
print(confusion_matrix(y_test, nb_predictions))

Naive Bayes confusion matrix:
[[51  1  0  2  0  3  0]
 [ 0 86  0  1  0  0  1]
 [ 0  0  0  2  2  2  0]
 [ 2  3  0 75  1  5  0]
 [ 2  1  0  2 19  3  0]
 [ 1  1  0  4  0 75  0]
 [ 0  5  0  0  0  0  3]]


# Predictions

In [32]:
from sklearn.metrics.pairwise import cosine_similarity

# Answer a free-text question in two stages:
#   1. classify the question with the trained SVM to pick an FAQ class;
#   2. inside that class, retrieve the answer of the stored question whose
#      bag-of-words vector is most similar (cosine) to the input question.
input_question = "Where do I get this card"

# Encode the question with the already-fitted vocabulary; words the
# vectorizer has never seen are dropped, so the vector may be all zeros.
input_vector = vectorizer.transform([input_question])
predicted_class = svm_model.predict(input_vector)[0]

# Restrict the retrieval step to FAQs of the predicted class
class_data = df[df['Class'] == predicted_class]
class_vectors = vectorizer.transform(class_data['Question'])

# similarities has shape (1, n_faqs_in_class); with a single row the flat
# argmax is the positional index of the best-matching FAQ in class_data.
similarities = cosine_similarity(input_vector, class_vectors)
most_similar_index = similarities.argmax()

# If the question shares no vocabulary term with any FAQ of the class, every
# similarity is 0 and argmax() falls back to position 0, which would present
# an unrelated answer as a match. Report the miss explicitly instead.
if similarities.size > 0 and similarities.max() > 0:
    predicted_answer = class_data.iloc[most_similar_index]['Answer']
else:
    predicted_answer = "No matching FAQ found for this question."

# Print the predicted class and answer
print("Predicted class:", predicted_class)
print("Predicted answer:", predicted_answer)

Predicted class: cards
Predicted answer: ForexPlus card is available over the counter at HDFC Bank branches. You simply need to walk into our branch with required documentation and walk out with ForexPlus card. The card will get activated within 4 hours from realization of funds. Alternatively, the customer can apply for card through NetBanking login also.In case the customer applies the card through NetBanking the card will be delivered to the customer within 2 working days. This facility is available at select locations only.
