In [11]:
import pandas as pd
import plotly.express as px

In [4]:
check_balance_df = pd.read_csv('../csv/check_balance.csv')
paynow_transfer_df = pd.read_csv('../csv/paynow_transfer.csv')
scan_to_pay_df = pd.read_csv('../csv/scan_to_pay.csv')

In [5]:
combined_df = pd.concat([check_balance_df, paynow_transfer_df, scan_to_pay_df], ignore_index=True)

In [6]:
combined_df

Unnamed: 0,bank_function,date,account_number,see_past_transactions,msgs_to_customer_support,transaction_time,recipient_number,amount,qr_string
0,Check Balance,1/6/23,12345678.0,Past 7 days,"[""I have a question about my recent transactio...",,,,
1,Check Balance,1/6/23,23456789.0,Past 1 month,"[""I'm locked out of my online banking account....",,,,
2,Check Balance,2/6/23,34567890.0,Past 3 months,"[""I suspect fraud on my account. What should I...",,,,
3,Check Balance,2/6/23,45678901.0,Past 7 days,"[""I received a suspicious email. Is it from th...",,,,
4,Check Balance,2/6/23,56789012.0,Past 1 month,"[""I'm experiencing issues with online banking....",,,,
...,...,...,...,...,...,...,...,...,...
143,Scan To Pay,1/8/23,45678901.0,,"[""I have a question about a recent transaction...",10:45:55,98888890.0,1000.75,QkVHSU46VkFMVUVTVC1QVUJMSUMgUE5HOjpBbW91bnQ6MT...
144,Scan To Pay,1/8/23,56789012.0,,"[""Can you provide the wire transfer fee detail...",11:30:55,96666667.0,800.40,QkVHSU46VkFMVUVTVC1QVUJMSUMgUE5HOjpBbW91bnQ6MT...
145,Scan To Pay,1/8/23,67890123.0,,"[""How often should I change my online banking ...",15:20:10,98888891.0,450.55,QkVHSU46VkFMVUVTVC1QVUJMSUMgUE5HOjpBbW91bnQ6MT...
146,Scan To Pay,1/8/23,12345678.0,,"[""What's the maximum loan amount for small bus...",8:05:45,95555558.0,2200.90,QkVHSU46VkFMVUVTVC1QVUJMSUMgUE5HOjpBbW91bnQ6MT...


In [9]:
bank_function_count = combined_df.groupby(["bank_function"]).size().reset_index(name="bank_function_count")

In [10]:
bank_function_count

Unnamed: 0,bank_function,bank_function_count
0,Check Balance,48
1,PayNow Transfer,50
2,Scan To Pay,49


In [14]:
fig = px.pie(
    bank_function_count,
    values="bank_function_count",
    names="bank_function",
    title="Bank Function Distribution",
    hole=0.6,
    color_discrete_sequence=["#B8D5E5", "#92BFD8", "#63A3C7"]
)
fig.show()

In [31]:
from transformers import pipeline
import pandas as pd
import re
import time

start = time.time()

sentiment_pipeline = pipeline("sentiment-analysis") #create sentiment analysis model
df = pd.read_csv('../csv/overview.csv') #read overview df
unrecognised_msgs = df["unrecognised_msgs"].tolist() #get unrecognised_msgs column

msgs = []
pattern = r'["\'\[\]]'

for lst in unrecognised_msgs:
    lst = lst.split(", ") # transform string representation of array to array ie "[['msg1', 'msg2'], ['msg3', 'msg4']]" to [['msg1', 'msg2'], ['msg3', 'msg4']]
    for msg in lst:
            msg = re.sub(pattern, '', msg) # remove ' " [ ] from msg
            msg = msg.strip() # remove spaces from the start and end of msg
            if msg != "": # msgs_to_customer_support column is []
                msgs.append(msg)

sentiments = sentiment_pipeline(msgs) # apply sentiment analysis

# turn msgs and sentiment into a csv
msgs_sentiments = {"unrecognised_msgs": msgs, "sentiment_label_score": sentiments}
msgs_sentiments_df = pd.DataFrame(msgs_sentiments)
msgs_sentiments_df.to_csv("../csv/sentiments.csv", index=False)

end = time.time()
total_seconds = end - start
minutes = total_seconds // 60
seconds = total_seconds % 60
print(f"{minutes:02}:{seconds:02}")


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


0.0:15.521202087402344


In [34]:
import json
sentiment_df = pd.read_csv("../csv/sentiments.csv")
sentiment_df["sentiment_label_score"] = sentiment_df["sentiment_label_score"].apply(lambda x: x.replace("'", "\""))
sentiment_df["sentiment_label"] = sentiment_df["sentiment_label_score"].apply(lambda x: json.loads(x)["label"])
sentiment_df.head(5)

Unnamed: 0,unrecognised_msgs,sentiment_label_score,sentiment_label
0,I have a question about my recent transactions.,"{""label"": ""NEGATIVE"", ""score"": 0.8892908096313...",NEGATIVE
1,How can I set up e-statements?,"{""label"": ""NEGATIVE"", ""score"": 0.999462902545929}",NEGATIVE
2,Can you help me with my credit card rewards?,"{""label"": ""NEGATIVE"", ""score"": 0.8585994243621...",NEGATIVE
3,Im locked out of my online banking account.,"{""label"": ""NEGATIVE"", ""score"": 0.9991946816444...",NEGATIVE
4,Whats the process for applying for a credit card?,"{""label"": ""NEGATIVE"", ""score"": 0.9964313507080...",NEGATIVE


In [36]:
sentiment_distribution = sentiment_df.groupby(["sentiment_label"]).size().reset_index(name="sentiment_distribution")
sentiment_distribution

Unnamed: 0,sentiment_label,sentiment_distribution
0,NEGATIVE,127
1,POSITIVE,5


In [38]:
fig = px.pie(
    sentiment_distribution,
    values="sentiment_distribution",
    names="sentiment_label",
    title="Sentiment Distribution",
    hole=0.6,
    color_discrete_sequence=["#B8D5E5", "#92BFD8"]
)
fig.show()

In [None]:
check_balance_df = pd.read_csv('../csv/check_balance.csv')
paynow_transfer_df = pd.read_csv('../csv/paynow_transfer.csv')
scan_to_pay_df = pd.read_csv('../csv/scan_to_pay.csv')

In [None]:
def get_fig(df, metric_choice):
            df['date'] = pd.to_datetime(df['date'], format="%d/%m/%y")
            sorted_df = df.sort_values(by="date")
            
            if metric_choice == "Users":
                users_df = sorted_df.groupby(["date"])['account_number'].nunique().reset_index(name="user count")
                fig = px.line(users_df, x="date", y="user count", title="Number of Users Over Time", hover_name=["date", "user count"])
            
            elif metric_choice == "Sessions":
                sessions_df = sorted_df.groupby(["date"]).size().reset_index(name="session count")
                fig = px.line(sessions_df, x="date", y="session count", title="Number of Sessions Over Time", hover_name=["date", "session count"])
            
            fig.update_xaxes(rangeslider_visible=True)
            return fig

In [62]:
users_df_arr = []
sessions_df_arr = []

for bf in ["check_balance", "paynow_transfer", "scan_to_pay"]:
    df = pd.read_csv('../csv/' + bf + '.csv')
    df['date'] = pd.to_datetime(df['date'], format="%d/%m/%y")
    sorted_df = df.sort_values(by="date")

    users_df = sorted_df.groupby(["date"])['account_number'].nunique().reset_index(name= bf + "_user_count")
    sessions_df = sorted_df.groupby(["date"]).size().reset_index(name= bf + "_session_count")

    users_df_arr.append(users_df)
    sessions_df_arr.append(sessions_df)

# users_df_final = pd.concat(users_df_arr, axis=1, ignore_index=True)
# sessions_df_final = pd.concat(sessions_df_arr, axis=1, ignore_index=True)

users_df_final = pd.DataFrame({"date": []})
for df in users_df_arr:
    users_df_final = pd.merge(users_df_final, df, on='date', how='outer')

sessions_df_final = pd.DataFrame({"date": []})
for df in sessions_df_arr:
    sessions_df_final = pd.merge(sessions_df_final, df, on='date', how='outer')

In [63]:
users_df_final.head(5)

Unnamed: 0,date,check_balance_user_count,paynow_transfer_user_count,scan_to_pay_user_count
0,2023-06-01,2,2,2
1,2023-06-02,3,3,3
2,2023-06-03,4,4,4
3,2023-07-01,6,6,6
4,2023-07-02,7,8,6


In [64]:
sessions_df_final.head(5)

Unnamed: 0,date,check_balance_session_count,paynow_transfer_session_count,scan_to_pay_session_count
0,2023-06-01,2,2,2
1,2023-06-02,3,3,3
2,2023-06-03,4,4,4
3,2023-07-01,6,6,6
4,2023-07-02,8,8,8


In [83]:
check_balance_df = pd.read_csv('../csv/check_balance.csv')
paynow_transfer_df = pd.read_csv('../csv/paynow_transfer.csv')
scan_to_pay_df = pd.read_csv('../csv/scan_to_pay.csv')

metric_choice="Users"

def process_bank_function(df, metric_choice):
    df['date'] = pd.to_datetime(df['date'], format="%d/%m/%y")
    df.sort_values(by="date", inplace=True)

    if metric_choice == "Users":
        return df.groupby(["date"])['account_number'].nunique().reset_index()
    
    elif metric_choice == "Sessions":
        return df.groupby(["date"]).size().reset_index()
    
final_df = pd.DataFrame({"date": []})
for df in [check_balance_df, paynow_transfer_df, scan_to_pay_df]:
    dff = process_bank_function(df, metric_choice)
    final_df = pd.merge(final_df, dff, on='date', how='outer')

overview_user_metrics_fig = px.line(final_df, x='date', y=final_df.columns)
overview_user_metrics_fig.update_layout(yaxis_title='user count')
overview_user_metrics_fig.update_traces(line={'width': 2})