In [1]:
import pandas as pd
import holoviews.plotting.bokeh
import holoviews as hv
from holoviews import opts, dim
from datetime import date
from bokeh.io import output_file, save

# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Set the global HoloViews output size option to 200.
hv.output(size=200)

In [2]:
# Read the review workbook into a DataFrame. Later cells read its 'date' column
# and the per-label score columns (e.g. 'question', 'complaint', 'design').
df = pd.read_excel("ing_review_w_user_ratings.xlsx")

In [3]:
# Keep only the rows whose 'date' is on or after the cut-off and renumber the
# index from zero. A falsy start_date leaves df untouched.
start_date = date(2022, 1, 1)
if start_date:
    cutoff = pd.to_datetime(start_date)
    on_or_after_cutoff = df["date"] >= cutoff
    df = df[on_or_after_cutoff].reset_index(drop=True)

In [4]:
# A row is flagged as complaint / praise only when the matching score is
# strictly greater than these cut-offs (see apply_complaint_praise).
THRES_COMPLAINT = 0.8
THRES_PRAISE = 0.8

# Cut-offs (strictly greater than) for the question / statement flags
# (see apply_question_statement).
THRES_QUESTION = 0.5
THRES_STATEMENT = 0.5

# Shared cut-off applied to every topic column listed in columns_to_process.
THRES_LABELS = 0.7

In [5]:
def apply_question_statement(row):
    """Turn a row's 'question' / 'statement' scores into exclusive 0/1 flags.

    A flag is set when its score is strictly greater than the matching
    threshold (THRES_QUESTION / THRES_STATEMENT). When both scores pass,
    only the higher-scoring label stays set; on an exact tie the statement
    flag is the one that is kept.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'question' and 'statement'.

    Returns
    -------
    pd.Series
        Two integers: [question_flag, statement_flag].
    """
    q_score = row['question']
    s_score = row['statement']
    is_question = int(q_score > THRES_QUESTION)
    is_statement = int(s_score > THRES_STATEMENT)

    # At most one flag is set: nothing to disambiguate.
    if not (is_question and is_statement):
        return pd.Series([is_question, is_statement])

    # Both passed: keep only the stronger label (ties go to statement).
    if q_score > s_score:
        return pd.Series([1, 0])
    return pd.Series([0, 1])

# Compute the exclusive question/statement flags row by row, then store them
# as two new columns on df.
question_statement_flags = df.apply(apply_question_statement, axis=1)
df[['question_after', 'statement_after']] = question_statement_flags

In [6]:
def apply_complaint_praise(row):
    """Turn a row's 'complaint' / 'praise' scores into exclusive 0/1 flags.

    A flag is set when its score is strictly greater than the matching
    threshold (THRES_COMPLAINT / THRES_PRAISE). When both scores pass,
    only the higher-scoring label stays set; on an exact tie the praise
    flag is kept, mirroring the tie handling in apply_question_statement.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'complaint' and 'praise'.

    Returns
    -------
    pd.Series
        Two integers: [complaint_flag, praise_flag].
    """
    complaint_candidate = 1 if row['complaint'] > THRES_COMPLAINT else 0
    praise_candidate = 1 if row['praise'] > THRES_PRAISE else 0

    if complaint_candidate == 1 and praise_candidate == 1:
        # Keep the stronger label. The previous version cleared the complaint
        # flag when complaint was the higher score, i.e. it kept the weaker
        # of the two labels.
        if row['complaint'] > row['praise']:
            praise_candidate = 0
        else:
            complaint_candidate = 0

    return pd.Series([complaint_candidate, praise_candidate])

# Compute the exclusive complaint/praise flags row by row, then store them
# as two new columns on df.
complaint_praise_flags = df.apply(apply_complaint_praise, axis=1)
df[['complaint_after', 'praise_after']] = complaint_praise_flags

In [7]:
# Topic score columns that are binarised with the shared THRES_LABELS cut-off.
columns_to_process = ['user interface', 'design', 'finance', 'credit', 'customer relationship', 'performance']

def apply_threshold(row, col, threshold):
    """Return 1 when row[col] is strictly greater than threshold, else 0.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Object indexed by column name.
    col : hashable
        Key of the score to test.
    threshold : number
        Exclusive lower bound for a positive flag.
    """
    return int(row[col] > threshold)

# Flag each topic column with one vectorised comparison. This yields the same
# 0/1 values as calling apply_threshold on every row through df.apply(axis=1),
# but without a Python-level function call per row.
for col in columns_to_process:
    new_col_name = f'{col}_after'
    df[new_col_name] = (df[col] > THRES_LABELS).astype(int)


In [8]:
# Pull the binary '<label>_after' flag columns out of df and rename them to
# the plain label names.
label_names = ['question', 'statement', 'complaint', 'praise', 'user interface', 'design', 'finance', 'credit', 'customer relationship', 'performance']
flag_columns = [f'{label}_after' for label in label_names]
data = df[flag_columns]
data.columns = label_names

In [9]:
# Ordered list of label names; a label's position in this list is the integer
# node id used for the chord edges.
node_labels = data.columns.tolist()

In [10]:
# Count, for every ordered pair of distinct labels, the rows flagged with both.
# The rows are collected in a plain list and converted to a DataFrame once:
# appending with .loc inside the loop re-allocates the frame on every insert
# and leaves the columns with object dtype.
pair_rows = []
for i, source in enumerate(node_labels):
    source_mask = data[source] == 1
    for j, target in enumerate(node_labels):
        if source == target:
            continue
        value = int((source_mask & (data[target] == 1)).sum())
        pair_rows.append((i, j, value))

df_source_target = pd.DataFrame(pair_rows, columns=["source", "target", "value"])

# Order-independent key: (i, j) and (j, i) describe the same undirected edge.
df_source_target['source_target'] = [
    tuple(sorted((src, tgt))) for src, tgt, _ in pair_rows
]

# Keep the first occurrence of each undirected pair (the one with source <
# target, given the loop order above) and drop the helper key again.
df_unique = (
    df_source_target
    .drop_duplicates(subset=['source_target'])
    .drop(columns=['source_target'])
    .astype({"value": int})
)

In [11]:
# Node table with one row per label, keyed on 'index'.
# NOTE(review): presumably 'index' resolves to the frame's default integer
# index so it lines up with the integer source/target ids - confirm.
node_frame = pd.DataFrame(node_labels, columns=["nodes"])
nodes = hv.Dataset(node_frame, 'index')

# Build the chord element and restrict the edges to the value range (9, None).
all_edges_chord = hv.Chord((df_unique, nodes))
chord = all_edges_chord.select(value=(9, None))

# Colour edges by their source id and nodes by their own id, and label the
# nodes with the 'nodes' column.
chord_style = opts.Chord(
    cmap='Category20',
    edge_cmap='Category20',
    edge_color=dim('source').str(),
    labels='nodes',
    node_color=dim('index').str(),
)
chord.opts(chord_style)

In [12]:
# Point Bokeh's file output at the HTML target, render the HoloViews chord to
# a Bokeh object and write it out.
output_file("chord_diagram.html")

chord_figure = hv.render(chord)
save(chord_figure)

'/Users/okanyenigun/Desktop/codes/projects/genai_work/notebooks/chord_diagram.html'