# API documentation 
https://cloud.ibm.com/apidocs/assistant-v2?code=python#listassistants


In [1]:
import pprint
from dotenv import load_dotenv
import os
import json
from ibm_watson import AssistantV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import pandas as pd
import pickle

load_dotenv()

True

# API assistant setup

In [None]:
# Connection settings for the Watson Assistant v2 client.
# API_KEY is read from the environment (None when the variable is not set).
API_KEY = os.getenv(key='API_KEY')
# Service endpoint that is handed to assistant.set_service_url().
URL = 'https://api.us-east.assistant.watson.cloud.ibm.com/'
# Display name used to pick the matching entry out of the assistants list.
ASSISTANT_NAME = 'Ask_Proc_Main'

In [None]:

# Build the AssistantV2 client: authenticate with the API key, pin the API
# version date, then point the client at the service URL.
authenticator = IAMAuthenticator(API_KEY)
assistant = AssistantV2(
    version='2021-06-14',
    authenticator = authenticator
)

assistant.set_service_url(URL)

# Get List of assistants available

This lists all the assistants that are available in this IBM Cloud service instance.

In [None]:
assistants_list = assistant.list_assistants()

In [None]:
# Look up the id of the assistant whose name equals ASSISTANT_NAME.
# Start from None so that a missing match (or a stale value left over from an
# earlier notebook run) is detected instead of silently reused.
ASSISTANT_ID = None

for bot in assistants_list.get_result().get('assistants') or []:
    if bot.get('name') == ASSISTANT_NAME:
        # If several assistants share the name, the last one listed wins.
        ASSISTANT_ID = bot.get('assistant_id')
        pprint.pprint(bot)

if ASSISTANT_ID is None:
    raise LookupError(
        f'No assistant named {ASSISTANT_NAME!r} was found in this instance'
    )

print(f'\n\nYour assistant id is: {ASSISTANT_ID}')

In [None]:
# here you can define the assistant environment to get the logs from 
ASSISTANT_ENV_ID = '572332fa-74b2-4e42-9c4d-93c891106824'

# Get log data 

In [None]:
# Download every log entry, following the pagination cursor page by page.
all_logs = []
cursor = None
page = 1

while True:
    print(f'working on page {page}')
    page += 1

    response = assistant.list_logs(assistant_id=ASSISTANT_ENV_ID, cursor=cursor).get_result()
    all_logs += response['logs']

    # A missing or empty next_cursor means this was the last page.
    cursor = response.get('pagination').get('next_cursor')
    if not cursor:
        break

In [None]:
len(all_logs)

# Save the downloaded logs as a pickle file so they can be reloaded later.
with open('all_logs.pkl', 'wb') as file: 
    pickle.dump(all_logs, file=file)

# From here on: read all the logs back from the pkl file

In [2]:
# Read the pickled logs back into all_logs.
# NOTE: pickle.load can execute arbitrary code while loading; only open
# pickle files that you created yourself or otherwise trust.
with open('all_logs.pkl', 'rb') as file:
    # Load the pickle object using pickle.load
    all_logs = pickle.load(file)

In [3]:
len(all_logs)

9174

In [None]:
# Collect the distinct top-level field names seen across all request inputs.
request_first_level_keys = set()

for log in all_logs:
    request_first_level_keys.update(log.get('request').get('input').keys())

In [None]:
request_first_level_keys

In [None]:
# Count, for each known request-input field, how many logs include it.
keys_counter = dict.fromkeys(
    (
        'entities',
        'filter',
        'intents',
        'internal',
        'message_type',
        'options',
        'query_type',
        'return_to_dialog',
        'source',
        'suggestion_id',
        'text',
    ),
    0,
)

for log in all_logs:
    request_input = log.get('request').get('input')
    # Only the tracked fields that are actually present in this request.
    for name in keys_counter.keys() & request_input.keys():
        keys_counter[name] += 1



In [None]:
keys_counter

In [None]:
# Analyze the request data: show the input of the first 7 logs that carry
# a 'suggestion_id' field.
count = 0

for log in all_logs:
    data = log.get('request').get('input')
    if 'suggestion_id' not in data:
        continue

    pprint.pprint(data)
    print('--------END--------')
    count += 1
    if count == 7:
        break



In [None]:
all_logs[0].get('request').get('input')

In [12]:
def json_expander(base_dict):
    """Flatten one nesting level of a JSON-like dict.

    Each value of ``base_dict`` is handled according to its type:

    * ``str`` -- copied unchanged under the same key.
    * ``dict`` -- every entry is lifted to the top level under the key
      ``"<parent>_<child>"``. The child value is copied as-is, so deeper
      containers survive and can be expanded by calling this function again.
    * non-empty ``list`` -- only the FIRST element is inspected: a dict is
      lifted as described above, a str is stored under the original key.
      All remaining elements are discarded.
    * anything else (numbers, booleans, ``None``, empty lists, lists whose
      first element is neither dict nor str) -- dropped from the result.

    If two source entries produce the same flattened key, the one processed
    later overwrites the earlier one.

    Args:
        base_dict: Dict with string keys (e.g. a decoded JSON object).

    Returns:
        A ``(flattened, change_counts)`` tuple. ``change_counts`` is the
        number of dicts that were expanded in this pass; ``0`` means the
        result contains no more nested dicts, so repeated calls have
        reached a fixed point.
    """
    my_data = {}
    change_counts = 0

    for key, val in base_dict.items():
        if isinstance(val, str):
            my_data[key] = val
            continue

        if isinstance(val, list):
            # Only the first element of a list is ever looked at.
            head = val[0] if val else None
            if isinstance(head, str):
                my_data[key] = head
                continue
            # Fall through: a dict head is expanded below, anything else
            # (including a nested list) is dropped.
            val = head

        if isinstance(val, dict):
            change_counts += 1
            for sub_key, sub_val in val.items():
                my_data[key + '_' + sub_key] = sub_val

    return my_data, change_counts

In [None]:
all_logs[0].get('response').get('output')

In [None]:
# Build a small sample dict from the first log entry (its id plus the raw
# request input) to try the flattening on a single record.
df = pd.DataFrame()
data = {
    'log_id': all_logs[0].get('log_id'),
    'input': all_logs[0].get('request').get('input')
    }

# Display the sample dict as the cell output.
data


In [None]:
# Flatten the sample dict: expand once, then keep re-expanding the result
# until json_expander reports that no nested dict was left.
new_data, change_counter = json_expander(base_dict=data)
counter_while = 1

while change_counter != 0:
    new_data, change_counter = json_expander(base_dict=new_data)
    


In [None]:
new_data

# How to save logs in a table or CSV

In [13]:
# Flatten every log entry into one flat dict per row, then build the
# DataFrame in a single call. Concatenating a one-row frame inside the loop
# copies the whole accumulated frame on every iteration, which is quadratic
# in the number of logs.
flattened_rows = []

for log in all_logs:
    temp = {
        'log_id': log.get('log_id'),
        'request_timestamp': log.get('request_timestamp'),
        'response_timestamp': log.get('response_timestamp'),
        'assistant_id': log.get('assistant_id'),
        'session_id': log.get('session_id'),
        'customer_id': log.get('customer_id'),
        'skill_id': log.get('skill_id'),
        'request_input': log.get('request').get('input'),
        'response_output': log.get('response').get('output'),
    }

    # json_expander flattens one nesting level per call; repeat until it
    # reports that nothing was expanded (change_counter == 0).
    new_data, change_counter = json_expander(base_dict=temp)
    while change_counter != 0:
        new_data, change_counter = json_expander(base_dict=new_data)

    flattened_rows.append(new_data)

# Rows with different keys are aligned by column name; missing cells are NaN.
df_logs = pd.DataFrame(flattened_rows)

    


In [None]:
all_logs[0].keys()

In [None]:
for k in all_logs[0].keys():
    print(k)

In [None]:
all_logs[500].get('request').get('input')

In [None]:
all_logs[0].get('request').get('input').keys()

In [14]:
# Parse both timestamp columns and strip the timezone so they become naive
# datetimes.
for ts_column in ('request_timestamp', 'response_timestamp'):
    parsed = pd.to_datetime(df_logs[ts_column], utc=False)
    df_logs[ts_column] = parsed.dt.tz_localize(None)

# Order the rows by session and, within a session, by request time; then
# renumber the index to match the new order.
df_logs.sort_values(
    by=['session_id', 'request_timestamp'],
    ascending=[True, True],
    inplace=True,
)
df_logs.reset_index(drop=True, inplace=True)
df_logs.head()

Unnamed: 0,log_id,request_timestamp,response_timestamp,assistant_id,session_id,customer_id,skill_id,request_input_message_type,request_input_source_id,request_input_source_type,...,response_output_debug_turn_events_action_variables_step_917_result_1_body_result,response_output_debug_turn_events_action_variables_step_917_result_1_body_total,response_output_debug_log_messages_values,response_output_log_messages_values,response_output_debug_turn_events_action_variables_step_412_result_1_body_result,response_output_debug_turn_events_action_variables_step_412_result_1_body_total,response_output_debug_turn_events_action_variables_step_952_result_1_body,request_input_internal_callout_request_body_invoice_number,response_output_debug_turn_events_action_variables_step_903_result_1_body_message,response_output_debug_turn_events_action_variables_step_954_result_1_body_message
0,0b94e98b-8408-42e0-b60d-91a50e70ba00,2024-01-27 23:44:44.232,2024-01-27 23:44:44.599,572332fa-74b2-4e42-9c4d-93c891106824,0003df44-f463-4ca8-a6b3-432491ee1044,luis.toribio@ibm.com,5cbe6f41-8364-4f69-9e0a-1e7891cdde76,text,luis.toribio@ibm.com,user,...,,,,,,,,,,
1,8288abce-2c3d-4644-834c-6b4a18371c45,2024-01-27 23:44:48.870,2024-01-27 23:44:49.081,572332fa-74b2-4e42-9c4d-93c891106824,0003df44-f463-4ca8-a6b3-432491ee1044,luis.toribio@ibm.com,5cbe6f41-8364-4f69-9e0a-1e7891cdde76,text,luis.toribio@ibm.com,user,...,,,,,,,,,,
2,60da6908-8b43-4322-bea8-71dd94aeb02d,2024-01-27 23:44:52.276,2024-01-27 23:44:52.570,572332fa-74b2-4e42-9c4d-93c891106824,0003df44-f463-4ca8-a6b3-432491ee1044,luis.toribio@ibm.com,5cbe6f41-8364-4f69-9e0a-1e7891cdde76,text,luis.toribio@ibm.com,user,...,,,,,,,,,,
3,f2964c47-074a-45ad-8c19-eb760be9e2fa,2024-01-27 23:44:56.715,2024-01-27 23:44:56.962,572332fa-74b2-4e42-9c4d-93c891106824,0003df44-f463-4ca8-a6b3-432491ee1044,luis.toribio@ibm.com,5cbe6f41-8364-4f69-9e0a-1e7891cdde76,text,luis.toribio@ibm.com,user,...,,,,,,,,,,
4,a1a4b9e9-8fb8-45d3-bcce-b302dc2f5763,2024-01-27 23:45:02.785,2024-01-27 23:45:03.027,572332fa-74b2-4e42-9c4d-93c891106824,0003df44-f463-4ca8-a6b3-432491ee1044,luis.toribio@ibm.com,5cbe6f41-8364-4f69-9e0a-1e7891cdde76,text,luis.toribio@ibm.com,user,...,,,,,,,,,,


In [None]:
df_logs.columns

In [None]:
# Export the flattened logs as a pipe-delimited CSV without the index column.
df_logs.to_csv('df_logs.csv', index=False, sep='|')
#df_logs.to_parquet('df_logs.gzip', index=False, compression='gzip')

In [None]:
!pip install openpyxl

In [15]:
df_logs.to_excel('df_logs.xlsx', index=False)

In [None]:
all_logs[9000].keys()

In [None]:
all_logs[9001].get('customer_id')

In [None]:
all_logs[9000].get('request').get('input')

In [None]:
all_logs[9000].get('response').get('output')

In [None]:
logs_draft = assistant.list_logs(assistant_id=ASSISTANT_ENV_ID)

In [None]:
# saving the logs data in a variable
logs_results_data = logs_draft.get_result()

In [None]:
# what keys do we have in the dictionary?
logs_results_data.keys()

In [None]:
logs_results_data['pagination']

In [None]:
len(logs_results_data['logs'])

In [None]:
#log results (only one element of the log list)
logs_results_data['logs']