# Setup

In [None]:
#!pip install -r requirements.txt
import pickle as pkl
from context_descriptor import ContextDescriptor
from prompt_generator import PromptGenerator
from model_request_handler import ModelRequestHandler
from open_ai_request_handler import OpenAiRequestHandler
import numpy as np
from examples_manager import Examples_manager
from sklearn.metrics import classification_report
import time
from IPython.display import clear_output
import os

# Read the OpenAI API key from the first line of a local text file.
# readline() keeps the trailing newline, so surrounding whitespace is stripped;
# otherwise the "\n" would be part of the key handed to OpenAiRequestHandler.
with open("open_ai_api_key.txt") as f:
    OPEN_AI_API_KEY = f.readline().strip()

# Wipe whatever the cell has printed so far, then confirm completion.
clear_output()
print("Setup complete")

# Select dataset

In [None]:
# Name of the dataset every following cell operates on.
DATASET = "EXTRASENSORY"

# Fail fast on anything other than the two supported dataset names.
if DATASET != "DOMINO" and DATASET != "EXTRASENSORY":
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

# Load dataset

In [None]:
# Activity labels of the ExtraSensory dataset, in the order used by the
# binary vectors that are zipped against CLASSES.
extrasensory_classes = [
    'BICYCLING',
    'LAYING_DOWN',
    'MOVING_BY_CAR',
    'ON_TRANSPORT',
    'SITTING',
    'STANDING',
    'WALKING',
]

# Activity labels of the DOMINO dataset, in the same positional convention.
domino_classes = [
    'BRUSHING_TEETH',
    'CYCLING',
    'ELEVATOR_DOWN',
    'ELEVATOR_UP',
    'LYING',
    'MOVING_BY_CAR',
    'RUNNING',
    'SITTING',
    'SITTING_ON_TRANSPORT',
    'STAIRS_DOWN',
    'STAIRS_UP',
    'STANDING',
    'STANDING_ON_TRANSPORT',
    'WALKING',
]

In [None]:
# Pick the activity label list that belongs to the selected dataset.
# An unknown name leaves CLASSES set to None and raises.
_classes_by_dataset = {
    "DOMINO": domino_classes,
    "EXTRASENSORY": extrasensory_classes,
}
CLASSES = _classes_by_dataset.get(DATASET)
if CLASSES is None:
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, "rb") as handle:
        return pkl.load(handle)


# Load the pickled artefacts of the selected dataset: the unique contexts,
# their labels, their ontology consistencies, their occurrences, and the
# dataset itself (from which the full context sequence is taken).
if DATASET == "DOMINO":
    unique_contexts = _load_pickle("datasets/domino/domino_unique_contexts.pkl")
    unique_contexts_ontology_consistencies = _load_pickle(
        "datasets/domino/domino_unique_contexts_ontology_consistencies.pkl"
    )
    unique_contexts_labels = _load_pickle("datasets/domino/domino_unique_contexts_labels.pkl")
    unique_occurrences = _load_pickle("datasets/domino/domino_unique_contexts_occurrences.pkl")
    # Despite the .txt extension this file is read as a pickle.
    domino = _load_pickle("datasets/domino/domino-segmented-w4-hz50.txt")
    context = domino['enhanced_context']

elif DATASET == "EXTRASENSORY":
    unique_contexts = _load_pickle("datasets/extrasensory/extrasensory_unique_contexts.pkl")
    unique_contexts_labels = _load_pickle("datasets/extrasensory/extrasensory_unique_contexts_labels.pkl")
    unique_contexts_ontology_consistencies = _load_pickle(
        "datasets/extrasensory/extrasensory_unique_contexts_ontology_consistencies.pkl"
    )
    unique_occurrences = _load_pickle("datasets/extrasensory/extrasensory_unique_contexts_occurrences.pkl")
    extrasensory = _load_pickle('datasets/extrasensory/extrasensory.pkl')
    context = extrasensory['context']

else:
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

# Define system message

In [None]:
# System message handed to PromptGenerator: it states the task, interpolates
# the comma-separated activity names from CLASSES, and lists three steps.
# NOTE(review): "below:Step 1", "context.Step 2" and "using.Step 3" have no
# separator between sentences -- possibly lost line breaks. This is the prompt
# text itself, so confirm the intended wording before changing it.
system_message = f'''You are a domain expert in charge of checking which human activities are suitable with respect to the current surrounding context of a user. You will receive as input a description of the surrounding context of the user. You have to determine which are the activities that are suitable in the given context. This is the list of the possible user activities that you have to consider: {', '.join(CLASSES)}. Accomplish the task by following the steps below:Step 1: Analyze the context.Step 2: Determine which activities are likely in the given context, by analyzing each activity. Apply the open-world assumption: anything that is not explicit in the surrounding context of the user may be possible. Do not exclude activities for which you cannot determine their likelihood or the ones for which there are no specific information about objects and vehicles that the user is using.Step 3: Provide the result in the form of a list.'''

In [None]:
#Think about the "answer and step by step analyze each activity.
#system_message += "In the following i'll give you some examples of contexts that you can receive and the answer " \
#                  "that i would expect: "

# Initialize helper classes

In [None]:
# Provides the create_*_context_description methods used to describe a context
# vector. NOTE(review): the name "contest_descriptor" looks like a typo of
# "context_descriptor"; it is referenced under this name by a later cell.
contest_descriptor = ContextDescriptor()
# Generates prompts; constructed with the system message.
prompt_generator = PromptGenerator(system_message)
# Handles the requests made for each prompt; constructed with the API key.
request_handler = OpenAiRequestHandler(api_key=OPEN_AI_API_KEY)

# Initialize example manager

In [None]:
# Build the example manager from the examples CSV of the selected dataset.
_examples_csv_by_dataset = {
    "DOMINO": "domino_examples.csv",
    "EXTRASENSORY": "extrasensory_examples.csv",
}
if DATASET in _examples_csv_by_dataset:
    example_manager = Examples_manager(_examples_csv_by_dataset[DATASET])
else:
    example_manager = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

# Choose context description function

In [None]:
# Bind create_context_description to the descriptor method of the selected
# dataset. contest_descriptor is looked up at call time, not at definition time.
if DATASET == "DOMINO":
    def create_context_description(context_vector):
        """Describe a DOMINO context vector via the context descriptor."""
        return contest_descriptor.create_domino_context_description(context_vector)
elif DATASET == "EXTRASENSORY":
    def create_context_description(context_vector):
        """Describe an ExtraSensory context vector via the context descriptor."""
        return contest_descriptor.create_extrasensory_context_description(context_vector)
else:
    create_context_description = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

# Make the api calls

In [None]:
# Placeholder: this cell does not run until a value is written in place of the
# "#INSERT K" marker. k is passed as the third argument of
# example_manager.get_most_similar_examples and is embedded in the names of
# the output files.
# NOTE(review): the automated-experiments section iterates k over [0.5], so k
# may be a similarity threshold rather than a number of examples -- confirm.
k = #INSERT K

In [None]:
def get_classes_from_binary_vector(bin_vec):
    """Return the names in CLASSES whose paired entry of ``bin_vec`` equals 1.

    Entries are paired with CLASSES by position; pairing stops at the shorter
    of the two sequences.
    """
    return [name for flag, name in zip(bin_vec, CLASSES) if flag == 1]

In [None]:
# Query the model once per unique context, collecting the raw responses and
# the usage records, and print every context whose first response does not
# match the ontology-consistent activities.
start_time = time.time()
all_responses, all_usages = [], []
i = 0  # keeps the counter defined even when there is nothing to iterate
contexts_with_targets = zip(unique_contexts, unique_contexts_labels, unique_contexts_ontology_consistencies)
for i, (context_vector, _labels, consistency) in enumerate(contexts_with_targets, start=1):
    expected = get_classes_from_binary_vector(consistency)
    print(i, expected)

    description = create_context_description(context_vector)
    most_similar_examples = example_manager.get_most_similar_examples(description, 'cosine', k)
    prompt = prompt_generator.generate_prompt(description, most_similar_examples)
    responses, usages = request_handler.handle_request(prompt, repetitions=1, return_all=True)

    # The first response counts as a match if it equals the expected list
    # either fully upper-cased or fully lower-cased.
    first_response = responses[0]
    matches_upper = [a.upper() for a in first_response] == expected
    matches_lower = [a.lower() for a in first_response] == expected
    if not (matches_upper or matches_lower):
        print()
        print(description, "\n", expected, "\n", first_response)

    all_responses.append(responses)
    all_usages.append(usages)
    print("---------------------------------------------------------------------------------")
print("--- %s seconds ---" % (time.time() - start_time))

# Dump data

## Dump answers

In [None]:
# Make sure the output folders for the answers exist. exist_ok=True makes the
# call idempotent without a separate existence check (no check-then-create
# race), and makedirs creates the "answers" parent when it is missing.
for _dataset_dir in ("extrasensory", "domino"):
    os.makedirs(f"answers/{_dataset_dir}", exist_ok=True)

In [None]:
# File that receives the pickled answers for the selected dataset and k.
if DATASET not in ("DOMINO", "EXTRASENSORY"):
    answers_path = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")
answers_path = (
    f"answers/domino/answers_k{k}.pkl"
    if DATASET == "DOMINO"
    else f"answers/extrasensory/answers_k{k}.pkl"
)

In [None]:
# Persist every collected response to the answers file.
with open(answers_path, "wb") as answers_file:
    pkl.dump(all_responses, answers_file)

## Dump usages

In [None]:
# Make sure the output folders for the usage records exist. exist_ok=True
# makes the call idempotent without a separate existence check, and makedirs
# creates the "usages" parent when it is missing.
for _dataset_dir in ("extrasensory", "domino"):
    os.makedirs(f"usages/{_dataset_dir}", exist_ok=True)

In [None]:
# File that receives the pickled usage records for the selected dataset and k.
if DATASET not in ("DOMINO", "EXTRASENSORY"):
    usages_path = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")
usages_path = (
    f"usages/domino/usages_k{k}.pkl"
    if DATASET == "DOMINO"
    else f"usages/extrasensory/usages_k{k}.pkl"
)

In [None]:
# Persist every collected usage record to the usages file.
with open(usages_path, "wb") as usages_file:
    pkl.dump(all_usages, usages_file)

# Load answers and ontology

In [None]:
# Recompute the answers file path so the evaluation cells can be run without
# re-running the dump cells.
if DATASET not in ("DOMINO", "EXTRASENSORY"):
    answers_path = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")
answers_path = (
    f"answers/domino/answers_k{k}.pkl"
    if DATASET == "DOMINO"
    else f"answers/extrasensory/answers_k{k}.pkl"
)

In [None]:
# Path of the pickled ontology consistencies for the selected dataset.
if DATASET == "DOMINO":
    ontology_path = "datasets/domino/domino_unique_contexts_ontology_consistencies.pkl"
elif DATASET == "EXTRASENSORY":
    ontology_path = "datasets/extrasensory/extrasensory_unique_contexts_ontology_consistencies.pkl"
else:
    # Reset the variable this cell owns; the previous version reset
    # answers_path here, a copy-paste slip that left ontology_path untouched.
    ontology_path = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

In [None]:
# Reload the stored answers, then the ontology consistencies to compare with.
with open(answers_path, "rb") as answers_file:
    all_responses = pkl.load(answers_file)

with open(ontology_path, "rb") as ontology_file:
    ontology_consistencies = pkl.load(ontology_file)

# Compute results

In [None]:
def count_occurrences(activity, list_of_lists):
    """Return how many times ``activity`` occurs across all the sublists.

    Each element of ``list_of_lists`` is counted with its own ``count``
    method and the per-sublist counts are summed.
    """
    return sum(sublist.count(activity) for sublist in list_of_lists)

In [None]:
# Minimum number of occurrences of an activity across the responses of one
# context for that activity to be marked 1 when preds is built.
min_count = 1

In [None]:
# One binary vector per entry of all_responses, aligned with CLASSES: 1 when
# the lower-cased activity name occurs at least min_count times in that
# entry's responses, 0 otherwise.
# NOTE(review): only lower-case occurrences are counted -- confirm the
# responses are lower-cased before reaching this point.
preds = [
    [
        1 if count_occurrences(act.lower(), response) >= min_count else 0
        for act in CLASSES
    ]
    for response in all_responses
]

In [None]:
# Print scikit-learn's per-class report (precision, recall, F1, support) of
# preds against the loaded ontology consistencies, one row per name in CLASSES.
print(classification_report(ontology_consistencies, preds, target_names=CLASSES))

# Get results for DL

In [None]:
# Make sure the output folders for the consistencies exist. exist_ok=True
# makes the call idempotent without a separate existence check, and makedirs
# creates the "consistencies" parent when it is missing.
for _dataset_dir in ("extrasensory", "domino"):
    os.makedirs(f"consistencies/{_dataset_dir}", exist_ok=True)

In [None]:
# TODO
# Create the final consistency list: one slot per entry of context. Every
# index listed in unique_occurrences for a unique context receives the entry
# stored for that unique context.
# all_responses plays the role of the per-unique-context consistency list
# here; in a normal run this would be preds.
list_consistencies = [0] * len(context)
for unique_idx, _ in enumerate(unique_contexts):
    for sample_idx in unique_occurrences[unique_idx]:
        list_consistencies[sample_idx] = all_responses[unique_idx]

In [None]:
# Notebook display of the list length; the list is pre-sized to len(context).
len(list_consistencies)

In [None]:
# File that receives the pickled consistencies for the selected dataset and k.
if DATASET not in ("DOMINO", "EXTRASENSORY"):
    consistencies_path = None
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")
consistencies_path = (
    f'consistencies/domino/consistencies_llm_k{k}.pkl'
    if DATASET == "DOMINO"
    else f"consistencies/extrasensory/consistencies_llm_k{k}.pkl"
)

In [None]:
# Persist the consistency list, converted to a NumPy array, as a pickle.
with open(consistencies_path, "wb") as consistencies_file:
    consistencies_array = np.array(list_consistencies)
    pkl.dump(consistencies_array, consistencies_file)

# AUTOMATED EXPERIMENTS

In [None]:
# Values of k swept by the automated-experiments loop, one full run per value.
# NOTE(review): "tresholds" is presumably a misspelling of "thresholds"; the
# loop refers to it under this name, so rename both together.
tresholds = [0.5]

In [None]:
# CREATE FOLDERS
# exist_ok=True makes folder creation idempotent without a separate existence
# check, and makedirs creates the top-level parent when it is missing.
for _root in ("answers", "usages", "n_examples"):
    for _dataset_dir in ("extrasensory", "domino"):
        os.makedirs(f"{_root}/{_dataset_dir}", exist_ok=True)

# Resolve the per-dataset output sub-folder once, before any request is made,
# so an invalid DATASET fails up front instead of after a full run.
if DATASET == "DOMINO":
    dataset_dir = "domino"
elif DATASET == "EXTRASENSORY":
    dataset_dir = "extrasensory"
else:
    raise ValueError("DATASET name must be 'DOMINO' or 'EXTRASENSORY'")

# One full pass over the unique contexts per value of k; the answers, the
# usage records and the number of retrieved examples are pickled per k.
for k in tresholds:
    start_time = time.time()
    all_responses = []
    all_usages = []
    n_examples = []
    contexts_with_targets = zip(unique_contexts, unique_contexts_labels, unique_contexts_ontology_consistencies)
    for i, (context_vector, _labels, consistency) in enumerate(contexts_with_targets, start=1):
        expected = get_classes_from_binary_vector(consistency)
        print(i, expected)

        description = create_context_description(context_vector)
        most_similar_examples = example_manager.get_most_similar_examples(description, 'cosine', k)
        # Record how many examples were retrieved for this context.
        n_examples.append(len(most_similar_examples))
        prompt = prompt_generator.generate_prompt(description, most_similar_examples)
        responses, usages = request_handler.handle_request(prompt, repetitions=1, return_all=True)

        # Print the context when the first response matches the expected list
        # neither fully upper-cased nor fully lower-cased.
        first_response = responses[0]
        matches_upper = [a.upper() for a in first_response] == expected
        matches_lower = [a.lower() for a in first_response] == expected
        if not (matches_upper or matches_lower):
            print()
            print(description, "\n", expected, "\n", first_response)

        all_responses.append(responses)
        all_usages.append(usages)
        print("---------------------------------------------------------------------------------")
    print("--- %s seconds ---" % (time.time() - start_time))

    # DUMP ANSWERS
    # Fix: this step used to open usages_path (stale from an earlier cell or
    # iteration) and dump all_usages, so the answers were never saved.
    answers_path = f"answers/{dataset_dir}/answers_k{k}.pkl"
    with open(answers_path, "wb") as file:
        pkl.dump(all_responses, file)

    # DUMP USAGES
    usages_path = f"usages/{dataset_dir}/usages_k{k}.pkl"
    with open(usages_path, "wb") as file:
        pkl.dump(all_usages, file)

    # DUMP N_EXAMPLES
    examples_path = f"n_examples/{dataset_dir}/n_examples_k{k}.pkl"
    with open(examples_path, "wb") as file:
        pkl.dump(n_examples, file)