# Import

In [1]:
import os

os.chdir(os.path.dirname(os.getcwd()))

In [2]:
import re
import json
import pickle
from tqdm.auto import tqdm
from collections import defaultdict

import faiss
import boto3
import numpy as np
from sklearn.metrics import confusion_matrix

from run import ModelRunner
from jovis_model.config import Config
from jovis_model.utils.helper import build_faiss_index

# Common

### Runner

In [3]:
m_type_id_dict = {
    "claude-3-haiku": "anthropic.claude-3-haiku-20240307-v1:0",
    "claude-3-sonnet": "anthropic.claude-3-sonnet-20240229-v1:0",
    "claude-3-opus": "anthropic.claude-3-opus-20240229-v1:0"
}

In [4]:
params = {
    "pkg": "llm",
    "task": "bedrock",
    "use_hf_model": False,
    "params": {
        "m_type_id": m_type_id_dict["claude-3-haiku"],
        "max_new_tokens": 400,
    }
}
config = Config(**params)
runner = ModelRunner(
    config=config,
    mode="inference"
)

03:48:13:PM:INFO: data module loaded
03:48:13:PM:INFO: Found credentials in shared credentials file: ~/.aws/credentials
03:48:14:PM:INFO: model module loaded


In [45]:
params = {
    "pkg": "llm",
    "task": "sentence_embedding",
    "use_hf_model": True,
    "params": {
        "hf_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    }
}
config = Config(**params)
se_runner = ModelRunner(
    config=config,
    mode="inference"
)

01:20:26:PM:INFO: data module loaded
01:20:34:PM:INFO: model module loaded


### Helper

In [5]:
def parse_model_output(output):
    start, end = "<output>", "</output>"
    try:
        res = json.loads(output[output.find(start) + len(start): output.find(end)])
    except:
        res = None
    return res

# Intention Classifier

### Testset generation

In [4]:
gen_prompt = """
You are an expert to generate utterances which can be used as an input to a e-commerce chatbot system.
Generally, these utterances are related to fashion items worn by celebrities or actors in tv shows.
Your task is to generate utterances which have specific intention.

Here is the list of intention:
<intention>
1. item_search: Asking to search/find fashion products based on specific criteria, such as attributes(category, colors, material, print, neckline, etc.), occasion(wedding, picnic, etc.), or any other conditions (worn by some celebrities on tv shows, etc.)
2. style_discuss: Asking for recommendations about fashion and styling products.
3. program_actor_question: Asking questions about tv shows or actors.
</intention>

Here is the output format you should consider.
<output_format>
Provide your output in the following JSON format only. Enclose your JSON output with `<output>` and `</output>`:
<output>
{
    "item_search": <value>,
    "style_discuss": <value>,
    "program_actor_question": <value>
}
</output>
There's some rule:
1. one utterence per intention.
2. All the value MUST be in Korean.
</output_format>
"""

In [39]:
def generate(prompt, n=500):
    dialogs = [
        {"role": "user", "content": prompt}
    ]
    start = "<output>"
    end = "</output>"
    labels = ["item_search", "style_discuss", "program_actor_question"]
    success = defaultdict(list)
    error = defaultdict(list)
    pbar = tqdm(total=n)
    cnt = 0
    while cnt < n:
        parsed = True
        try:
            res = runner.run(sample_inputs=dialogs)
            tmp = json.loads(res[res.find(start) + len(start): res.find(end)])
        except:
            error["parse_error"].append(res)
            parsed = False
        if parsed:
            check = True
            for l in labels:
                if l not in tmp:
                    check = False
            if check:
                for k, v in tmp.items():
                    success[k].append(v)
                cnt += 1
                pbar.update(1)
            else:
                error["key_error"].append(parsed)
    pbar.close()
    return success, error

In [40]:
success, error = generate(gen_prompt)

100%|██████████| 500/500 [15:26<00:00,  1.85s/it]


In [53]:
with open("jovis_model/_db/skb/testsets/intention_classifier/testset.json", "w") as f:
    json.dump(success, f)

### Evaluation

In [4]:
with open("jovis_model/_db/skb/testsets/intention_classifier/testset.json", "r") as f:
    data = json.load(f)

In [5]:
test_prompt = """
You are an agent for an e-commerce chatbot system.
Your task is to classify the intention of the current_user_query.

Here is the chat history:
<chat_history>
{}
</chat_history>

The current user query is:
<current_user_query>
{}
</current_user_query>

Today's date is:
<today_date>
{{TODAY_DATE}}
</today_date>

Here is the list of intention:
<intention_classification>
1. item_search: asking to search for a product based on specific criteria, such as attributes(category, colors, material, print, neckline, etc.), occasion(wedding, picnic, etc.), or any other conditions (worn by some celebrities on tv shows, etc.)
2. style_discuss: Asking for recommendations about fashion and styling products.
3. program_actor_question: Asking questions about tv shows or actors.
4. none: if the user's query does not belong to any of the above categories
</intention_classification>

<output_format>
Provide your output in the following JSON format only. Enclose your JSON output with `<output>` and `</output>`:
<output>
{{
    "intention": <value>
}}
</output>
</output_format>
"""

In [6]:
def test_query(test_data, prompt):
    success = []
    error = defaultdict(list)
    start = "<output>"
    end = "</output>"
    for k, v in test_data.items():
        pbar = tqdm(total=len(v))
        for v_ in v:
            dialogs = [
                {"role": "user", "content": prompt.format("", v_)}
            ]
            parsed = True
            try:
                res = runner.run(sample_inputs=dialogs)
                tmp = json.loads(res[res.find(start) + len(start): res.find(end)])
            except:
                error["parse_error"].append([k, v_, res])
                parsed = False
            if parsed:
                success.append([v_, k, tmp["intention"]])
            pbar.update(1)
        pbar.close()
    return success, error

In [7]:
success, error = test_query(data, test_prompt)

100%|██████████| 500/500 [04:53<00:00,  1.70it/s]
100%|██████████| 500/500 [04:54<00:00,  1.70it/s]
100%|██████████| 500/500 [05:06<00:00,  1.63it/s]


In [14]:
success = np.array(success)
y_true = success[:, 1]
y_pred = success[:, 2]

In [18]:
confm = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=["item_search", "style_discuss", "program_actor_question", "none"])

In [26]:
failed_case = []
for q, y, y_hat in success:
    if y != y_hat:
        failed_case.append([q, y, y_hat])

# Keyword Extractor

### Evaluation

#### test_prompt

In [6]:
test_prompt = """
You are an agent for an e-commerce chatbot system.
Your task is to extract relevant keywords from the <current_user_query> to help the specialized agents search for appropriate products.

Here is the chat history:
<chat_history>
{{CHAT_HISTORY}}
</chat_history>

The current user query is:
<current_user_query>
{}
</current_user_query>

Today's date is:
<today_date>
{{}}
</today_date>

<keyword_extraction>
Next, extract the following relevant keywords from the user's query:
1. target-item: the item category the user is looking for. Here's the categories you can choose : ['accessories', 'bags', 'belts', 'glasses', 'gloves', 'hair_accessories', 'hats', 'jewelries', 'key_ring', 'outwears', 'pants', 'scarf/muffler', 'shoes', 'skirts', 'socks', 'swimwears', 'tie', 'tops', 'watches', 'wholebodies']
2. target-attribute: attributes of the target item. Here's the attributes you can choose: ['color', 'material', 'print', 'neckline', 'length', 'season', 'place', 'occasion', 'style']
3. program-name: name of the TV show
4. actor-name: name of the actor or actress (e.g 오연서, 김종민)
5. looking-for-popular: This value is True or False (boolean). For example, when a user asks about popular items, such as what are trend t-shirts these days?, this value is True.
6. brand: brand of the product. (e.g., Gucci, Nike, etc.)
7. price: price or price range the user asked for. the format should be <min_price>-<max_price> (e.g., below 1000 krw: -1000 / above 10000 krw: 10000- / around 100000: 50000-150000)
8. time: time range of tv show. given date of <today_date>, extract asked time period in the format of YYYYMMDD-YYYYMMDD

For target-item, you have to choose category in specified list with keyword.
Each keyword should have an empty string value if there is no corresponding information in the user's query. Do not make things up.
</keyword_extraction>

<output_format>
Provide your output in the following JSON format only. Enclose your JSON output with `<output>` and `</output>`:
<output>
{{
    "role": "keyword_extractor",
    "target-item": {{
        <item1>: <category>,
        <item2>: <category>
    }},
    "target-attribute": {{
        <item1>: {{
            "looking-for-popular": <value>,
            "program-name": <value>,
            "actor-name": <value>,
            "brand": <value>,
            "price": <value>,
            "time": <value>,
            "color": <value>,
            "material": <value>,
            "print": <value>,
            "neckline": <value>,
            "length": <value>,
            "season": <value>,
            "place": <value>,
            "occasion": <value>,
            "style": <value>
        }},
        <item2> : {{
            "looking-for-popular": <value>,
            "program-name": <value>,
            "actor-name": <value>,
            "brand": <value>,
            "price": <value>,
            "time": <value>,
            "color": <value>,
            "material": <value>,
            "print": <value>,
            "neckline": <value>,
            "length": <value>,
            "season": <value>,
            "place": <value>,
            "occasion": <value>,
            "style": <value>
        }}
    }}
}}
</output>
There's some rules:
1. All <item> and <value> MUST be in korean.
</output_format>
"""

In [7]:
def test_query(test_querys, prompt):
    success = []
    error = []
    pbar = tqdm(total=len(test_querys))
    for q in test_querys:
        dialogs = [
            {"role": "user", "content": prompt.format(q)}
        ]
        res = runner.run(sample_inputs=dialogs)
        output = parse_model_output(res)
        if output:
            success.append([q, output])
        else:
            error.append([q, res])
        pbar.update(1)
    pbar.close()
    return success, error

#### metric

In [8]:
def get_price_range(keyword):
    split_points = re.findall("-", keyword)
    res = None
    if len(split_points) == 1:
        try:
            lower, upper = [0 if bound == "" else int(bound) for bound in keyword.split("-")]
            if upper == 0:
                upper = 1000000000
            res = [lower, upper]
        except:
            return res
    return res

def get_accuracy(labels, preds):
    res = defaultdict(list)
    failed = defaultdict(list)
    for idx, (label, pred) in enumerate(zip(labels, preds)):
        for k in list(label.keys()):
            if k == "price":
                hit = False
                if pred.get(k, ""):
                    price_range = get_price_range(pred[k])
                    if price_range and (price_range[0] <= label[k] <= price_range[1]):
                        hit = True
                if hit:
                    res[k].append(1)
                else:
                    res[k].append(0)
                    failed[k].append([idx, label[k], pred.get(k, "")])
            else:
                if label[k] == pred.get(k, ""):
                    res[k].append(1)
                else:
                    res[k].append(0)
                    failed[k].append([idx, label[k], pred.get(k, "")])
    res = {k: sum(v) / len(v) for k, v in list(res.items())}
    return res, failed

#### parsing

In [9]:
def parse_result_output(output):
    res = []
    for q, o in tqdm(output):
        tmp = {}
        tmp["item"], tmp["category"] = list(o["target-item"].items())[0]
        attributes = list(o["target-attribute"].values())[0]
        tmp["program_name"] = attributes.get("program-name", "")
        tmp["actor_name"] = attributes.get("actor-name", "")
        tmp["color"] = attributes.get("color", "")
        tmp["material"] = attributes.get("material", "")
        tmp["length"] = attributes.get("length", "")
        tmp["price"] = attributes.get("price", "")
        res.append(tmp)
    return res

#### Testset1

In [10]:
with open("jovis_model/_db/skb/testsets/keyword_extractor/querys_1000_simple_color.pkl", "rb") as f:
    data = pickle.load(f)

In [11]:
test_1_querys, test_1_labels = [], []
for d in data[:500]:
    test_1_querys.append(d["query"])
    tmp = d["metadata"]
    tmp_label = {}
    tmp_label["program_name"] = tmp["program_name"]
    tmp_label["actor_name"] = tmp["actor_name"]
    tmp_label["item"] = tmp["product_category"]
    tmp_label["color"] = tmp["product_colors"]
    test_1_labels.append(tmp_label)

In [None]:
test_1_success, test_1_error = test_query(test_1_querys, test_prompt)

  0%|          | 0/500 [00:00<?, ?it/s]

In [65]:
print(test_1_error[0][1])

<output>
{
    "role": "keyword_extractor",
    "target-item": {
        "청바지": "pants"
    },
    "target-attribute": {
        "청바지": {
            "looking-for-popular": False,
            "program-name": "런닝맨 673회",
            "actor-name": "송지효",
            "brand": "",
            "price": "",
            "time": "",
            "color": "스카이블루",
            "material": "",
            "print": "",
            "neckline": "",
            "length": "",
            "season": "",
            "place": "",
            "occasion": "",
            "style": ""
        }
    }
}
</output>


In [13]:
test_1_preds = parse_result_output(test_1_success)

  0%|          | 0/331 [00:00<?, ?it/s]

In [46]:
tmp_output = test_1_error[0][1]

In [57]:
start, end = "<output>", "</output>"
res = json.loads(tmp_output[tmp_output.find(start) + len(start): tmp_output.find(end)])

JSONDecodeError: Expecting value: line 9 column 36 (char 165)

In [64]:
print(tmp_output[tmp_output.find(start) + len(start): tmp_output.find(end)])


{
    "role": "keyword_extractor",
    "target-item": {
        "청바지": "pants"
    },
    "target-attribute": {
        "청바지": {
            "looking-for-popular": False,
            "program-name": "런닝맨 673회",
            "actor-name": "송지효",
            "brand": "",
            "price": "",
            "time": "",
            "color": "스카이블루",
            "material": "",
            "print": "",
            "neckline": "",
            "length": "",
            "season": "",
            "place": "",
            "occasion": "",
            "style": ""
        }
    }
}



In [54]:
print(tmp_output[tmp_output.find(start) + len(start): tmp_output.find(end)].strip())

{
    "role": "keyword_extractor",
    "target-item": {
        "청바지": "pants"
    },
    "target-attribute": {
        "청바지": {
            "looking-for-popular": False,
            "program-name": "런닝맨 673회",
            "actor-name": "송지효",
            "brand": "",
            "price": "",
            "time": "",
            "color": "스카이블루",
            "material": "",
            "print": "",
            "neckline": "",
            "length": "",
            "season": "",
            "place": "",
            "occasion": "",
            "style": ""
        }
    }
}


In [51]:
json.loads(tmp_output[tmp_output.find(start) + len(start): tmp_output.find(end)])

JSONDecodeError: Expecting value: line 9 column 36 (char 165)

In [45]:
print(test_1_error[0][1])

<output>
{
    "role": "keyword_extractor",
    "target-item": {
        "청바지": "pants"
    },
    "target-attribute": {
        "청바지": {
            "looking-for-popular": False,
            "program-name": "런닝맨 673회",
            "actor-name": "송지효",
            "brand": "",
            "price": "",
            "time": "",
            "color": "스카이블루",
            "material": "",
            "print": "",
            "neckline": "",
            "length": "",
            "season": "",
            "place": "",
            "occasion": "",
            "style": ""
        }
    }
}
</output>


In [44]:
parse_model_output(print(test_1_error[0][1]))

<output>
{
    "role": "keyword_extractor",
    "target-item": {
        "청바지": "pants"
    },
    "target-attribute": {
        "청바지": {
            "looking-for-popular": False,
            "program-name": "런닝맨 673회",
            "actor-name": "송지효",
            "brand": "",
            "price": "",
            "time": "",
            "color": "스카이블루",
            "material": "",
            "print": "",
            "neckline": "",
            "length": "",
            "season": "",
            "place": "",
            "occasion": "",
            "style": ""
        }
    }
}
</output>


In [16]:
test_1_acc, test_1_failed = get_accuracy(test_1_labels, test_1_preds)

In [17]:
test_1_acc

{'program_name': 0.027522935779816515,
 'actor_name': 0.012232415902140673,
 'item': 0.03363914373088685,
 'color': 0.09174311926605505}

In [31]:
test_1_querys[1]

'런닝맨 673회에서 송지효가 입은 스카이블루 청바지 어디서 사나요?'

In [29]:
test_1_labels[1]

{'program_name': '런닝맨', 'actor_name': '송지효', 'item': '청바지', 'color': '스카이블루'}

In [30]:
test_1_preds[2]

{'item': '셔츠',
 'category': 'tops',
 'program_name': '요즘 남자 라이프 - 신랑수업',
 'actor_name': '심형탁',
 'color': '그레이',
 'material': '',
 'length': '',
 'price': ''}

In [23]:
test_1_querys[4]

'요즘 남자 라이프 - 신랑수업 105회에서 김동완이 입었던 스카이블루 색상의 스웨터, 정보 좀 알려줄 수 있어?'

In [21]:
test_1_failed["color"]

[[4, '스카이블루', '블랙'],
 [5, '블랙', '화이트'],
 [6, '옐로우', '그린색'],
 [7, '화이트', '그레이'],
 [8, '그린', '그린색'],
 [9, '레드', '블랙'],
 [10, '그레이', '블루'],
 [11, '베이지', '그레이'],
 [12, '그린', '화이트'],
 [13, '블랙', '화이트'],
 [14, '블루', '브라운'],
 [15, '그레이', '화이트'],
 [16, '화이트', '네이비'],
 [17, '화이트', '블랙'],
 [19, '옐로우', '오렌지색'],
 [20, '브라운', '블랙'],
 [21, '블랙', '브라운'],
 [22, '그레이', '블랙'],
 [23, '화이트', '그린'],
 [24, '네이비', '그린색'],
 [25, '블랙', '브라운'],
 [26, '레드', '화이트'],
 [27, '오렌지', '스카이블루'],
 [28, '그린', '민트'],
 [29, '화이트', '카키색'],
 [30, '블랙', '옐로우'],
 [31, '브라운', '카키색'],
 [32, '블랙', '퍼플'],
 [33, '그린', '화이트'],
 [34, '그린', '네이비'],
 [35, '스카이블루', '블랙'],
 [36, '브라운', '화이트'],
 [37, '블루', '블랙색'],
 [38, '화이트', '베이지색'],
 [39, '스카이블루', '블랙'],
 [40, '민트', '그레이'],
 [41, '카키', '골드'],
 [42, '옐로우', '네이비'],
 [43, '레드', '브라운색'],
 [44, '카키', '화이트'],
 [45, '화이트', '핑크색'],
 [46, '퍼플', '네이비'],
 [47, '화이트', '레드'],
 [48, '그레이', '네이비'],
 [49, '네이비', '네이비색'],
 [50, '블랙', '화이트'],
 [53, '블랙', '퍼플'],
 [56, '베이지', '화이트'],
 [58, '블랙', '블루색'],
 [

#### Testset2

In [21]:
with open("jovis_model/_db/skb/testsets/keyword_extractor/querys_500_price_strict.pkl", "rb") as f:
    data = pickle.load(f)

In [22]:
test_2_querys, test_2_labels = [], []
for d in data[:500]:
    test_2_querys.append(d["query"])
    tmp = d["metadata"]
    tmp_label = {}
    if not isinstance(tmp["아이템"], float):
        tmp_label["item"] = tmp["아이템"]
    if not isinstance(tmp["재질"], float):
        tmp_label["material"] = tmp["재질"]
    if not isinstance(tmp["색상"], float):
        tmp_label["color"] = tmp["색상"]
    if not isinstance(tmp["가격"], float):
        tmp_label["price"] = int("".join(re.findall(r"\d+", tmp["가격"])))
    test_2_labels.append(tmp_label)

In [23]:
test_2_success, test_2_error = test_query(test_2_querys, test_prompt)

  0%|          | 0/500 [00:00<?, ?it/s]

In [24]:
test_2_preds = parse_result_output(test_2_success)

  0%|          | 0/500 [00:00<?, ?it/s]

In [39]:
test_2_acc, test_2_failed = get_accuracy(test_2_labels, test_2_preds)

In [40]:
test_2_acc

{'item': 0.636,
 'material': 0.7619047619047619,
 'color': 0.8038793103448276,
 'price': 0.84}

# Meta embeddings

In [47]:
from jovis_model.utils.mapping import ATTRIBUTE

In [51]:
attr_keywords = []
for attr_dict in list(ATTRIBUTE.values()):
    attr_keywords.append(attr_dict["ko"])
    attr_keywords.append(attr_dict["en"])

In [53]:
embeddings = []
for kw in tqdm(attr_keywords):
    embeddings.append(se_runner.run([kw]).detach().cpu().numpy()[0].tolist())

  0%|          | 0/1798 [00:00<?, ?it/s]

In [62]:
build_faiss_index(
    embeddings=embeddings,
    save_path="outputs/skb",
    save_name="attr_embeddings_mpnet",
    pids=attr_keywords
)

In [63]:
index_file_name = "attr_embeddings_mpnet"
attr_index = faiss.read_index(f"outputs/skb/{index_file_name}.index")

In [65]:
with open(f"outputs/skb/{index_file_name}_map.json", "r") as f:
    attr_map = json.load(f)

In [89]:
attr_map_rev = {v: k for k, v in attr_map.items()}

In [143]:
search_vector = se_runner.run(["니트베스트"]).detach().cpu().numpy()[0].tolist()

In [144]:
score, indices = attr_index.search(np.array(search_vector).reshape(1, -1), 20)
res = [attr_map[str(idx)] for idx in indices[0]]

# Download

In [57]:
bucket = boto3.resource("s3").Bucket("bedrock-hackaton")

obj_keys = []
for f in bucket.objects.filter(Prefix="01HZ64DQT6H8FQKMM78EYZ52SD/").all():
    obj_keys.append(f.key)

In [66]:
data_dict = {}
for k in tqdm(obj_keys):
    obj = bucket.Object(k)
    pid = os.path.basename(k).split(".")[0]
    data_dict[pid] = obj.get()["Body"].read().decode("utf-8")

100%|██████████| 20101/20101 [1:27:01<00:00,  3.85it/s]  


In [67]:
with open("jovis_model/_db/skb/testsets/intention_classifier/ppl_documents.json", "w") as f:
    json.dump(data_dict, f)