In [1]:
import numpy as np
import pandas as pd
import random
import json
from opensearchpy import OpenSearch
import os
OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost")
OPENSEARCH_PORT = os.getenv("OPENSEARCH_PORT", 9200)

In [78]:
def interleave(listA, listB, k):
    ids = []
    teams = []
    rank = 1
    idx_a = 0
    idx_b = 0
    len_a = len(listA)
    len_b = len(listB)

    while rank <= k:
        a_val = listA[idx_a]['_id'] if idx_a < len_a else None
        b_val = listB[idx_b]['_id'] if idx_b < len_b else None
        if not (a_val and b_val):
            # lists are done
            return zip(ids, teams)
        if not a_val:
            # take the rest of listB
            for hit in listB[idx_b:k]['_id']:
                if hit not in ids:
                    ids.append(hit)
                    teams.append('TeamB')
            return zip(ids, teams)
        if not b_val:
            # take the rest of listA
            for hit in listA['_id']:
                if hit not in ids:
                    ids.append(hit)
                    teams.append('TeamB')
            return zip(ids, teams)
        a_first = idx_a < idx_b or idx_a == idx_b and random.randint(0,1)
        if a_first:
            if a_val not in ids:
                ids.append(a_val)
                teams.append('TeamA')
                rank += 1
            idx_a += 1
        else:
            if b_val not in ids:
                ids.append(b_val)
                teams.append('TeamB')
                rank += 1
            idx_b += 1       
    return list(zip(ids, teams))


def get_list(listA, listB, k):
    A = listA['hits']['hits']
    B = listB['hits']['hits']
    interleaving = interleave(A, B, k)
    return interleaving

def get_search_config(client, name):
    search_configs_index = '.plugins-search-relevance-search-config'
    conf = client.search( body = {
      "query": {
        "match": {"name": name}
      },
      "size": 1
    }, index=search_configs_index)
    return conf['hits']['hits'][0]['_source'] if len(conf['hits']['hits']) else {}

def populate_query(query, config, size=10, source=["title", "description", "asin"]):
    query = query.replace('"', '\\"')
    body = config['queryBody'].replace("%SearchText%", query)
    return {
        "query": json.loads(body),
        "size": size,
        "_source": source
    }

def run_AB(client, query, configA, configB, size=10):
    confA = get_search_config(client, configA)
    confB = get_search_config(client, configB)
    qA = populate_query(query, confA, size=size)
    qB = populate_query(query, confB, size=size)
    #TODO: extract the endpoint value from the search config, rather than using the client here
    resA = client.search(body=qA)
    resB = client.search(body=qB)
    result = get_list(resA, resB, 10)
    return result

def get_events(client, id, query, event_type=None):
    if event_type:
        evq = {
          "query": {
            "bool": {
             "must": [   
                 {"match": {"event_attributes.object.object_id": id}},
                 {"match": {"user_query": query}},
                 {"match": {"action_name": event_type}}
             ]
            }
          },
          "size": 1000
        }
    else:
        evq = {
          "query": {
            "bool": {
             "must": [   
                 {"match": {"event_attributes.object.object_id": id}},
                 {"match": {"user_query": query}}
             ]
            }
          },
          "size": 1000
        }
    results = client.search(body=evq, index='ubi_events')
    return results

def get_clicks(client, id, query):
    results = get_events(client, id, query, 'click')
    return results

def count_clicks(client, id, query):
    results = get_clicks(client, id, query)
    return results['hits']['total']['value']

In [8]:
# Create the client with SSL/TLS and hostname verification disabled.
client = OpenSearch(
    hosts = [{'host': OPENSEARCH_HOST, 'port': OPENSEARCH_PORT}],
    http_compress = True, # enables gzip compression for request bodies
    use_ssl = False,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False
)

In [None]:
ubi_queries = client.search(body={'query': {'match_all': {}}, "size":10000-31, "from": 31}, index='ubi_queries')
ubi_user_queries = [x['_source']['user_query'] for x in ubi_queries['hits']['hits']]
ubi_user_queries

In [17]:
for query in ubi_user_queries[:10]:
    for id,team in run_AB(client, query, 'Demo 2', 'Demo 3'):
        events = get_events(client, id)
        count = events['hits']['total']['value']
        if count:
            print(f"{team} {query}, {id} => {count}")

TeamA weighted blanket, B0835GRY7J => 113
TeamA air fryer, B07FDJMC9Q => 181
TeamB air fryer, B086542G1M => 80
TeamA surge protector power strip, B00TP1C51M => 2


In [79]:
for query in ubi_user_queries[:100]:
    for id,team in run_AB(client, query, 'Demo 2', 'Demo 3'):
        count = count_clicks(client, id, query)
        if count:
            print(f"{team} {query}, {id} => {count}")

TeamB pokemon, B07DMFDC6W => 9
TeamB ipod touch, B07FM5Z5C3 => 21
TeamA 30 inch ceiling fan without light, B001JBPHRU => 7


In [40]:
query_set_df = pd.DataFrame(ubi_user_queries, columns=['query'])
query_set_df

Unnamed: 0,query
0,vibrator
1,weighted blanket
2,apple watch
3,air fryer
4,surge protector power strip
...,...
9964,metal razor for women
9965,metagenics phytomulti without iron
9966,wheelchair cushions for pressure sores
9967,barbie dolls


In [None]:
query_set_df['res'] = query_set_df['query'].apply(lambda x: [ (id, count_clicks(client, id, x), team) for id,team in run_AB(client, x, 'Demo 2', 'Demo 3')])
query_set_df = query_set_df.explode('res')

In [101]:
query_set_df = query_set_df.join(pd.DataFrame(query_set_df['res'].values.tolist(), columns=['id', 'clicks', 'team']))
query_set_df

Unnamed: 0,query,res,id,clicks,team
0,vibrator,"(B07F9RR8PN, 0, TeamB)",B07F9RR8PN,0.0,TeamB
0,vibrator,"(B07GZHJ3NL, 0, TeamA)",B07F9RR8PN,0.0,TeamB
0,vibrator,"(B005VJA2LW, 0, TeamA)",B07F9RR8PN,0.0,TeamB
1,weighted blanket,"(B075W9VSW7, 0, TeamB)",B07GZHJ3NL,0.0,TeamA
1,weighted blanket,"(B097DWWTLM, 0, TeamA)",B07GZHJ3NL,0.0,TeamA
...,...,...,...,...,...
9968,lifevac choking device,"(B07P5TTMLJ, 0, TeamA)",B01F41UBY4,0.0,TeamB
9968,lifevac choking device,"(B01MSWT7H7, 0, TeamA)",B01F41UBY4,0.0,TeamB
9968,lifevac choking device,"(B00TRM0UI4, 0, TeamB)",B01F41UBY4,0.0,TeamB
9968,lifevac choking device,"(B07L4BFQBK, 0, TeamA)",B01F41UBY4,0.0,TeamB


In [102]:
query_set_df[query_set_df['team'] == 'TeamA'].describe()

Unnamed: 0,clicks
count,47835.0
mean,0.01108
std,0.284956
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,12.0


In [103]:
query_set_df[query_set_df['team'] == 'TeamB'].describe()

Unnamed: 0,clicks
count,48178.0
mean,0.028727
std,0.630079
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,21.0


In [116]:
query_set_df[query_set_df['team'] == 'TeamA'][query_set_df[query_set_df['team'] == 'TeamA']['clicks'] > 0]

Unnamed: 0,query,res,id,clicks,team
4806,fur real toys,"(B07X3TZFMY, 0, TeamA)",B001JBPHRU,7.0,TeamA
4807,fur real toys,"(B094D7DCXG, 0, TeamB)",B001JBPHRU,7.0,TeamA
4808,fur real toys,"(B0091OLFQE, 0, TeamA)",B001JBPHRU,7.0,TeamA
4809,fur real toys,"(B01NAQFR9A, 0, TeamB)",B001JBPHRU,7.0,TeamA
4810,fur real toys,"(B07RBYM3P2, 0, TeamA)",B001JBPHRU,7.0,TeamA
...,...,...,...,...,...
90677,halloween wristbands,"(B07TBHQL7B, 0, TeamB)",B00HD0ELFK,8.0,TeamA
90678,halloween wristbands,"(B07VNT7F1M, 0, TeamA)",B00HD0ELFK,8.0,TeamA
90679,halloween wristbands,"(B07VYZD3YN, 0, TeamB)",B00HD0ELFK,8.0,TeamA
90680,halloween wristbands,"(B07DPG7C28, 0, TeamB)",B00HD0ELFK,8.0,TeamA


In [117]:
query_set_df[query_set_df['team'] == 'TeamB'][query_set_df[query_set_df['team'] == 'TeamB']['clicks'] > 0]

Unnamed: 0,query,res,id,clicks,team
2554,10 inch fan,"(B071W1GBX8, 0, TeamB)",B07DMFDC6W,9.0,TeamB
2555,10 inch fan,"(B07D52VZB9, 0, TeamA)",B07DMFDC6W,9.0,TeamB
2556,10 inch fan,"(B001JBPHRU, 0, TeamB)",B07DMFDC6W,9.0,TeamB
2557,10 inch fan,"(B088H3DJDX, 0, TeamB)",B07DMFDC6W,9.0,TeamB
2558,10 inch fan,"(B06XFRNPR8, 0, TeamA)",B07DMFDC6W,9.0,TeamB
...,...,...,...,...,...
77639,rubber license plate frame,"(B00BY8KCFA, 0, TeamB)",B07XFR5M5G,1.0,TeamB
77640,rubber license plate frame,"(B083Q2DVQ3, 0, TeamB)",B07XFR5M5G,1.0,TeamB
77641,rubber license plate frame,"(B014I2Q66M, 0, TeamB)",B07XFR5M5G,1.0,TeamB
77642,rubber license plate frame,"(B07N4JLZYQ, 0, TeamB)",B07XFR5M5G,1.0,TeamB


In [118]:
query_set_df[query_set_df['team'] == 'TeamB'][query_set_df[query_set_df['team'] == 'TeamB']['clicks'] > 0].describe()

Unnamed: 0,clicks
count,141.0
mean,9.815603
std,6.313933
min,1.0
25%,5.0
50%,9.0
75%,16.0
max,21.0


In [119]:
query_set_df[query_set_df['team'] == 'TeamA'][query_set_df[query_set_df['team'] == 'TeamA']['clicks'] > 0].describe()

Unnamed: 0,clicks
count,90.0
mean,5.888889
std,2.939252
min,2.0
25%,3.0
50%,6.0
75%,7.0
max,12.0
