In [1]:
%%capture
!{sys.executable} -m pip install openai
!{sys.executable} -m pip install litellm

In [2]:
import os
from openai import OpenAI

if not os.environ["OPENAI_API_KEY"]: 
    os.environ["OPENAI_API_KEY"]= '<REDACTED>'

MODEL = "gpt-4o-mini"

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

Open keywords data:

In [3]:
def read_file_to_list(file_path):
    with open(file_path, 'r') as file:
        lines = file.read().splitlines()
    return lines

keywords_list = read_file_to_list('20qs-data/keywords.txt')
keywords_list = keywords_list[1:]
print(f"Number of keywords in data: {len(keywords_list)}")

Number of keywords in data: 2046


## Labeling Functions

In [4]:
from collections import Counter
from tqdm import tqdm

def get_answer(label, keyword):
    
    prompt = f"Keyword: {keyword}\n\nQuestion: {label['question']}\n\n"
    prompt += "Please provide an answer to the question based on the keyword. ONLY ANSWER Yes OR No. IF UNSURE, CHOOSE MOST LIKELY ANSWER FROM Yes OR No. "
    
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=10
    )
    response = response.choices[0].message.content

    return response


def label_keywords(label, keywords, labels, results):
    
    label_tag = label['tag']
    yes_no_counts = Counter()

    for keyword in tqdm(keywords, desc = 'label_keywords', position = 0, leave = True, ncols = 100):
        answer = get_answer(label, keyword)
        answer = answer.lower()
        # Count yes/no answers
        if 'yes' in answer:
            yes_no_counts['yes'] += 1
            answer = 'yes'
        elif 'no' in answer:
            yes_no_counts['no'] += 1
            answer = 'no'
        else:
            yes_no_counts['err'] +=1
            answer = 'err'
        if keyword in labels:
            labels[keyword][label_tag] = answer
        else:
            labels[keyword] = {label_tag: answer}

    # Calculate percentages
    total_answers = sum(yes_no_counts.values())
    yes_percentage = (yes_no_counts['yes'] / total_answers) * 100 if total_answers > 0 else 0
    no_percentage = (yes_no_counts['no'] / total_answers) * 100 if total_answers > 0 else 0
    err_percentage = (yes_no_counts['err'] / total_answers) * 100 if total_answers > 0 else 0

    # Print results
    print(label['question'])
    print(f"Yes answers: {yes_no_counts['yes']}, percentage: {yes_percentage:.2f}%")
    print(f"No answers: {yes_no_counts['no']}, percentage: {no_percentage:.2f}%")
    print(f"Error answers: {yes_no_counts['err']}, percentage: {err_percentage:.2f}%")

    # Save results
    results[label_tag] = {
        'yes_percentage': yes_percentage,
        'no_percentage': no_percentage,
        'yes_counts': yes_no_counts['yes'],
        'no_counts': yes_no_counts['no'],
        'err_counts': yes_no_counts['err']
    }

    return labels, results

Read current keyword labels and results for adding labels:

In [7]:
import json

def read_jsonl_and_transform(file_path):
    result_dict = {}

    with open(file_path, 'r') as file:
        for line in file:
            record = json.loads(line)
            keyword = record.pop('keyword')
            result_dict[keyword] = record

    return result_dict

def read_json_to_dict(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

keyword_labels, results = read_jsonl_and_transform('20qs-data/labeled_keywords-v2.jsonl'), read_json_to_dict('20qs-data/labeling_results-v2.jsonl')

OR start without any labels:

In [13]:
#keyword_labels = {}
#results = {}

Label places/things:

In [14]:
places_label = {'tag': 'place', 'question': "Is it a place?"}

keyword_labels, results = label_keywords(places_label, keywords_list, keyword_labels, results)

label_keywords:   0%|                                                      | 0/2046 [00:00<?, ?it/s]

label_keywords: 100%|███████████████████████████████████████████| 2046/2046 [17:45<00:00,  1.92it/s]

Is it a place?
Yes answers: 699, percentage: 34.16%
No answers: 1347, percentage: 65.84%
Error answers: 0, percentage: 0.00%





In [8]:
keyword_places = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'yes']
keyword_things = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'no']

print(f"Number of keywords labeled as things: {len(keyword_things)}.")

Number of keywords labeled as things: 1347.


## Things Branch

Labels to apply:

In [16]:
labels_things = [
    {'tag': 'food', 'question': "Is it a food?"},
    {'tag': 'beverage', 'question': "Is it a beverage?"},
    {'tag': 'living', 'question': "Is it a living thing?"},
    {'tag': 'plant', 'question': "Is it a plant?"},
    {'tag': 'animal', 'question': 'Is it an animal?'},
    {'tag': 'electronics', 'question': "Is it related to electronics or technology?"},
    {'tag': 'furniture', 'question': 'Is it furniture?'},
    {'tag': 'transportation, vehicles', 'question': 'Is it related to transportation or vehicles?'},
    {'tag': 'man-made', 'question': "Is it a man-made thing?"},
    {'tag': 'furniture', 'question': "Is it furniture?"},
    {'tag': 'architecture, construction', 'question': 'Is it related to architectural structures or construction?'},
    {'tag': 'natural material, resource', 'question': 'Is it a natural material or resource?'},
    {'tag': 'natural phenomenon', 'question': 'Is it a natural phenomenon?'},
    {'tag': 'industry, manufacturing', 'question': "Is it related to industrial production or manufacturing?"},
    {'tag': 'agriculture', 'question': "Is it related to agricultural production?"},
    {'tag': 'arts, media', 'question': "Is it broadly related to the arts or media?"},
    {'tag': 'safety', 'question': "Is it related to safety or safety equipments?"},
    {'tag': 'medicine', 'question': "Is it broadly related to medicine or healthcare?"},
    {'tag': 'clothing, accessories, beauty', 'question': "Is it related to clothing, accessories or beauty products?"},
    {'tag': 'sports, entertainment', 'question': "Is it broadly related to entertainment or sports?"}
]

In [17]:
for label in tqdm(labels_things, desc = 'labels', ncols=150):
    keyword_labels, results = label_keywords(label, keyword_things, keyword_labels, results)

label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:38<00:00,  1.93it/s]                           | 0/20 [00:00<?, ?it/s]
labels:   5%|█████▏                                                                                                 | 1/20 [11:38<3:41:17, 698.80s/it]

Is it a food?
Yes answers: 239, percentage: 17.74%
No answers: 1108, percentage: 82.26%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:33<00:00,  1.94it/s]
labels:  10%|██████████▎                                                                                            | 2/20 [23:11<3:28:38, 695.50s/it]

Is it a beverage?
Yes answers: 46, percentage: 3.41%
No answers: 1301, percentage: 96.59%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:03<00:00,  2.03it/s]
labels:  15%|███████████████▍                                                                                       | 3/20 [34:15<3:12:51, 680.70s/it]

Is it a living thing?
Yes answers: 191, percentage: 14.18%
No answers: 1156, percentage: 85.82%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:27<00:00,  1.96it/s]
labels:  20%|████████████████████▌                                                                                  | 4/20 [45:42<3:02:13, 683.34s/it]

Is it a plant?
Yes answers: 131, percentage: 9.73%
No answers: 1216, percentage: 90.27%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:25<00:00,  1.96it/s]
labels:  25%|█████████████████████████▊                                                                             | 5/20 [57:08<2:51:03, 684.22s/it]

Is it an animal?
Yes answers: 73, percentage: 5.42%
No answers: 1274, percentage: 94.58%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:46<00:00,  1.91it/s]
labels:  30%|██████████████████████████████▎                                                                      | 6/20 [1:08:54<2:41:25, 691.79s/it]

Is it related to electronics or technology?
Yes answers: 322, percentage: 23.90%
No answers: 1025, percentage: 76.10%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:38<00:00,  1.93it/s]
labels:  35%|███████████████████████████████████▎                                                                 | 7/20 [1:20:32<2:30:19, 693.84s/it]

Is it furniture?
Yes answers: 69, percentage: 5.12%
No answers: 1278, percentage: 94.88%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:41<00:00,  1.92it/s]
labels:  40%|████████████████████████████████████████▍                                                            | 8/20 [1:32:14<2:19:14, 696.25s/it]

Is it related to transportations or vehicles?
Yes answers: 166, percentage: 12.32%
No answers: 1181, percentage: 87.68%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:03<00:00,  2.03it/s]
labels:  45%|█████████████████████████████████████████████▍                                                       | 9/20 [1:43:17<2:05:45, 685.93s/it]

Is it a man-made thing?
Yes answers: 1067, percentage: 79.21%
No answers: 280, percentage: 20.79%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:30<00:00,  1.95it/s]
labels:  50%|██████████████████████████████████████████████████                                                  | 10/20 [1:54:47<1:54:33, 687.31s/it]

Is it furniture?
Yes answers: 72, percentage: 5.35%
No answers: 1275, percentage: 94.65%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:09<00:00,  2.01it/s]
labels:  55%|███████████████████████████████████████████████████████                                             | 11/20 [2:05:57<1:42:18, 682.01s/it]

Is it related to architectural structures or construction?
Yes answers: 241, percentage: 17.89%
No answers: 1106, percentage: 82.11%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:33<00:00,  1.94it/s]
labels:  60%|████████████████████████████████████████████████████████████                                        | 12/20 [2:17:31<1:31:25, 685.63s/it]

Is it a natural material or resource?
Yes answers: 332, percentage: 24.65%
No answers: 1015, percentage: 75.35%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:19<00:00,  1.98it/s]
labels:  65%|█████████████████████████████████████████████████████████████████                                   | 13/20 [2:28:50<1:19:45, 683.67s/it]

Is it a natural phenomenon?
Yes answers: 120, percentage: 8.91%
No answers: 1227, percentage: 91.09%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:02<00:00,  2.03it/s]
labels:  70%|██████████████████████████████████████████████████████████████████████                              | 14/20 [2:39:53<1:07:44, 677.41s/it]

Is it related to industrial production or manufacturing?
Yes answers: 659, percentage: 48.92%
No answers: 688, percentage: 51.08%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:11<00:00,  2.01it/s]
labels:  75%|████████████████████████████████████████████████████████████████████████████▌                         | 15/20 [2:51:05<56:17, 675.53s/it]

Is it related to agricultural production?
Yes answers: 468, percentage: 34.74%
No answers: 879, percentage: 65.26%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:56<00:00,  2.05it/s]
labels:  80%|█████████████████████████████████████████████████████████████████████████████████▌                    | 16/20 [3:02:01<44:39, 669.86s/it]

Is it broadly related to the arts or media?
Yes answers: 467, percentage: 34.67%
No answers: 880, percentage: 65.33%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:26<00:00,  1.96it/s]
labels:  85%|██████████████████████████████████████████████████████████████████████████████████████▋               | 17/20 [3:13:27<33:44, 674.79s/it]

Is it related to safety or safety equipments?
Yes answers: 332, percentage: 24.65%
No answers: 1015, percentage: 75.35%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:42<00:00,  1.92it/s]
labels:  90%|███████████████████████████████████████████████████████████████████████████████████████████▊          | 18/20 [3:25:10<22:46, 683.03s/it]

Is it broadly related to medicine or healthcare?
Yes answers: 281, percentage: 20.86%
No answers: 1066, percentage: 79.14%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:23<00:00,  1.97it/s]
labels:  95%|████████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [3:36:33<11:23, 683.21s/it]

Is it related to clothing, accessories or beauty products?
Yes answers: 249, percentage: 18.49%
No answers: 1098, percentage: 81.51%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:28<00:00,  1.96it/s]
labels: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [3:48:02<00:00, 684.10s/it]

Is it broadly related to entertainment or sports?
Yes answers: 281, percentage: 20.86%
No answers: 1066, percentage: 79.14%
Error answers: 0, percentage: 0.00%





In [9]:
labels_things_to_add_or_modify = [
    {'tag': 'food, beverage, cooking', 'question': 'Is it related to food, beverages or cooking?'},
    {'tag': 'handheld', 'question': 'Is it something a person can hold in their hand?'},
    {'tag': 'industry, manufacturing', 'question': "Is it used in industrial production or manufacturing?"},
    {'tag': 'agriculture', 'question': "Is it used in agricultural production?"},
    {'tag': 'transportation, vehicles', 'question': 'Is it related to transportation or vehicles?'},
    {'tag': 'indoors', 'question': 'Is it something that can be found indoors?'}
]

In [10]:
for label in tqdm(labels_things_to_add_or_modify, desc = 'labels', ncols=150):
    keyword_labels, results = label_keywords(label, keyword_things, keyword_labels, results)

label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:50<00:00,  2.07it/s]                            | 0/6 [00:00<?, ?it/s]
labels:  17%|█████████████████▋                                                                                        | 1/6 [10:50<54:14, 650.84s/it]

Is it related to food, beverages or cooking?
Yes answers: 410, percentage: 30.44%
No answers: 937, percentage: 69.56%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:48<00:00,  2.08it/s]
labels:  33%|███████████████████████████████████▎                                                                      | 2/6 [21:39<43:18, 649.57s/it]

Is it something a person can hold in their hand?
Yes answers: 941, percentage: 69.86%
No answers: 406, percentage: 30.14%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:57<00:00,  2.05it/s]
labels:  50%|█████████████████████████████████████████████████████                                                     | 3/6 [32:37<32:39, 653.25s/it]

Is it used in industrial production or manufacturing?
Yes answers: 704, percentage: 52.26%
No answers: 643, percentage: 47.74%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:48<00:00,  2.08it/s]
labels:  67%|██████████████████████████████████████████████████████████████████████▋                                   | 4/6 [43:25<21:42, 651.47s/it]

Is it used in agricultural production?
Yes answers: 556, percentage: 41.28%
No answers: 791, percentage: 58.72%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [10:50<00:00,  2.07it/s]
labels:  83%|████████████████████████████████████████████████████████████████████████████████████████▎                 | 5/6 [54:16<10:51, 651.01s/it]

Is it related to transportation or vehicles?
Yes answers: 169, percentage: 12.55%
No answers: 1178, percentage: 87.45%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|███████████████████████████████████████████| 1347/1347 [11:07<00:00,  2.02it/s]
labels: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [1:05:23<00:00, 653.92s/it]

Is it something that can be found indoors?
Yes answers: 1133, percentage: 84.11%
No answers: 214, percentage: 15.89%
Error answers: 0, percentage: 0.00%





## Places Branch
Out of use as of the last competition update.

In [18]:
labels_places = [
    {'tag': 'country', 'question': "Is it a country?"},
    {'tag': 'city', 'question': "Is it a city?"},
    {'tag': 'capital', 'question': "Is it a capital of a country?"},
    {'tag': 'landlocked', 'question': "Is the place either a landlocked country or a place located in a landlocked country?"},
    {'tag': 'coast', 'question': 'Is this place found along the coast?'},
    {'tag': 'geological feature', 'question': 'Is it a geological feature?'},
    {'tag': 'africa', 'question': "Is it in Africa?"},
    {'tag': 'asia', 'question': 'Is it in Asia?'},
    {'tag': 'europe', 'question': 'Is it in Europe?'},
    {'tag': 'hemisphere', 'question': 'Is it in the Western hemisphere?'},
    {'tag': 'oceania', 'question': 'Is it in Oceania?'},
    {'tag': 'europe or asia', 'question': "Is it either in europe or asia?"},
    {'tag': 'americas', 'question': "Is it in the Americas?"},
    {'tag': 'north america', 'question': 'Is it in North America?'},
    {'tag': 'man-made landmark', 'question': 'Is it a man-made landmark?'},
    {'tag': 'natural landmark', 'question': 'Is it a natural landmark?'},
    {'tag': 'equator', 'question': "Is it north of the equator?"},
    {'tag': 'body of water', 'question': "Is it a body of water?"},
    {'tag': 'biome', 'question': "Is it a type of biome?"},
    {'tag': 'building', 'question': "Is it a type of building or stucture?"}
]

In [19]:
for label in tqdm(labels_places, desc = 'labels', ncols=150):
    keyword_labels, results = label_keywords(label, keyword_places, keyword_labels, results)

label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:20<00:00,  1.84it/s]                           | 0/20 [00:00<?, ?it/s]
labels:   5%|█████▏                                                                                                 | 1/20 [06:20<2:00:33, 380.74s/it]

Is it a country?
Yes answers: 194, percentage: 27.75%
No answers: 505, percentage: 72.25%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:09<00:00,  1.89it/s]
labels:  10%|██████████▎                                                                                            | 2/20 [12:29<1:52:09, 373.88s/it]

Is it a city?
Yes answers: 307, percentage: 43.92%
No answers: 392, percentage: 56.08%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:01<00:00,  1.93it/s]
labels:  15%|███████████████▍                                                                                       | 3/20 [18:31<1:44:22, 368.36s/it]

Is it a capital of a country?
Yes answers: 135, percentage: 19.31%
No answers: 564, percentage: 80.69%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:00<00:00,  1.94it/s]
labels:  20%|████████████████████▌                                                                                  | 4/20 [24:32<1:37:26, 365.38s/it]

Is the place either a landlocked country or a place located in a landlocked country?
Yes answers: 63, percentage: 9.01%
No answers: 636, percentage: 90.99%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:06<00:00,  1.91it/s]
labels:  25%|█████████████████████████▊                                                                             | 5/20 [30:39<1:31:27, 365.85s/it]

Is this place found along the coast?
Yes answers: 296, percentage: 42.35%
No answers: 403, percentage: 57.65%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:10<00:00,  1.89it/s]
labels:  30%|██████████████████████████████▉                                                                        | 6/20 [36:49<1:25:43, 367.41s/it]

Is it a geological feature?
Yes answers: 66, percentage: 9.44%
No answers: 633, percentage: 90.56%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [05:55<00:00,  1.97it/s]
labels:  35%|████████████████████████████████████                                                                   | 7/20 [42:45<1:18:45, 363.54s/it]

Is it in Africa?
Yes answers: 152, percentage: 21.75%
No answers: 547, percentage: 78.25%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:10<00:00,  1.88it/s]
labels:  40%|█████████████████████████████████████████▏                                                             | 8/20 [48:56<1:13:10, 365.90s/it]

Is it in Asia?
Yes answers: 237, percentage: 33.91%
No answers: 462, percentage: 66.09%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:08<00:00,  1.90it/s]
labels:  45%|██████████████████████████████████████████████▎                                                        | 9/20 [55:04<1:07:14, 366.80s/it]

Is it in Europe?
Yes answers: 266, percentage: 38.05%
No answers: 433, percentage: 61.95%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:06<00:00,  1.91it/s]
labels:  50%|██████████████████████████████████████████████████                                                  | 10/20 [1:01:11<1:01:08, 366.81s/it]

Is it in the Western hemisphere?
Yes answers: 242, percentage: 34.62%
No answers: 457, percentage: 65.38%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:03<00:00,  1.92it/s]
labels:  55%|████████████████████████████████████████████████████████                                              | 11/20 [1:07:14<54:51, 365.70s/it]

Is it in Oceania?
Yes answers: 69, percentage: 9.87%
No answers: 630, percentage: 90.13%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:03<00:00,  1.92it/s]
labels:  60%|█████████████████████████████████████████████████████████████▏                                        | 12/20 [1:13:18<48:40, 365.03s/it]

Is it either in europe or asia?
Yes answers: 398, percentage: 56.94%
No answers: 301, percentage: 43.06%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:20<00:00,  1.84it/s]
labels:  65%|██████████████████████████████████████████████████████████████████▎                                   | 13/20 [1:19:38<43:08, 369.72s/it]

Is it in the Americas?
Yes answers: 279, percentage: 39.91%
No answers: 420, percentage: 60.09%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:07<00:00,  1.90it/s]
labels:  70%|███████████████████████████████████████████████████████████████████████▍                              | 14/20 [1:25:45<36:53, 368.92s/it]

Is it in North America?
Yes answers: 236, percentage: 33.76%
No answers: 463, percentage: 66.24%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:05<00:00,  1.92it/s]
labels:  75%|████████████████████████████████████████████████████████████████████████████▌                         | 15/20 [1:31:50<30:38, 367.74s/it]

Is it a man-made landmark?
Yes answers: 307, percentage: 43.92%
No answers: 392, percentage: 56.08%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [05:53<00:00,  1.98it/s]
labels:  80%|█████████████████████████████████████████████████████████████████████████████████▌                    | 16/20 [1:37:44<24:14, 363.57s/it]

Is it a natural landmark?
Yes answers: 102, percentage: 14.59%
No answers: 597, percentage: 85.41%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [05:54<00:00,  1.97it/s]
labels:  85%|██████████████████████████████████████████████████████████████████████████████████████▋               | 17/20 [1:43:39<18:02, 360.93s/it]

Is it north of the equator?
Yes answers: 541, percentage: 77.40%
No answers: 157, percentage: 22.46%
Error answers: 1, percentage: 0.14%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:15<00:00,  1.86it/s]
labels:  90%|███████████████████████████████████████████████████████████████████████████████████████████▊          | 18/20 [1:49:55<12:10, 365.38s/it]

Is it a body of water?
Yes answers: 20, percentage: 2.86%
No answers: 679, percentage: 97.14%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [06:03<00:00,  1.92it/s]
labels:  95%|████████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [1:55:58<06:04, 364.72s/it]

Is it a type of biome?
Yes answers: 8, percentage: 1.14%
No answers: 691, percentage: 98.86%
Error answers: 0, percentage: 0.00%


label_keywords: 100%|█████████████████████████████████████████████| 699/699 [05:54<00:00,  1.97it/s]
labels: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [2:01:53<00:00, 365.67s/it]

Is it a type of building or stucture?
Yes answers: 50, percentage: 7.15%
No answers: 649, percentage: 92.85%
Error answers: 0, percentage: 0.00%





## Save as JSONL

In [11]:
def make_list_from_dict(original_dict):
    list_of_dicts = []
    for key, sub_dict in original_dict.items():
        # Create a new dictionary that includes the key from the original dict
        new_dict = {"keyword": key}
        # Update the new dictionary with the key-value pairs from the sub-dictionary
        new_dict.update(sub_dict)
        # Append the new dictionary to the list
        list_of_dicts.append(new_dict)
    return list_of_dicts

labeled_keywords_list = make_list_from_dict(keyword_labels)

In [12]:
with open("20qs-data/labeled_keywords_v2.jsonl", "w") as f:
    for i in labeled_keywords_list:
        json.dump(i, f)
        f.write('\n')

with open("20qs-data/labeling_results_v2.jsonl", "w") as f:
    json.dump(results, f)