# Generate Descriptions with GPT-3.5

In [1]:
import itertools
import json
import os
import time

import openai

In [2]:
def stringtolist(description):
    """Extract the bullet items from a newline-separated block of text.

    Only lines that begin with the marker "- " are kept (empty lines and any
    other text are dropped), and the two-character marker is stripped from
    each kept line.
    """
    bullet = '- '
    items = []
    for line in description.split('\n'):
        if line.startswith(bullet):
            items.append(line[len(bullet):])
    return items

In [3]:
from tenacity import (
        retry,
        stop_after_attempt,
        wait_random_exponential,
) # for exponential backoff

In [4]:
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the placeholder remains the fallback and can
# still be replaced by hand.
openai.api_key = os.environ.get("OPENAI_API_KEY", "XXXXXXXXXXXXXXXXXXXXXXXX")

## Dataset Class Lists

In [5]:
# Category names for the ADE dataset (presumably ADE20K -- confirm). This is
# the list passed to obtain_descriptors_and_save in the final cell.
# NOTE(review): 'bed ' carries a trailing space, which flows into both the
# generated prompt and the JSON key -- confirm whether consumers rely on it.
ADE_CLASSES = [
        'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ',
        'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth',
        'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car',
        'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug',
        'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe',
        'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column',
        'signboard', 'chest of drawers', 'counter', 'sand', 'sink',
        'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path',
        'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door',
        'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table',
        'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove',
        'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar',
        'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower',
        'chandelier', 'awning', 'streetlight', 'booth', 'television receiver',
        'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister',
        'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van',
        'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything',
        'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent',
        'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'storage tank',
        'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake',
        'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce',
        'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen',
        'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass',
        'clock', 'flag']

# Category names, presumably for COCO panoptic segmentation -- confirm.
# Several names carry '-stuff', '-other' or '-merged' suffixes, which are sent
# to the prompt unchanged (only underscores are replaced before prompting).
# Not referenced by the cells visible in this notebook.
COCO_PANO_CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
               'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 
               'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
               'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 
               'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
               'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
               'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
               'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf',
               'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile',
               'wall-wood', 'water-other', 'window-blind', 'window-other',
               'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
               'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
               'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged', 
               'food-other-merged', 'building-other-merged', 'rock-merged',
               'wall-other-merged', 'rug-merged']

# Category names, presumably shared by the Cityscapes and BDD datasets --
# confirm. Not referenced by the cells visible in this notebook.
CITY_BDD_CLASSES = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
                 'traffic light', 'traffic sign', 'vegetation', 'terrain',
                 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
                 'motorcycle', 'bicycle']

# Category names, presumably for Mapillary Vistas v1 -- confirm.
# Unlike the other lists, these names are capitalized and some contain
# punctuation (' - ', parentheses); they are sent to the prompt as written.
# Not referenced by the cells visible in this notebook.
MAPILLARY_V1_CLASSES = ['Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail',
                 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain',
                 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track',
                 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building',
                 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist',
                 'Other Rider', 'Lane Marking - Crosswalk',
                 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow',
                 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench',
                 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera',
                 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole',
                 'Phone Booth', 'Pothole', 'Street Light', 'Pole',
                 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light',
                 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can',
                 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle',
                 'On Rails', 'Other Vehicle', 'Trailer', 'Truck',
                 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled']

# The 59 category names, presumably for the PASCAL-Context benchmark --
# confirm. Not referenced by the cells visible in this notebook.
PASCAL_CONTEXT_59 = ['aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle',
                 'bird', 'boat', 'book', 'bottle', 'building', 'bus',
                 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth',
                 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence',
                 'floor', 'flower', 'food', 'grass', 'ground', 'horse',
                 'keyboard', 'light', 'motorbike', 'mountain', 'mouse',
                 'person', 'plate', 'platform', 'pottedplant', 'road', 'rock',
                 'sheep', 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa',
                 'table', 'track', 'train', 'tree', 'truck', 'tvmonitor',
                 'wall', 'water', 'window', 'wood']

# Category names, presumably for PASCAL VOC -- confirm. The list has 21
# entries and begins with an explicit 'background' class.
# Not referenced by the cells visible in this notebook.
VOC = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
       'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
       'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
       'sofa', 'train', 'tvmonitor']

# Category names, presumably for the COCO-Stuff 164k dataset -- confirm.
# Names with '-other' / '-stuff' suffixes are sent to the prompt unchanged
# (only underscores are replaced before prompting).
# Not referenced by the cells visible in this notebook.
COCO_STUFF_164K = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner',
            'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet',
            'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile',
            'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain',
            'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble',
            'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', 'flower',
            'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel',
            'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', 'metal',
            'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net',
            'paper', 'pavement', 'pillow', 'plant-other', 'plastic',
            'platform', 'playingfield', 'railing', 'railroad', 'river', 'road',
            'rock', 'roof', 'rug', 'salad', 'sand', 'sea', 'shelf',
            'sky-other', 'skyscraper', 'snow', 'solid-other', 'stairs',
            'stone', 'straw', 'structural-other', 'table', 'tent',
            'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick',
            'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone',
            'wall-tile', 'wall-wood', 'water-other', 'waterdrops',
            'window-blind', 'window-other', 'wood']

## Prompt Generation

In [6]:
def generate_prompt(category_name: str) -> str:
    """Build the few-shot prompt asking the model to describe ``category_name``.

    The prompt lays out a five-step description structure, shows four worked
    examples (wall, building, sky, floor) written as "- " bullet lists, and
    ends by asking for the same format for ``category_name``.

    Args:
        category_name: Name interpolated into the closing request line.

    Returns:
        The complete prompt text.
    """
    return f"""Please make the descriptions to have a similar level of detail and a consistent type of information, which should be beneficial for clustering and machine learning applications. Each description will follow a structured format:

Start with a general description of the object or scene.
Describe the shape, orientation, and primary physical characteristics.
Mention the material, texture, or typical colors.
Note common features or elements associated with the object or scene.
End with possible additional details, variations, or environmental context.
Here are the improved, consistent descriptions:

wall:
- Vertical structure
- Flat, consistent surface
- Solid construction, typically opaque
- Varied finishes: painted, wallpapered, textured
- Features mounted objects, such as art or shelving
- Defined by corners and intersections
- May include switches or outlets

building:
- Architectural structure with enclosure
- Features walls, windows, and roof
- Diverse designs, reflecting architectural styles
- Built from materials like brick, concrete, glass
- Comprises multiple stories or floors
- Exterior may display signage or logos
- Often situated within a specific landscape
- May have distinctive landmarks or features

sky:
- Upper atmospheric expanse
- Color ranges from blue to gray
- Can be clear or cloud-filled
- Exhibits varying levels of brightness
- Daytime features sun, nighttime may show moon and stars
- Visible horizon line
- Can contain flying objects like birds or planes
- Weather phenomena may be present

floor:
- Horizontal expanse, foundational surface
- Material contrast with walls; varied textures
- May be patterned or designed
- Hosts furniture and other objects
- Shows a spectrum of colors and shades

Please apply this format to describe {category_name}.
"""

## Description Generation

In [7]:
def partition(lst, size):
    """Yield successive chunks of ``lst`` as lists holding at most ``size`` items.

    The last chunk is shorter when ``len(lst)`` is not a multiple of ``size``.
    """
    chunk_starts = range(0, len(lst), size)
    for start in chunk_starts:
        stop = start + size
        chunk = itertools.islice(lst, start, stop)
        yield list(chunk)

In [8]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(15))
def generate_response(model, message, temperature=0):
    """Send one chat-completion request and return the raw API response.

    The call is wrapped in a retry policy: randomized exponential backoff
    (configured with min=1, max=60) for up to 15 attempts.

    Args:
        model: Model name forwarded to the API.
        message: List of chat message dicts forwarded as ``messages``.
        temperature: Sampling temperature forwarded to the API.
    """
    request = {
        "model": model,
        "messages": message,
        "temperature": temperature,
    }
    return openai.ChatCompletion.create(**request)

In [9]:
def obtain_descriptors_and_save(filename, class_list, model="gpt-3.5-turbo", sleep_sec=0.5):
    """Query an OpenAI model for per-class descriptors and save them as JSON.

    One prompt is built per entry of ``class_list`` (underscores replaced by
    spaces). Models whose name contains ``"gpt-3.5"`` are queried one prompt
    at a time through ``generate_response``; any other model is queried
    through ``openai.Completion.create`` in batches of 20 prompts. Every reply
    is reduced to its "- " bullet lines by ``stringtolist``.

    Args:
        filename: Output path; ``.json`` is appended when it is missing.
        class_list: Category names; they become the keys of the saved mapping.
        model: OpenAI model name.
        sleep_sec: Seconds to sleep after every request.

    Returns:
        None. The ``{category: [descriptor, ...]}`` mapping is written to
        ``filename``.
    """
    prompts = [generate_prompt(category.replace('_', ' ')) for category in class_list]
    response_texts = []

    if "gpt-3.5" in model:
        # Chat models take a single conversation per request.
        for prompt in prompts:
            message = [
                {"role": "system", "content": "You are a chatbot"},
                {"role": "user", "content": prompt},
            ]
            response = generate_response(model, message)
            response_texts.append(response['choices'][0]['message']['content'])
            time.sleep(sleep_sec)
    else:
        # Completion models accept a batch of prompts per request.
        for prompt_partition in partition(prompts, 20):
            response = openai.Completion.create(model=model,
                                                prompt=prompt_partition,
                                                temperature=0.,
                                                max_tokens=100,
                                                )
            # Batched completions are not guaranteed to come back in prompt
            # order; each choice's `index` gives its position in the batch,
            # so sort on it to keep texts aligned with class_list.
            ordered_choices = sorted(response['choices'], key=lambda choice: choice['index'])
            response_texts.extend(choice['text'] for choice in ordered_choices)
            time.sleep(sleep_sec)

    descriptors_list = [stringtolist(response_text) for response_text in response_texts]
    descriptors = dict(zip(class_list, descriptors_list))

    # save descriptors to json file
    if not filename.endswith('.json'):
        filename += '.json'
    with open(filename, 'w', encoding='utf-8') as fp:
        json.dump(descriptors, fp, indent=4)

In [10]:
# Generate descriptors for every ADE class with gpt-3.5-turbo and write the
# resulting mapping to test.json (issues one API request per class).
obtain_descriptors_and_save("test.json", ADE_CLASSES, model="gpt-3.5-turbo", sleep_sec=0.5)