In [None]:
!pip install pypdfium2

In [None]:
import boto3
from io import BytesIO
import pypdfium2 as pdfium 
import os
import re
from transformers import AutoTokenizer, pipeline, Blip2Processor, Blip2ForConditionalGeneration, BitsAndBytesConfig
from PIL import Image
import torch

# goal
# 1) pick 2 or 3 language models that do image annotations 
# 2) run them on small sample of images from appraisals
# 3) manually look at annotations and evaluate performance


In [None]:
def read_from_s3(file_name, bucket='a1156-val', prefix="backend_appraisals/sample_data"):
    """
    Download a single PDF from S3 and return its raw bytes.

    Parameters
    ----------
    file_name : str
        Object name without the ".pdf" extension (the extension is appended here).
    bucket : str, optional
        Name of the S3 bucket to read from. Defaults to the bucket this notebook
        has always used.
    prefix : str, optional
        Key prefix under which the PDF lives. Leading/trailing slashes are
        ignored; an empty prefix addresses the bucket root.

    Returns
    -------
    bytes
        The complete body of the S3 object.

    Notes
    -----
    Errors raised by boto3 (missing key, missing credentials, ...) are not
    caught here and propagate to the caller.
    """
    # Join only the non-empty parts so an empty prefix does not yield a key starting with "/".
    key_parts = [prefix.strip("/"), f"{file_name}.pdf"]
    data_key = "/".join(part for part in key_parts if part)

    s3 = boto3.client("s3")
    return s3.get_object(Bucket=bucket, Key=data_key)["Body"].read()


def extract_images(file_name): 
    """
    Collect every embedded image object of a PDF into one flat list.

    The PDF bytes come from read_from_s3(file_name) and are opened with pypdfium2.
    Pages are visited in index order; within a page, objects are visited in the
    order page.get_objects() yields them. Each image object is converted with
    get_bitmap().to_pil() before being added to the result.
    """
    # Page-object type code that marks an image (FPDF_PAGEOBJ_IMAGE in PDFium's fpdf_edit.h).
    image_object_type = 3
    document = pdfium.PdfDocument(BytesIO(read_from_s3(file_name)))

    return [
        page_object.get_bitmap().to_pil()
        for page_number in range(len(document))
        for page_object in document[page_number].get_objects()
        if page_object.type == image_object_type
    ]

    
# def extract_images_and_nearby_text(file_name): 
#     """
#     Extracts images and nearby text from each page and returns a list of dictionaries 
#     with each image and nearby text (if applicable). 
#     """
#     pdf = pdfium.PdfDocument(BytesIO(read_from_s3(file_name)))
#     images_and_text_all_pages = []

#     #loop through every page in pdf
#     for i in range(len(pdf)):
#         page = pdf[i] 
#         images_with_positions = [] 

#         #extract each image and position 
#         for obj in page.get_objects(): 
#             if obj.type == 3: 
#                 image = obj.get_bitmap().to_pil() 
#                 pos = obj.get_pos() 
#                 images_with_positions.append((image, pos)) 

#         if images_with_positions:
#             page_text = page.get_textpage()
#             #images_and_text = []

#             for image, pos in images_with_positions:
#                 #expand the image box to the right 
#                 expanded_box = (pos[0], pos[1] + 150, pos[2] + 250, pos[3])
#                 nearby_text = page_text.get_text_bounded(*expanded_box)

#                 images_and_text_all_pages.append({
#                     'image': image,
#                     'label': nearby_text
#                 })

#             #images_and_text_all_pages.append(images_and_text)

#     return images_and_text_all_pages

# extracted_images_and_labels = extract_images_and_nearby_text("appraisal_sf")
# #list comprehension to filter
# subject_images = [x for x in extracted_images_and_labels if "comparable" not in x['label'].lower()][3:]




# Configure model
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16
# )

# Default generation budget shared by the captioning helpers/cells in this notebook.
max_new_tokens = 1024
# def mm_generate_caption(pil_image):

#     inputs = processor(images=pil_image, return_tensors="pt")
#     generated_ids = model.generate(**inputs)
#     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
#     return generated_text


def generate_caption(pil_image, prompt = None, max_new_tokens = max_new_tokens, min_length = 20, num_beams = 3):
    """
    Generate text for one image with the notebook-level `processor` and `model`.

    Parameters
    ----------
    pil_image
        Image handed to the processor as `images=`.
    prompt : str, optional
        Text handed to the processor as `text=`. With None the model captions
        the image unconditionally.
    max_new_tokens : int, optional
        Forwarded to `model.generate`. Defaults to the module-level
        `max_new_tokens` value at the time this function is defined.
    min_length : int, optional
        Forwarded to `model.generate` (default 20, as before).
    num_beams : int, optional
        Forwarded to `model.generate` (default 3, as before).

    Returns
    -------
    str
        First decoded sequence with special tokens skipped and surrounding
        whitespace stripped.

    Notes
    -----
    Relies on the globals `processor` and `model`, which must be defined (by a
    model-loading cell) before this function is called.
    """
    # The model is loaded with device_map="auto", so it may live on a GPU; the processor
    # builds its tensors on the CPU, so move them to the model's device before generating.
    inputs = processor(images=pil_image, text=prompt, return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        min_length=min_length,
        num_beams=num_beams,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return generated_text



In [None]:
# Pull every embedded image out of the "appraisal_sf" sample PDF.
extracted_images_sf = extract_images("appraisal_sf")

# Subject-property photos: the slice and the labels are maintained by hand for now,
# because this sample document uses a different format.
subject_images_sf = extracted_images_sf[3:12]
labels_sf = [
    'subject front', 'subject rear', 'subject street',
    'kitchen', 'nook', 'living/dining',
    'bedroom', 'bathroom', 'bathroom',
]

# Pair each selected image with its hand-written label (zip stops at the shorter list).
labeled_images_sf = [
    dict(image=img, label=lbl)
    for img, lbl in zip(subject_images_sf, labels_sf)
]

# Open each selected image for a visual check.
for image in subject_images_sf:
    image.show()



In [None]:
# Pull every embedded image out of the "fha_appraisal" sample PDF.
extracted_images_fha = extract_images("fha_appraisal")

# Subject-property photos: the slice and the labels are maintained by hand for now,
# because this sample document uses a different format.
subject_images_fha = extracted_images_fha[11:18]
labels_fha = [
    'Subject Front', 'Subject Rear', 'Subject Street',
    'Interior', 'Interior', 'Interior', 'Interior',
]

# Pair each selected image with its hand-written label (zip stops at the shorter list).
labeled_images_fha = [
    dict(image=img, label=lbl)
    for img, lbl in zip(subject_images_fha, labels_fha)
]

# Open each selected image for a visual check.
for image in subject_images_fha:
    image.show()


In [None]:
######################################## BLIP2 CAPTION #######################################################
# Zero-shot image-to-text generation with BLIP-2.

# Single source for the checkpoint id so processor and model cannot drift apart.
BLIP2_CHECKPOINT = "Salesforce/blip2-opt-2.7b"
processor = Blip2Processor.from_pretrained(BLIP2_CHECKPOINT)
model = Blip2ForConditionalGeneration.from_pretrained(BLIP2_CHECKPOINT, device_map="auto")

# NICE HOUSE
for item in labeled_images_sf:
    pil_image, label = item['image'], item['label']
    caption = generate_caption(pil_image)
    print(f"Label: {label}, Generated Caption: {caption}")

# NOTES: on par with BLIP large but cleaner (maybe slightly less detail; should check with a distressed property)
# Label: subject front, Generated Caption: an artist's impression of a modern house in a rural setting, with a deck and patio area
# Label: subject rear, Generated Caption: an artist's rendering of a modern home with a deck on the side of the house and a walkway leading to the front door
# Label: subject street, Generated Caption: an artist's rendering of a modern house in the Netherlands, with a balcony and decking on the roof
# Label: nook, Generated Caption: an open plan kitchen and dining area in a modern home with green walls and a wooden table and chairs
# Label: living/dining, Generated Caption: a modern living room with a couch, coffee table, and a window overlooking the outside of the house
# Label: bedroom, Generated Caption: a 3d rendering of a bedroom with a bed and a window in the middle of the room
# Label: bathroom, Generated Caption: a white bathroom with a shower, toilet and shelf with towels on the wall next to the sink
# Label: bathroom, Generated Caption: a white bathroom with lavender plants on the counter and a sink in the corner of the room

# FHA appraisal
for item in labeled_images_fha:
    pil_image, label = item['image'], item['label']
    caption = generate_caption(pil_image)
    print(f"Label: {label}, Generated Caption: {caption}")

# NOTE: better at the text output (clearer)
# Label: Subject Rear, Generated Caption: a gray house sitting in the middle of a grassy field with trees on either side of it
# Label: Subject Street, Generated Caption: a view of a residential street with trees and cars parked on both sides of the road in front of a house
# Label: Interior, Generated Caption: a kitchen with white cabinets, a refrigerator, and a stove top oven with a microwave in it
# Label: Interior, Generated Caption: rental homes for rent, listingid 263824, location 801 w new york avenue newark, new jersey 07102
# Label: Interior, Generated Caption: a bedroom with a bed, a dresser, and a window in the corner of the room
# Label: Interior, Generated Caption: a bathroom with a toilet, tub, and window blinds in the corner of the room next to the sink



In [None]:
######################################## BLIP2 VQA #####################################################

# need to fine-tune blip2 on kaggle home data - it thinks every house is in good condition and nothing is wrong when clearly there is a big difference
# prompt = "Question: Describe what is in the image in detail and rate the general condition from a scale of very good, good, neutral, poor, very poor. Answer:"
# for item in labeled_images_sf:
#     pil_image = item['image']
#     label = item['label']
#     response = generate_caption(pil_image, prompt)
#     print(response)

#The house is in very good condition
#The image shows a modern house with a wooden deck, a wooden fence, and a wooden fence. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
#The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area.
#The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# abs house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 
# The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. The house is located in a residential area. 

# for item in labeled_images_fha:
#     pil_image = item['image']
#     label = item['label']
#     response = generate_caption(pil_image, prompt)
#     print(response)


In [None]:
######################################## LLAVA ####################################################

# llava_pipeline = pipeline("image-to-text", model = "llava-hf/llava-1.5-7b-hf",  model_kwargs={"quantization_config": quantization_config})
# llava_prompt = "USER: <image>\nDescribe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. \nASSISTANT:"

# for item in labeled_images_sf:
#     pil_image = item['image']
#     label = item['label']
#     caption = llava_pipeline(pil_image, prompt=llava_prompt, generate_kwargs={"max_new_tokens": max_new_tokens})
#     print(f"Label: {label}, Generated Caption: {caption[0]['generated_text']}")


# Label: subject front, 
# Generated Caption: 
    # USER: Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a large, modern-looking house with a deck and a patio. The house is situated in a lush green field, giving it a serene and peaceful atmosphere. 
    # The deck is surrounded by a fence, providing a sense of security. The house has a few potted plants, with one placed near the center of the deck and two others located closer to the house. 
    # There are also a couple of chairs on the deck, one near the center and the other closer to the house. Overall, the house appears to be in good condition, with a well-maintained deck and patio area.
    # The lush green field surrounding the house adds to the beauty of the property.

# Label: subject rear, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a large, modern-looking building with a brown roof. The building has a unique design, with a combination of brown and white colors. 
    # The building is surrounded by a lush green field, giving it a serene and picturesque setting. In terms of condition, I would rate the building as very good. 
    # The structure appears to be well-maintained and in good shape, suggesting that it is well-cared for and possibly used for a specific purpose.

# Label: subject street, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a large, modern-looking house with a wooden exterior. The house is situated in a residential area, surrounded by other houses. 
    # The house has a deck and a staircase, providing access to the outdoor space. The overall condition of the house appears to be very good, as it is well-maintained and designed 
    # to provide a comfortable living environment.

# Label: kitchen, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a large, open kitchen with a wooden dining table in the center. The table is surrounded by chairs, and there are multiple bottles and bowls placed on it. 
    # The kitchen is equipped with a refrigerator, an oven, and a sink. The overall condition of the kitchen appears to be very good, with a clean and organized layout.

# Label: nook, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a spacious and well-lit room with a wooden dining table in the center. The table is surrounded by several chairs, some of which are placed around the table,
    # while others are positioned near the walls. A potted plant is located near the table, adding a touch of greenery to the room. The room appears to be a combination of a living room and a dining area. 
    # The overall condition of the room is good, with a clean and organized layout. The wooden dining table and chairs create a warm and inviting atmosphere, making the room an ideal space 
    # for gatherings and meals.
    
# Label: living/dining, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a spacious and well-lit living room with a large window. The room features a comfortable couch, a chair, and a dining table. There are also several potted plants, 
    # adding a touch of greenery to the space. The room appears to be in a neutral condition, with no apparent signs of damage or disrepair. The overall atmosphere of the room is inviting and pleasant.

# Label: bedroom, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a large, well-lit bedroom with a neatly made bed. The room is decorated with a few potted plants, which are placed around the room, adding a touch of greenery. 
    # There is a chair in the room, and a vase can be seen on a surface. The overall condition of the room appears to be very good, with a clean and organized appearance.

# Label: bathroom, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a white bathroom with a shower stall and a toilet. The shower stall is made of glass, giving it a modern and sleek appearance. 
    # The toilet is situated next to the shower stall, and there is a sink nearby. The bathroom appears to be well-maintained and clean, with no visible signs of damage or wear. 
    # Overall, the condition of the bathroom is very good.

# Label: bathroom, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a small, white bathroom with a sink and a mirror. The sink is located in the middle of the room, and the mirror is placed above it. 
    # The bathroom appears to be clean and well-maintained. The overall condition of the bathroom is very good.



# The pipeline and prompt definitions near the top of this cell are commented out, so this
# loop raised NameError on a fresh kernel. They are rebuilt here with the same checkpoint,
# prompt text and 4-bit quantization settings as the commented-out originals, so the loop
# can run on its own.
llava_prompt = "USER: <image>\nDescribe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. \nASSISTANT:"
llava_pipeline = pipeline(
    "image-to-text",
    model="llava-hf/llava-1.5-7b-hf",
    model_kwargs={
        "quantization_config": BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
    },
)

# Caption every labelled FHA image and print the label next to the generated text.
for item in labeled_images_fha:
    pil_image = item['image']
    label = item['label']
    caption = llava_pipeline(pil_image, prompt=llava_prompt, generate_kwargs={"max_new_tokens": max_new_tokens})
    print(f"Label: {label}, Generated Caption: {caption[0]['generated_text']}")


# Label: Subject Front, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a small, well-maintained house with a red door. The house is situated on a street, surrounded by greenery. There are two potted plants in the front yard, 
    # one on the left side and the other on the right side of the house. A chair is placed on the left side of the house, and a bench is located on the right side. 
    # The overall condition of the house appears to be good, with well-maintained outdoor furniture and a neatly arranged front yard.

# Label: Subject Rear, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image features a small, white house with a green roof, surrounded by a lush green field. The house is situated in a wooded area, with trees surrounding it. 
    # The house appears to be in a good condition, with no visible signs of damage or wear. The green roof adds a touch of color to the otherwise neutral-toned house. 
    # Overall, the house seems to be in a very good condition.

# Label: Subject Street, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a quiet residential street with a mix of cars and trucks parked along the side of the road. There are several cars and a truck parked on the side of the street, 
    # with some cars being closer to the foreground and others further back. The street appears to be a mix of residential and commercial properties, with a few houses and a truck parked near the curb.
    # The overall condition of the street seems to be neutral, as it is not overly busy or congested, but it is not completely empty either. The street appears to be a typical residential 
    # area with some commercial properties, and the parked vehicles do not seem to be causing any significant issues.

# Label: Interior, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a clean and well-organized kitchen. The countertops are clutter-free, and the appliances, such as the refrigerator, oven, and sink, are in good condition. 
    # The kitchen is equipped with various items, including multiple knives, cups, bowls, and bottles. There is also a vase placed on the counter. 
    # The overall condition of the kitchen is very good, as it is well-maintained and free of any visible dirt or clutter.

# Label: Interior, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image shows a dimly lit room with a hardwood floor. The room appears to be a living room or a bedroom, as it has a bed and a lamp. The lamp is placed on a stand, 
    # and there is a window in the room. The room is mostly empty, with only a few items present. The overall condition of the room can be rated as poor, as it seems unoccupied and 
    # lacks any significant decorations or furniture.

# Label: Interior, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image shows a bedroom with a bed that has a green comforter on it. The bed is positioned under a window, which allows natural light to enter the room. 
    # The room appears to be in a neutral condition, as there are no signs of extreme cleanliness or disarray. The bedroom seems to be a comfortable and functional space.

# Label: Interior, 
# Generated Caption: 
    # USER:  Describe what I am looking at it in detail. Additionally, rate the general condition from a scale of very good, good, neutral, poor, very poor. 
    # ASSISTANT: The image depicts a small, white bathroom with a toilet situated in the corner. The bathroom appears to be in a state of disrepair, with a dirty and unclean appearance. 
    # The toilet is the main focus of the scene, and the overall condition of the bathroom is poor.



In [None]:
######################################## FLORENCE #################################################### (ran in Google Colab)
# import timm
# import flash_attn
# import einops

# model_id = "microsoft/Florence-2-large"
# model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config = quantization_config, trust_remote_code=True)
# processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
# prompt = "<MORE_DETAILED_CAPTION>"

# def florence_answer_prompt(pil_image, prompt):

#     inputs = processor(images=pil_image, text = prompt, return_tensors="pt")

#     generated_ids = model.generate(
#       input_ids=inputs["input_ids"],
#       pixel_values=inputs["pixel_values"],
#       max_new_tokens=1024,
#       do_sample=False,
#       num_beams=3,
#     )

#     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     parsed_answer = processor.post_process_generation(generated_text, task="<MORE_DETAILED_CAPTION>", image_size=(pil_image.width, pil_image.height))

#     return parsed_answer


# for item in labeled_images_sf:
#     pil_image = item['image']
#     label = item['label']
#     caption = florence_answer_prompt(pil_image, prompt)
#     print(f"Label: {label}, Generated Caption: {caption[0]['<MORE_DETAILED_CAPTION>']}")



#BASE MODEL (3mins)
# 'The image is a 3D rendering of a modern house with a wooden exterior and a sloping roof. The house has a white exterior with large windows and a balcony on the second floor. 
    # The balcony has a railing and there is a wooden walkway leading up to it. The walkway is surrounded by a wooden fence and there are trees and bushes in the background. 
    # The sky is blue with some clouds and the overall atmosphere of the image is peaceful and serene.'}

# 'The image is a 3D rendering of a modern house. The house is made of corrugated metal and has a sloping roof. It has multiple windows and doors, and a staircase leading up to the entrance.
    # The exterior of the house is painted in a dark brown color, and there is a small garden in front of it with shrubs and bushes. The sky is blue and there are a few clouds in the background. 
    # The overall atmosphere of the image is peaceful and serene.'}

# 'The image is a 3D rendering of a row of houses in a residential area. The houses are made of wood and have a modern design with a sloping roof and large windows. 
    # The front of the houses has a small balcony with a railing and a small garden with shrubs and trees. The sky is blue and there are a few clouds in the background. 
    # The street is lined with trees and there is a sidewalk on the right side of the image. The overall atmosphere of the scene is peaceful and serene.'}

# 'The image shows a modern kitchen with white cabinets and a wooden table in the center. The kitchen has a large window on the right side, allowing natural light to enter the space. 
    # The walls are painted in a light green color, and the floor is made of white tiles. On the left side of the image, there is a white countertop with a sink and a gas cooktop. 
    # Above the countertop, there are two pendant lights hanging from the ceiling. The overall style of the kitchen is minimalistic and contemporary.'}

# 'The image shows a modern and minimalistic living room and kitchen area. The room has a white ceiling with recessed lighting and a wooden floor. On the left side of the image, 
    # there is a white refrigerator and a green wall with a picture hanging on it. Next to the refrigerator, there are two pendant lights hanging from the ceiling. 
    # In the center of the room, there has a wooden table with a few items on it, including a vase with a green plant and a white vase. The walls are painted in a light blue color,
    # and there are a few black and white furniture pieces scattered around the room. A window with white curtains is visible in the background, letting in natural light.'}

# 'The image shows a modern living room with a large window on the left side. The room has a gray sofa with blue throw pillows, a round coffee table with a black metal frame, and two green side tables. 
    # On the right side of the room, there is a wooden dining table with four chairs. The floor is made of light-colored tiles, and the walls are painted white. 
    # The window has large panes of glass, allowing natural light to enter the room. Through the window, we can see a view of trees and a building outside. 
    # The overall atmosphere of the space is bright and airy.'}

# 'The image shows a corner of a room with a wooden floor and white walls. The room has a large window on the left side, allowing natural light to enter the space. 
    # On the right side, there is a sliding glass door that leads to a balcony with a view of trees and a fence. In the center of the room, there are two wooden benches with gray cushions and a
    # wooden headboard. Above the benches is a wooden pendant light hanging from the ceiling. The floor is made of light-colored tiles.'}

# 'The image shows a modern bathroom with a minimalist design. The walls are painted white and the floor is made of dark grey tiles. On the left side of the image, 
    # there is a white shelving unit with three shelves. The shelves are filled with folded towels and a potted plant. Next to the shelves, there are two white toilet seats. 
    # The shower area has a glass door with a silver handle and a showerhead. The overall color scheme of the bathroom is white, gray, and black.'}

# 'The image shows a small bathroom with a white vanity and a white sink. The vanity has two drawers and a mirror above it. On the right side of the vanity, there is a white shelf with 
    # a potted plant on top. The walls are painted in a light blue color and the floor is covered with a gray carpet. There is a door on the left side and a window on the far wall. 
    # The overall style of the bathroom is modern and minimalistic.'}



#LARGE MODEL (7mins)

# 'The image is a 3D rendering of a modern house with a wooden exterior. The house has a sloping roof and large windows that let in natural light. 
    # The front of the house is covered with a white wall and has a wooden deck with a railing. The deck is surrounded by a wooden fence and there are trees and bushes in the foreground. 
    # In the background, there are other houses and buildings visible. The sky is blue with some clouds. The overall atmosphere of the image is peaceful and serene.'}

# 'The image is a 3D rendering of a modern building with a unique design. The building is made of wood and has a sloping roof. It has multiple windows and doors, 
    # and a staircase leading up to the entrance. The entrance is covered with a metal railing, and there is a small garden in front of the building with shrubs and trees. 
    # The sky is blue and there are a few clouds in the background. The overall atmosphere of the image is peaceful and serene.'}

# 'The image is a 3D rendering of a residential area. It shows a row of houses on a street, with a wooden building on the left side of the image. The building has a sloping roof and
    # a large window on the front. The street is lined with trees and shrubs, and there is a small pond in the foreground. The sky is blue and there are a few clouds in the distance. 
    # The houses are painted in different colors and styles, with some having white walls and others having brown roofs. The overall atmosphere of the scene is peaceful and serene.'}

# 'The image is a 3D rendering of a modern kitchen and dining area. The kitchen has white cabinets and a wooden countertop with a sink and a gas stove. There is a large island in the center
    # of the room with a wooden table and chairs. Above the table, there are two pendant lights hanging from the ceiling. The walls are painted in a light green color and there is a window 
    # on the right side of the image that lets in natural light. The floor is made of light-colored tiles. The overall style of the kitchen is minimalistic and contemporary.'}

# 'The image shows a modern kitchen and living room in a small apartment. The kitchen has a wooden countertop with a sink and a potted plant on it. The walls are painted in a light green color
    # and there is a large window on the right side of the image that lets in natural light. On the left side, there are white cabinets and a white refrigerator. Above the kitchen, there is an 
    # orange pendant light hanging from the ceiling. The floor is made of light-colored tiles. The room has a gray sofa and a gray armchair in the background. The overall style of the room is 
    # minimalistic and contemporary.'}

# 'The image shows a modern living room with a large window on the left side. The room has a gray sofa with blue throw pillows and a round coffee table in the center. 
    # On the right side, there is a wooden dining table with four chairs around it. The floor is made of light-colored tiles and the walls are painted white. The ceiling is high and has a 
    # chandelier hanging from it. Through the large window, one can see a balcony with a view of trees and a building. The overall atmosphere of the room is bright and airy.'}

# 'The image shows a modern living room with a large window on the left side. The window has a sliding glass door that leads to a balcony with a view of trees and a fence. 
    # On the right side of the room, there is a wooden bench with a gray cushion and a white coffee table in front of it. Above the bench, there are two pendant lights hanging from the ceiling. 
    # The walls are painted in a light beige color and the floor is made of light-colored tiles. The room appears to be empty, with no furniture or decorations in sight.'}

# 'The image shows a modern bathroom with a minimalist design. The walls are painted white and the floor is covered with black tiles. On the left side of the image, 
    # there is a white ladder shelf with folded towels and a potted plant on top. Next to the ladder shelf, there are two white toilet seats. The shower area is enclosed by a glass door with 
    # a silver handle and a showerhead. The overall color scheme of the bathroom is white and gray.'}

# 'The image shows a modern bathroom with a white color scheme. The walls are painted white and there is a large mirror above the sink. The sink has a white countertop with a silver faucet and 
    # two drawers below it. On the right side of the sink, there are two white shelves with a vase of purple flowers on top. The floor is made of gray tiles. The bathroom has a door on the left
    # side and a small window on the right. The overall style of the bathroom is minimalistic and contemporary.'}



# for item in labeled_images_fha:
#     pil_image = item['image']
#     label = item['label']
#     caption = florence_answer_prompt(pil_image, prompt)
#     print(f"Label: {label}, Generated Caption: {caption[0]['<MORE_DETAILED_CAPTION>']}")







In [None]:
# APPENDIX

# pipeline captioner
# def captioner(data_dict, hf_model): 
#     captioner = pipeline("image-to-text", model = hf_model)

#     for item in data_dict:
#         pil_image = item['image']
#         label = item['label']
#         caption = captioner(pil_image)
#         print(f"Label: {label}, Generated Caption: {caption[0]['generated_text']}")
#         #pil_image.close()


######################################## BLIP LARGE #####################################################
# bootstrapping language-image pre-training = BLIP (general image captioning) - trained on clean and noisy web data
# large built with a vit-l backbone --> winner at room type and can tell if it's modern but we need to see about condition

# NICE HOUSE
# blip_large_output_sf = captioner(labeled_images_sf,  "Salesforce/blip-image-captioning-large")
# blip_large_output_sf

# Label: subject front, Generated Caption: rendering of a modern home with a deck and a covered patio
# Label: subject rear, Generated Caption: rendering of a modern home with a garden and walkway
# Label: subject street, Generated Caption: rendering of a small house with a balcony and a balcony
# Label: kitchen, Generated Caption: there is a kitchen with a table and chairs in it
# Label: nook, Generated Caption: there is a table with chairs and a plant in a room
# Label: living/dining, Generated Caption: there is a living room with a couch, table, chairs and a television
# Label: bedroom, Generated Caption: there is a bed in a room with a lot of windows
# Label: bathroom, Generated Caption: there is a white bathroom with a toilet and a shower
# Label: bathroom, Generated Caption: there is a white bathroom with a sink and a mirror



# FHA
# large built with a vit-l backbone --> winner

# blip_large_output_fha = captioner(labeled_images_fha,  "Salesforce/blip-image-captioning-large")
# blip_large_output_fha

# NOTE: fairly accurate at room type but not great at providing detail / can't pick up on condition
# Label: Subject Front, Generated Caption: this is a house with a red door and a red door
# Label: Subject Rear, Generated Caption: there is a house that is sitting in the grass
# Label: Subject Street, Generated Caption: cars parked on the side of the road in a residential area
# Label: Interior, Generated Caption: there is a kitchen with a refrigerator, stove, sink and a window
# Label: Interior, Generated Caption: there is a small room with a television and a lamp
# Label: Interior, Generated Caption: there is a bed with a green blanket and a green blanket on it
# Label: Interior, Generated Caption: there is a bathroom with a toilet, sink, and bathtub


###################################  BLIP Base #######################################
# bootstrapping language-image pre-training = BLIP (general image captioning) - trained on clean and noisy web data (vit backbone)
# visual transformer is image encoder and text transformer is encoder-decoder

# blip_base_output_sf = captioner(labeled_images_sf,  "Salesforce/blip-image-captioning-base")
# blip_base_output_sf


# Label: subject front, Generated Caption: a rendering of a house on a hill
# Label: subject rear, Generated Caption: a rendering of a house with a garden and a walkway
# Label: subject street, Generated Caption: a rendering of a small house in the middle of a town --> town is interesting lol
# Label: kitchen, Generated Caption: a kitchen and dining area in a modern apartment
# Label: nook, Generated Caption: a room with a table and chairs in it
# Label: living/dining, Generated Caption: a living room with a couch and a table
# Label: bedroom, Generated Caption: a room with a couch and a window
# Label: bathroom, Generated Caption: a bathroom with a toilet and shelves
# Label: bathroom, Generated Caption: a white bed --> wrong





####################  generativeimage2text(git) from microsoft #############################
# base was terrible; large was too large and i think blip large was better anyway

# git_output_sf = captioner(labeled_images_sf,  "microsoft/git-base")
# git_output_sf

# Label: subject front, Generated Caption: the building is white
# Label: subject rear, Generated Caption: the house in the middle of the street
# Label: subject street, Generated Caption: the house is the pink one in the middle
# Label: kitchen, Generated Caption: white kitchen cabinets
# Label: nook, Generated Caption: the window in the room
# Label: living/dining, Generated Caption: a large window
# Label: bedroom, Generated Caption: the house is on the market for $ 3. 5 million.
# Label: bathroom, Generated Caption: a white shower stall
# Label: bathroom, Generated Caption: a mirror on the wall


##########################################  VIT GPT2 ######################################
#vit gpt2 (multimodal - combined vision transformer and gpt2 for text generation)

# vit_gpt2_output_sf = captioner(labeled_images_sf,  "nlpconnect/vit-gpt2-image-captioning")
# vit_gpt2_output_sf


#NOTE: This was fine but BLIP was better
# Label: subject front, Generated Caption: a white and black dog standing in front of a fence --> wrong
# Label: subject rear, Generated Caption: a large building with a large window on top 
# Label: subject street, Generated Caption: a building with a fence and a bench in front of it 
# Label: kitchen, Generated Caption: a kitchen with a table and a stove 
# Label: nook, Generated Caption: a kitchen with a table and a plant 
# Label: living/dining, Generated Caption: a living room with a couch, chairs, and a table 
# Label: bedroom, Generated Caption: a large room with a couch and a window 
# Label: bathroom, Generated Caption: a bathroom with a toilet and a shower 
# Label: bathroom, Generated Caption: a bathroom with a sink, toilet and bathtub 

In [None]:
######################################## FLORENCE ###################################################

# From Microsoft. Note I did not use this model because:
# ImportError: This modeling file requires the following packages that were not found in your environment: einops, flash_attn. Run `pip install einops flash_attn`
    # So i pip install BUT then i get this error in the terminal because no .git in sandbox --> fatal: not a git repository (or any of the parent directories): 
    # .git/tmp/pip-install-43kvk0cx/flash-attn_86c0cdde231f434284c2f2f8373f2f1b/setup.py:95: UserWarning: flash_attn was requested,
    # but nvcc was not found.  Are you sure your environment has nvcc available?  If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, 
    # only images whose names contain 'devel' will provide nvcc.



# import requests
# from PIL import Image
# from transformers import AutoProcessor, AutoModelForCausalLM 

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-base")
# processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", device_map="auto", trust_remote_code=True)

# prompt = "<OD>"

# inputs = processor(text=prompt, images=labeled_images_sf[0], return_tensors="pt").to(device, torch_dtype)

# generated_ids = model.generate(
#     input_ids=inputs["input_ids"],
#     pixel_values=inputs["pixel_values"],
#     max_new_tokens=1024,
#     do_sample=False,
#     num_beams=3,
# )
# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

# parsed_answer = processor.post_process_generation(generated_text, task="<OD>", image_size=(image.width, image.height))

# print(parsed_answer)
