In [1]:
import json

import pandas as pd
import requests
from PIL import Image
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor


# Identifier of the checkpoint used for both the model and its processor.
model_id = "microsoft/Phi-3-vision-128k-instruct"

# Load the causal-LM weights for `model_id`, placed on CUDA, with the dtype
# left to the library ("auto").
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Matching processor, loaded from the same checkpoint; `run` uses it for the
# chat template, input encoding and decoding.
processor = AutoProcessor.from_pretrained(
    model_id,
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
import re

def get_first_dict_from_string(json_str):
    """Extract and parse the first JSON object embedded in ``json_str``.

    The text before the first ``{`` and anything after the object's matching
    closing brace are ignored, so the object may be surrounded by arbitrary
    text (for example Markdown code fences).

    A real JSON decoder is used instead of a ``{.*?}`` regex: the regex stops
    at the first ``}`` it sees, which truncates nested objects and values that
    contain a literal ``}``.

    Args:
        json_str: Text that is expected to contain a JSON object.

    Returns:
        The decoded object (a ``dict``) that starts at the first ``{``.

    Raises:
        ValueError: If ``json_str`` contains no ``{`` at all, or if the text
            starting at the first ``{`` is not a valid JSON object.
    """
    start = json_str.find("{")
    if start == -1:
        raise ValueError("No dictionary found in the JSON data.")

    try:
        # raw_decode parses exactly one JSON value beginning at `start` and
        # tolerates trailing text after it.
        parsed, _end = json.JSONDecoder().raw_decode(json_str, start)
    except json.JSONDecodeError as e:
        raise ValueError("Invalid JSON data.") from e
    return parsed


In [None]:
import json

def run(text, img_path, max_new_tokens=500):
    """Ask the vision model a question about one image and return its reply.

    Uses the notebook-level ``model`` and ``processor`` objects.

    Args:
        text: Instruction placed after the ``<|image_1|>`` placeholder in the
            user message.
        img_path: Path of the image file to open and pass to the processor.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 500, the value previously hard-coded).

    Returns:
        The decoded text of the newly generated tokens only (the prompt tokens
        are stripped), with special tokens removed.
    """
    messages = [
        {"role": "user", "content": f"<|image_1|>\n{text}"},
    ]
    prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Open the image in a context manager so the file handle is released once
    # the processor has consumed it; this function is called once per file.
    with Image.open(img_path) as image:
        # Follow the model's own device instead of a hard-coded "cuda:0".
        inputs = processor(prompt, [image], return_tensors="pt").to(model.device)

    # Greedy decoding (do_sample=False). No temperature is passed: it is a
    # sampling-only setting, so the previous temperature=0.0 was redundant.
    generate_ids = model.generate(
        **inputs,
        eos_token_id=processor.tokenizer.eos_token_id,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )

    # Drop the prompt tokens so only the newly generated part is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generate_ids = generate_ids[:, prompt_length:]
    response = processor.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    return response
    
   
# Define the folder path
import os
folder_path = "/home/ratish/repos/words/data/Words3"
# os.listdir returns entries in arbitrary order; sort so the printed indices
# and the row order in words.csv are reproducible between runs.
files = sorted(os.listdir(folder_path))

# The prompt is the same for every screenshot, so build it once.
text = """
You will see a screenshot containing a word and its meaning. 
Your job is to provide the word and its meaning as a json:
   {
     "word": "WORD",
     "meaning": "Meaning"
    }
    Try to capture all different meanings llisted in the screenshot. Only return the JSON, nothing else
"""

rows = []
for i, f in enumerate(files):
    img_path = os.path.join(folder_path, f)
    print(f"\n------------ {i} --------------")

    # Step 1: query the model. This is kept separate from parsing so that a
    # failure here (unreadable file, generation error) can never fall through
    # to the parsing fallback with an unbound or stale `response` left over
    # from the previous image.
    try:
        response = run(text, img_path)
    except Exception as e:
        print(f"Failed: {e} {img_path}")
        continue

    # Step 2: parse the reply. Try it as pure JSON first, then fall back to
    # extracting the first JSON object embedded in surrounding text.
    try:
        out = json.loads(response)
    except json.JSONDecodeError:
        print("failed first time")
        try:
            out = get_first_dict_from_string(response)
        except ValueError as e:
            print(f"Failed: {e} {response}")
            continue

    print(out)
    rows.append(out)


# One record per successfully parsed screenshot; keys become CSV columns.
df = pd.DataFrame(rows)
df.to_csv("words.csv", index=False)


------------ 0 --------------




{'word': 'respite', 'meaning': 'a short rest or period of relief from something'}

------------ 1 --------------
{'word': 'Pollyannaish', 'meaning': 'overly optimistic'}

------------ 2 --------------
{'word': 'reticent', 'meaning': 'unwilling or reluctant to draw attention to oneself, or restrained and formal in manner. For example, someone who is reticent about their feelings may like to keep them to themselves, or be quiet in rowdy groups where everyone is talking over each other.'}

------------ 3 --------------
{'word': 'sullen', 'meaning': "bad-tempered and sulky; gloomy. 'a sullen pout'", 'similar': ['surly', 'sulky', 'pouting'], 'definition': 'adjective: sullen', 'usage example': 'a sullen pout', 'synonym': ['anglo-norman french'], 'origin': 'anglo-norman french', 'plural noun': 'sullens', 'additional meaning': 'a sulky or depressed mood.'}

------------ 4 --------------
{'word': 'malleable', 'meaning': 'The clay became malleable and easy to work with after a little water was a

failed first time
{'word': 'rakish', 'meaning': "having a streamlined appearance that suggests speed, or being dashingly or carelessly unconventional. For example, you might describe a ship as rakish, or say that someone's clothes are rakish. In nautical terms, rakish can also refer to a ship or boat with lines that suggest speed."}

------------ 49 --------------
{'word': 'expound', 'meaning': 'present and explain (a theory or idea) systematically and in detail.', 'similar': ['present', 'put forward', 'set forth'], 'usage example': 'he was expounding a powerful argument', 'dictionary': {'definitions': [{'source': 'Oxford Languages', 'definition': 'expound', 'examples': ['he was expounding a powerful argument']}], 'synonyms': ['explain', 'interpret', 'explicate'], 'antonyms': ['explain', 'interpret', 'explicate']}}

------------ 50 --------------
failed first time
{'word': 'languid', 'meaning': '1. (of a person, manner, or gesture) displaying or having a disinclination for physical exe

{'word': 'elusive', 'meaning': 'elusive'}

------------ 92 --------------
{'word': 'panache', 'meaning': 'distinctive style'}

------------ 93 --------------
failed first time
{'word': 'concomitantly', 'meaning': 'at the same time, or along with something else, as a related feature or circumstance.'}

------------ 94 --------------
{'word': 'predilection', 'meaning': 'strong liking'}

------------ 95 --------------
failed first time
{'word': 'effaced', 'meaning': 'to wipe out, erase, or obliterate something.'}

------------ 96 --------------
failed first time
{'word': 'factious', 'meaning': 'relating or inclined to a state of faction.'}

------------ 97 --------------
{'word': 'polemic', 'meaning': 'verbal attack'}

------------ 98 --------------
failed first time
{'word': 'sybarite', 'meaning': 'a person who is self-indulgent in their fondness for sensuous luxury.'}

------------ 99 --------------
failed first time
{'word': 'slapdash', 'meaning': "done too hurriedly and carelessly. 'h

failed first time
{'word': 'alacrity', 'meaning': 'brisk and cheerful readiness; she accepted the invitation with alacrity'}

------------ 136 --------------
{'word': 'extenuating', 'meaning': 'making less guilty'}

------------ 137 --------------
failed first time
{'word': 'Truculence', 'meaning': "is a noun that means the quality of being aggressively hostile or belligerent. It can also mean being scathingly harsh or vitriolic. For example, 'His good mood gradually gave way to truculence when she didn't pay him any attention'.", 'synonym': 'aggressiveness', 'pronunciation': 'tru-kul-uh-nsee', 'definition': 'the quality or attitude of being aggressively hostile or belligerent.', 'example': "His good mood gradually gave way to truculence when she didn't pay him any attention."}

------------ 138 --------------
{'word': 'pugnacious', 'meaning': 'combative'}

------------ 139 --------------
failed first time
{'word': 'rancor', 'meaning': 'a deep, bitter anger or resentment that is rankli

{'word': 'impudent', 'meaning': 'showing disregard or scorn for others, or being disrespectful or insolent. For example, you might describe a student as impudent if they behave in a way that lacks respect.'}

------------ 170 --------------
{'word': 'capricious', 'meaning': 'having an unpredictable, sudden, and unaccountable change in attitude or behavior. For example, you might describe a child, a tyrant, a bride who leaves her groom at the altar, or weather as capricious. Synonyms of capricious include impulsive, whimsical, variable, flighty, mercurial, and fickle. Antonyms of capricious include steady, constant, and consistent.'}

------------ 171 --------------
{'word': 'leery', 'meaning': 'Without checking his references and talking to previous employers, I am leery of hiring the candidate.'}

------------ 172 --------------
failed first time
{'word': 'ameliorate', 'meaning': 'make (something bad or unsatisfactory) better.'}

------------ 173 --------------
{'word': 'immure', 'mea

{'word': 'philistine', 'meaning': 'hater of culture'}

------------ 220 --------------
{'word': 'voracious', 'meaning': 'wanting or devouring great quantities of food.', 'similar': ['insatiable', 'unquenchable']}

------------ 221 --------------
{'word': 'culpability', 'meaning': 'state of guilt'}

------------ 222 --------------
failed first time
{'word': 'besiege', 'meaning': 'to surround with a force or attack, as an army'}

------------ 223 --------------
{'word': 'prodigious', 'meaning': 'great in size, extent'}

------------ 224 --------------
{'word': 'edifying', 'meaning': 'enlightening'}

------------ 225 --------------
failed first time
{'word': 'impertinent', 'meaning': '1. not showing proper respect; rude.', 'similar': ['rude', 'insolent', 'impolite']}

------------ 226 --------------
failed first time
{'word': 'smitten', 'meaning': 'deeply affected by strong feelings of attraction, affection, or infatuation.'}

------------ 227 --------------
failed first time
{'word': 'be

failed first time
{'word': 'cupidity', 'meaning': "greed for money or possessions. 'new wealth, however tainted by cupidity and egoism, tends to be favorable for the arts'"}

------------ 266 --------------
{'word': 'hedge', 'meaning': 'to limit a statement'}

------------ 267 --------------
{'word': 'travesty', 'meaning': 'What I expected to be an intelligent, nuanced historical documentary turned out to be a poorly-produced travesty of the form.'}

------------ 268 --------------
{'word': 'epigram', 'meaning': "a pithy saying or remark expressing an idea in a clever and amusing way. 'a Wildean epigram'"}

------------ 269 --------------
failed first time
{'word': 'prerogative', 'meaning': 'a right or privilege exclusive to a particular individual or class.'}

------------ 270 --------------
failed first time
{'word': 'jarred', 'meaning': 'send a painful or damaging shock through (something, especially a part of the body).'}

------------ 271 --------------
failed first time
{'word': 