## Experiment 1 Molmo: Cross-Modal Probability Analysis

Set-up

In [None]:
from huggingface_hub import login
import torch

login()

HF_TOKEN = "your_huggingface_token_here"  # Replace with your Hugging Face token

# This will be removed in the final version - LLaMA requires access from Meta
hf_key = HF_TOKEN

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
import random
import pandas as pd
import numpy as np
import os
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, GenerationConfig
from tqdm import tqdm
import re

Label selection

In [3]:
list1 = [
    "looloo", "loolah", "looloh", "loonoo", "loonah", "loonoh", "loomoo", "loomah", "loomoh",
    "lahloo", "lahlah", "lahloh", "lahnoo", "lahnah", "lahnoh", "lahmoo", "lahmah", "lahmoh",
    "lohloo", "lohlah", "lohloh", "lohnoo", "lohnah", "lohnoh", "lohmoo", "lohmah", "lohmoh",
    "nooloo", "noolah", "nooloh", "noonoo", "noonah", "noonoh", "noomoo", "noomah", "noomoh",
    "nahloo", "nahlah", "nahloh", "nahnoo", "nahnah", "nahnoh", "nahmoo", "nahmah", "nahmoh",
    "nohloo", "nohlah", "nohloh", "nohnoo", "nohnah", "nohnoh", "nohmoo", "nohmah", "nohmoh",
    "mooloo", "moolah", "mooloh", "moonoo", "moonah", "moonoh", "moomoo", "moomah", "moomoh",
    "mahloo", "mahlah", "mahloh", "mahnoo", "mahnah", "mahnoh", "mahmoo", "mahmah", "mahmoh",
    "mohloo", "mohlah", "mohloh", "mohnoo", "mohnah", "mohnoh", "mohmoo", "mohmah", "mohmoh"
]

list2 = [
    "teetee", "teetuh", "teetay", "teekee", "teekuh", "teekay", "teepee", "teepuh", "teepay",
    "tuhtee", "tuhtuh", "tuhtay", "tuhkee", "tuhkuh", "tuhkay", "tuhpee", "tuhpuh", "tuhpay",
    "taytee", "taytuh", "taytay", "taykee", "taykuh", "taykay", "taypee", "taypuh", "taypay",
    "keetee", "keetuh", "keetay", "keekee", "keekuh", "keekay", "keepee", "keepuh", "keepay",
    "kuhtee", "kuhtuh", "kuhtay", "kuhkee", "kuhkuh", "kuhkay", "kuhpee", "kuhpuh", "kuhpay",
    "kaytee", "kaytuh", "kaytay", "kaykee", "kaykuh", "kaykay", "kaypee", "kaypuh", "kaypay",
    "peetee", "peetuh", "peetay", "peekee", "peekuh", "peekay", "peepee", "peepuh", "peepay",
    "puhtee", "puhtuh", "puhtay", "puhkee", "puhkuh", "puhkay", "puhpee", "puhpuh", "puhpay",
    "paytee", "paytuh", "paytay", "paykee", "paykuh", "paykay", "paypee", "paypuh", "paypay"
]

sample1 = random.sample(list1, 2)
sample2 = random.sample(list2, 2)

if False:
    final_list = sample1 + sample2

    print(final_list)

In [4]:
final_list = ['nohloh', 'loomoo', 'kuhpay', 'puhkay']

In [None]:
class CrossModalAnalyzer:
    def __init__(self, image_folder="images"):
        """
        Initialize the analyzer with the path to the image folder.
        
        Args:
            image_folder: Path to the folder containing images
        """
        self.image_folder = image_folder
        self.results = []
        self.models = {}
        
    def load_model(self, model_name):
        """
        Load a multimodal model.
        
        Args:
            model_name: Name of the model to load ('molmo')
        """
        if model_name == 'molmo':
            processor = AutoProcessor.from_pretrained(
                'allenai/Molmo-7B-D-0924',
                trust_remote_code=True,
                torch_dtype=torch.float16,
                device_map='auto'
            )

            model = AutoModelForCausalLM.from_pretrained(
                'allenai/Molmo-7B-D-0924',
                trust_remote_code=True,
                torch_dtype=torch.float16,
                device_map='auto'
            )

            self.models['molmo'] = {'model': model, 'processor': processor}
        else:
            raise ValueError(f"Unsupported model: {model_name}, Model not Found.")

    def extract_image_type_from_filename(self, filename):
        """
        Extract the image type (curved/jagged) from the filename.
        
        Args:
            filename: Image filename
            
        Returns:
            Image type as a string ('curved' or 'jagged')
        """
        filename_lower = filename.lower()
        
        if any(shape in filename_lower for shape in ['curved', 'round', 'bouba', 'maluma']):
            return 'curved'
        elif any(shape in filename_lower for shape in ['jagged', 'sharp', 'kiki', 'takete']):
            return 'jagged'
        else:
            # * Exta support if more data is added
            # If no clear indicator, try to find using regex for more complex patterns
            # For example, if the filename format is like "img_curved_001.jpg"
            match = re.search(r'_(curved|jagged|round|sharp)_', filename_lower)
            if match:
                shape_group = match.group(1)
                if shape_group in ['curved', 'round']:
                    return 'curved'
                elif shape_group in ['jagged', 'sharp']:
                    return 'jagged'
            
            # If still no match, raise an error or use a default
            raise ValueError(f"Cannot determine image type from filename: {filename}")
    
    def scan_image_folder(self):
        """
        Scan the image folder and extract image paths and types.
        
        Returns:
            Tuple of (image_paths, image_types)
        """
        image_paths = []
        image_types = []
        
        for filename in os.listdir(self.image_folder):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                image_path = os.path.join(self.image_folder, filename)
                
                try:
                    image_type = self.extract_image_type_from_filename(filename)
                    
                    image_paths.append(image_path)
                    image_types.append(image_type)
                except ValueError as e:
                    print(f"Warning: {str(e)}")
        
        return image_paths, image_types
    
    def prepare_dataset(self, text_prompts, text_types, image_paths=None, image_types=None):
        """
        Prepare dataset of all possible image-text pairs.
        
        Args:
            text_prompts: List of text prompts
            text_types: List of text types (bouba-like/kiki-like)
            image_paths: List of paths to images (if None, scan the image folder)
            image_types: List of image types (if None, extract from filenames)
            
        Returns:
            List of dicts containing all possible pairs
        """
        if image_paths is None or image_types is None:
            image_paths, image_types = self.scan_image_folder()
            
        pairs = []
        for i, img_path in enumerate(image_paths):
            img_type = image_types[i]
            for j, text in enumerate(text_prompts):
                text_type = text_types[j]
                pairs.append({
                    'image_path': img_path,
                    'text_prompt': text,
                    'image_type': img_type,
                    'text_type': text_type,
                    'image_filename': os.path.basename(img_path)
                })
        return pairs
    
    def compute_molmo_probabilities(self, image_path, text_prompt):
        """
        Compute probability scores for Molmo model.
        
        Args:
            image_path: Path to the image
            text_prompt: Text prompt to pair with image
            
        Returns:
            Probability score for the pairing
        """
        model_info = self.models['molmo']
        model = model_info['model']
        processor = model_info['processor']
        
        image = Image.open(image_path).convert('RGB')
        
        # Prompt setup
        prompt = (
            f"On a scale from 0 to 100, how well does the abstract word '{text_prompt}' describe the following image? "
            "0 means the word and image have no connection and 100 means the word perfectly fits the image. "
            "Answer only with a single number between 0 and 100. Do not include any other words."
        )
        
        inputs = processor.process(
            images=[image],
            text=prompt
        )

        inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

        # Generate with output scores to compute probabilities
        with torch.no_grad():
            try:
                with torch.autocast(device_type="cuda", enabled=True, dtype=torch.float16):
                    outputs = model.generate_from_batch(
                        inputs,
                        GenerationConfig(max_new_tokens=5, stop_strings="<|endoftext|>"),
                        tokenizer=processor.tokenizer,
                        output_scores=True,
                        return_dict_in_generate=True,
                        temperature=0.7,
                        do_sample=True,
                        top_p=0.9,
                        top_k=40
                    )

                    generated_tokens = outputs[0][inputs['input_ids'].size(1):]
                    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

                    scores = outputs.scores  # List of logits at each token generation step

                    generated_text = processor.tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
                    print("Model output:", generated_text)

                    ans = generated_text.split()
                    print("The stripped generated text is: ", ans)
                    ans = ans[-1]
                    print("The probability is: ", ans)

                    # Explicitly clean up memory
                    del outputs, inputs, scores
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()

                    prob = float(ans)

                    return prob

            except Exception as e:
                raise Exception(f"Error in Molmo generation: {str(e)}")
        
    def analyze(self, model_name, dataset):
        """
        Analyze all image-text pairs for a given model.
        
        Args:
            model_name: Name of the model to use
            dataset: List of image-text pairs
            
        Returns:
            DataFrame with results
        """
        if model_name != 'molmo':
            raise ValueError(f"Unsupported model: {model_name}. This analyzer only supports 'molmo'.")
            
        if model_name not in self.models:
            self.load_model(model_name)
            
        batch_size = 4 
        for i in range(0, len(dataset), batch_size):
            batch = dataset[i:i+batch_size]
            
            for pair in tqdm(batch, desc=f"Processing {model_name} batch {i//batch_size + 1}/{(len(dataset)-1)//batch_size + 1}"):
                image_path = pair['image_path']
                text_prompt = pair['text_prompt']
                image_type = pair['image_type']
                text_type = pair['text_type']
                
                try:
                    probability = self.compute_molmo_probabilities(image_path, text_prompt)
                    
                    self.results.append({
                        'model': model_name,
                        'image_path': image_path,
                        'image_filename': pair.get('image_filename', os.path.basename(image_path)),
                        'text_prompt': text_prompt,
                        'image_type': image_type,
                        'text_type': text_type,
                        'probability': probability,
                        'congruent': (image_type == 'curved' and text_type == 'bouba-like') or 
                                    (image_type == 'jagged' and text_type == 'kiki-like')
                    })
                        
                except Exception as e:
                    print(f"Error processing {image_path} with {text_prompt}: {str(e)}")
                    continue
                    
            # Clear CUDA cache between batches to prevent memory buildup
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                
        return pd.DataFrame(self.results)

    def calculate_metrics(self, results_df=None):
        """
        Calculate metrics for each model.
        
        Args:
            results_df: Results DataFrame (if None, uses self.results)
            
        Returns:
            DataFrame with metrics
        """
        if results_df is None:
            results_df = pd.DataFrame(self.results)
            
        metrics = []
        model_name = 'molmo'
        model_df = results_df[results_df['model'] == model_name]
        
        # Calculate average probabilities for congruent and incongruent pairs
        congruent_prob = model_df[model_df['congruent']]['probability'].mean()
        incongruent_prob = model_df[~model_df['congruent']]['probability'].mean()
        
        # Calculate bouba-kiki effect strength (difference between congruent and incongruent)
        effect_strength = congruent_prob - incongruent_prob
        
        # Calculate bias towards yes/no (centered around 50%)
        avg_probability = model_df['probability'].mean()
        
        # Calculate by image and text type
        curved_bouba = model_df[(model_df['image_type'] == 'curved') & 
                            (model_df['text_type'] == 'bouba-like')]['probability'].mean()
        curved_kiki = model_df[(model_df['image_type'] == 'curved') & 
                            (model_df['text_type'] == 'kiki-like')]['probability'].mean()
        jagged_bouba = model_df[(model_df['image_type'] == 'jagged') & 
                            (model_df['text_type'] == 'bouba-like')]['probability'].mean()
        jagged_kiki = model_df[(model_df['image_type'] == 'jagged') & 
                            (model_df['text_type'] == 'kiki-like')]['probability'].mean()
        
        # Store metrics
        metrics.append({
            'model': model_name,
            'congruent_probability': congruent_prob,
            'incongruent_probability': incongruent_prob,
            'effect_strength': effect_strength,
            'overall_bias': avg_probability - 50, 
            'curved_bouba_prob': curved_bouba,
            'curved_kiki_prob': curved_kiki,
            'jagged_bouba_prob': jagged_bouba,
            'jagged_kiki_prob': jagged_kiki,
            'n_samples': len(model_df)
        })
            
        return pd.DataFrame(metrics)
    
    def summarize_per_image(self, results_df=None):
        if results_df is None:
            results_df = pd.DataFrame(self.results)

        image_summary = (
            results_df
            .groupby('image_path')
            .agg(
                n_pairs=('probability', 'count'),
                avg_score=('probability', 'mean'),
                variance=('probability', 'var'),
                std_dev=('probability', 'std'),
                curved=('image_type', lambda x: x.iloc[0] == 'curved'),
                jagged=('image_type', lambda x: x.iloc[0] == 'jagged')
            )
            .reset_index()
        )   

        return image_summary

if __name__ == "__main__":
    # Initialize
    analyzer = CrossModalAnalyzer(image_folder="images")
    text_prompts = final_list
    text_types = ["bouba-like", "bouba-like", "kiki-like", "kiki-like"]

    # Prepare dataset
    dataset = analyzer.prepare_dataset(text_prompts, text_types)

    # Run analysis
    analyzer.analyze("molmo", dataset)
    
    ## Change folder to [1,2,3]
    # Save results
    metrics = analyzer.calculate_metrics()
    metrics.to_csv("10/molmo_metrics.csv", index=False)

    pd.DataFrame(analyzer.results).to_csv("10/molmo_results.csv", index=False)

    image_stats = analyzer.summarize_per_image()
    pd.DataFrame(image_stats).to_csv("10/image_stats.csv", index=False)
    
    print("Analysis complete. Results saved to CSV files and visualizations saved as PNG files.")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Processing molmo batch 1/34:  25%|██▌       | 1/4 [00:13<00:39, 13.12s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 1/34:  50%|█████     | 2/4 [00:24<00:23, 11.91s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 1/34:  75%|███████▌  | 3/4 [00:35<00:11, 11.52s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 1/34: 100%|██████████| 4/4 [00:46<00:00, 11.58s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 2/34:  25%|██▌       | 1/4 [00:08<00:26,  8.86s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 2/34:  50%|█████     | 2/4 [00:17<00:17,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 2/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 2/34: 100%|██████████| 4/4 [00:35<00:00,  8.85s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 3/34:  25%|██▌       | 1/4 [00:09<00:27,  9.33s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 3/34:  50%|█████     | 2/4 [00:18<00:18,  9.34s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 3/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.34s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 3/34: 100%|██████████| 4/4 [00:37<00:00,  9.34s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 4/34:  25%|██▌       | 1/4 [00:09<00:28,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 4/34:  50%|█████     | 2/4 [00:18<00:18,  9.34s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 37
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '37']
The probability is:  37


Processing molmo batch 4/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.32s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 4/34: 100%|██████████| 4/4 [00:37<00:00,  9.33s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 5/34:  25%|██▌       | 1/4 [00:09<00:27,  9.32s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 5/34:  50%|█████     | 2/4 [00:18<00:18,  9.33s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 5/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.33s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 5/34: 100%|██████████| 4/4 [00:37<00:00,  9.33s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 6/34:  25%|██▌       | 1/4 [00:09<00:29,  9.69s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 6/34:  50%|█████     | 2/4 [00:19<00:19,  9.70s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 6/34:  75%|███████▌  | 3/4 [00:29<00:09,  9.69s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 6/34: 100%|██████████| 4/4 [00:38<00:00,  9.71s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 7/34:  25%|██▌       | 1/4 [00:09<00:28,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 7/34:  50%|█████     | 2/4 [00:18<00:18,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 7/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.38s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 7/34: 100%|██████████| 4/4 [00:37<00:00,  9.39s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 8/34:  25%|██▌       | 1/4 [00:09<00:28,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 8/34:  50%|█████     | 2/4 [00:18<00:18,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 8/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.42s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 8/34: 100%|██████████| 4/4 [00:37<00:00,  9.42s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 9/34:  25%|██▌       | 1/4 [00:09<00:28,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 9/34:  50%|█████     | 2/4 [00:18<00:18,  9.43s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 9/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.42s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 9/34: 100%|██████████| 4/4 [00:37<00:00,  9.41s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 55
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '55']
The probability is:  55


Processing molmo batch 10/34:  25%|██▌       | 1/4 [00:09<00:28,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 10/34:  50%|█████     | 2/4 [00:18<00:18,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 10/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.41s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 10/34: 100%|██████████| 4/4 [00:37<00:00,  9.42s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 11/34:  25%|██▌       | 1/4 [00:10<00:31, 10.53s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 11/34:  50%|█████     | 2/4 [00:21<00:21, 10.52s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 11/34:  75%|███████▌  | 3/4 [00:31<00:10, 10.52s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 55
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '55']
The probability is:  55


Processing molmo batch 11/34: 100%|██████████| 4/4 [00:42<00:00, 10.52s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 12/34:  25%|██▌       | 1/4 [00:10<00:32, 10.73s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 12/34:  50%|█████     | 2/4 [00:21<00:21, 10.81s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 12/34:  75%|███████▌  | 3/4 [00:32<00:10, 10.75s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 12/34: 100%|██████████| 4/4 [00:42<00:00, 10.75s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 13/34:  25%|██▌       | 1/4 [00:10<00:31, 10.51s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 13/34:  50%|█████     | 2/4 [00:21<00:21, 10.52s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 13/34:  75%|███████▌  | 3/4 [00:31<00:10, 10.58s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 55
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '55']
The probability is:  55


Processing molmo batch 13/34: 100%|██████████| 4/4 [00:42<00:00, 10.52s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 14/34:  25%|██▌       | 1/4 [00:09<00:28,  9.67s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 14/34:  50%|█████     | 2/4 [00:19<00:19,  9.62s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 14/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.60s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 14/34: 100%|██████████| 4/4 [00:38<00:00,  9.61s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 15/34:  25%|██▌       | 1/4 [00:09<00:28,  9.65s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 15/34:  50%|█████     | 2/4 [00:19<00:19,  9.61s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 15/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.60s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 15/34: 100%|██████████| 4/4 [00:38<00:00,  9.61s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 16/34:  25%|██▌       | 1/4 [00:09<00:29,  9.67s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 16/34:  50%|█████     | 2/4 [00:19<00:19,  9.62s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 16/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.61s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 16/34: 100%|██████████| 4/4 [00:38<00:00,  9.64s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 17/34:  25%|██▌       | 1/4 [00:09<00:28,  9.66s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 17/34:  50%|█████     | 2/4 [00:19<00:19,  9.64s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 45
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '45']
The probability is:  45


Processing molmo batch 17/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.61s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 17/34: 100%|██████████| 4/4 [00:38<00:00,  9.63s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 18/34:  25%|██▌       | 1/4 [00:09<00:28,  9.65s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 18/34:  50%|█████     | 2/4 [00:19<00:19,  9.61s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 18/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.60s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 18/34: 100%|██████████| 4/4 [00:38<00:00,  9.61s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 19/34:  25%|██▌       | 1/4 [00:09<00:27,  9.27s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 19/34:  50%|█████     | 2/4 [00:18<00:18,  9.27s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 19/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.27s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 19/34: 100%|██████████| 4/4 [00:37<00:00,  9.27s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 20/34:  25%|██▌       | 1/4 [00:09<00:27,  9.29s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 55
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '55']
The probability is:  55


Processing molmo batch 20/34:  50%|█████     | 2/4 [00:18<00:18,  9.31s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 20/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.30s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 20/34: 100%|██████████| 4/4 [00:37<00:00,  9.30s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 21/34:  25%|██▌       | 1/4 [00:09<00:27,  9.29s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 21/34:  50%|█████     | 2/4 [00:18<00:18,  9.29s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 21/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.28s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 21/34: 100%|██████████| 4/4 [00:37<00:00,  9.28s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 22/34:  25%|██▌       | 1/4 [00:09<00:28,  9.64s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 22/34:  50%|█████     | 2/4 [00:19<00:19,  9.63s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 22/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.64s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 22/34: 100%|██████████| 4/4 [00:38<00:00,  9.62s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 23/34:  25%|██▌       | 1/4 [00:08<00:26,  8.78s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 23/34:  50%|█████     | 2/4 [00:17<00:17,  8.81s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 23/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.81s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 23/34: 100%|██████████| 4/4 [00:35<00:00,  8.81s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 30
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '30']
The probability is:  30


Processing molmo batch 24/34:  25%|██▌       | 1/4 [00:09<00:27,  9.31s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 24/34:  50%|█████     | 2/4 [00:18<00:18,  9.32s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 24/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.32s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 24/34: 100%|██████████| 4/4 [00:37<00:00,  9.32s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 25/34:  25%|██▌       | 1/4 [00:09<00:27,  9.31s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 25/34:  50%|█████     | 2/4 [00:18<00:18,  9.33s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 25/34:  75%|███████▌  | 3/4 [00:27<00:09,  9.32s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 25/34: 100%|██████████| 4/4 [00:37<00:00,  9.32s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 26/34:  25%|██▌       | 1/4 [00:09<00:29,  9.68s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 26/34:  50%|█████     | 2/4 [00:19<00:19,  9.68s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 26/34:  75%|███████▌  | 3/4 [00:29<00:09,  9.76s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 26/34: 100%|██████████| 4/4 [00:38<00:00,  9.73s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 27/34:  25%|██▌       | 1/4 [00:09<00:29,  9.70s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 27/34:  50%|█████     | 2/4 [00:19<00:19,  9.69s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 27/34:  75%|███████▌  | 3/4 [00:29<00:09,  9.71s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 60
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '60']
The probability is:  60


Processing molmo batch 27/34: 100%|██████████| 4/4 [00:38<00:00,  9.70s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 28/34:  25%|██▌       | 1/4 [00:09<00:29,  9.73s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 28/34:  50%|█████     | 2/4 [00:19<00:19,  9.71s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 28/34:  75%|███████▌  | 3/4 [00:29<00:09,  9.70s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 28/34: 100%|██████████| 4/4 [00:38<00:00,  9.70s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 29/34:  25%|██▌       | 1/4 [00:09<00:28,  9.39s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 29/34:  50%|█████     | 2/4 [00:18<00:18,  9.37s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 65
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '65']
The probability is:  65


Processing molmo batch 29/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.34s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 29/34: 100%|██████████| 4/4 [00:37<00:00,  9.37s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 30/34:  25%|██▌       | 1/4 [00:08<00:26,  8.86s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 30/34:  50%|█████     | 2/4 [00:17<00:17,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 30
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '30']
The probability is:  30


Processing molmo batch 30/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 35
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '35']
The probability is:  35


Processing molmo batch 30/34: 100%|██████████| 4/4 [00:35<00:00,  8.85s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 95
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '95']
The probability is:  95


Processing molmo batch 31/34:  25%|██▌       | 1/4 [00:08<00:26,  8.84s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 31/34:  50%|█████     | 2/4 [00:17<00:17,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 90
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '90']
The probability is:  90


Processing molmo batch 31/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.84s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 31/34: 100%|██████████| 4/4 [00:35<00:00,  8.85s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 42
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '42']
The probability is:  42


Processing molmo batch 32/34:  25%|██▌       | 1/4 [00:08<00:26,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 75
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '75']
The probability is:  75


Processing molmo batch 32/34:  50%|█████     | 2/4 [00:17<00:17,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 32/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 32/34: 100%|██████████| 4/4 [00:35<00:00,  8.86s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 33/34:  25%|██▌       | 1/4 [00:09<00:28,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 33/34:  50%|█████     | 2/4 [00:18<00:18,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 55
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '55']
The probability is:  55


Processing molmo batch 33/34:  75%|███████▌  | 3/4 [00:28<00:09,  9.35s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 33/34: 100%|██████████| 4/4 [00:37<00:00,  9.36s/it]


Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 34/34:  25%|██▌       | 1/4 [00:08<00:26,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'nohloh' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'nohloh'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85


Processing molmo batch 34/34:  50%|█████     | 2/4 [00:17<00:17,  8.86s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'loomoo' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 50
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'loomoo'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '50']
The probability is:  50


Processing molmo batch 34/34:  75%|███████▌  | 3/4 [00:26<00:08,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'kuhpay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 25
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'kuhpay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '25']
The probability is:  25


Processing molmo batch 34/34: 100%|██████████| 4/4 [00:35<00:00,  8.85s/it]

Model output:  User: On a scale from 0 to 100, how well does the abstract word 'puhkay' describe the following image? 0 means the word and image have no connection and 100 means the word perfectly fits the image. Answer only with a single number between 0 and 100. Do not include any other words. Assistant: 85
The stripped generated text is:  ['User:', 'On', 'a', 'scale', 'from', '0', 'to', '100,', 'how', 'well', 'does', 'the', 'abstract', 'word', "'puhkay'", 'describe', 'the', 'following', 'image?', '0', 'means', 'the', 'word', 'and', 'image', 'have', 'no', 'connection', 'and', '100', 'means', 'the', 'word', 'perfectly', 'fits', 'the', 'image.', 'Answer', 'only', 'with', 'a', 'single', 'number', 'between', '0', 'and', '100.', 'Do', 'not', 'include', 'any', 'other', 'words.', 'Assistant:', '85']
The probability is:  85
Analysis complete. Results saved to CSV files and visualizations saved as PNG files.



