In [1]:
import os
from tqdm import tqdm
from data_handler import DataHandler
from evaluation import get_evaluation
from image_processor import ImageProcessor
from model_handler import get_model

In [2]:
def main(model_name, model_size, model_path, device_map, data_base_path, output_base_path,
         max_entries=5):
    """Evaluate a model under every setting / data split and save results incrementally.

    For each evaluation setting and each data split, the split is loaded through
    ``DataHandler.load_data`` and every file in it is evaluated entry by entry.
    After each entry the accumulated results, metrics and photo-to-answer mapping
    are written with ``DataHandler.save_partial_results``.

    Args:
        model_name: Model identifier passed to ``get_model``; also used (concatenated
            with ``model_size``) as the first output sub-directory.
        model_size: Model size tag passed to ``get_model``.
        model_path: Location of the model weights, passed to ``get_model``.
        device_map: Device placement argument passed to ``get_model``.
        data_base_path: Root of the dataset; given to ``DataHandler`` and to each evaluator.
        output_base_path: Root directory under which results are written as
            ``<output_base_path>/<model_name><model_size>/<setting>/<data_type>``.
        max_entries: Maximum number of entries evaluated per file. Defaults to 5
            (the previously hard-coded limit). Pass ``None`` to evaluate every entry.

    Raises:
        ValueError: If ``max_entries`` is negative.

    Errors raised while processing a file are reported with ``print`` and the run
    continues with the next file.
    """
    # Validate before the (expensive) model load so a bad argument fails fast.
    if max_entries is not None and max_entries < 0:
        raise ValueError(f"max_entries must be None or a non-negative integer, got {max_entries}")

    # Initialize model handler, data handler, and image processor
    model = get_model(model_name, model_size, model_path, device_map)
    data_handler = DataHandler(data_base_path)
    image_processor = ImageProcessor()

    # Evaluation settings and data types
    settings = ["default", "student-forcing", "teacher-forcing", "single"]
    data_types = ["train", "validation"]

    # Iterate through each setting and data type
    for setting in tqdm(settings, desc="Settings"):
        for data_type in tqdm(data_types, desc=f"Data Types for {setting}", leave=False):
            # Load the data for the current data type.
            # NOTE(review): the split is reloaded for every setting; presumably
            # process_entry mutates entries in place, so do not cache without confirming.
            data = data_handler.load_data(data_type)

            # Get the evaluation class for the current setting
            evaluation_class = get_evaluation(setting)

            # Define the output path
            output_path = os.path.join(output_base_path, model_name + model_size, setting, data_type)

            # Process each data file
            for file_name, entries in tqdm(data.items(), desc=f"Files for {data_type}", leave=False):
                # Fresh accumulators and a fresh evaluator per file.
                processed_data = []
                photo2answer = {}
                acc_list = [0, 0, 0, 0, 0]
                evaluator = evaluation_class(model, image_processor, data_base_path, data_type)

                try:
                    # Optional cap on the number of entries (useful for smoke runs).
                    if max_entries is not None:
                        entries = entries[:max_entries]

                    for entry in tqdm(entries, desc=f"Entries for {file_name}", leave=False):
                        evaluator.process_entry(entry, acc_list, photo2answer)
                        processed_data.append(entry)

                        # Save partial results after processing each entry; the save made
                        # after the last entry is passed the full accumulated results, so
                        # no separate final save is performed.
                        metrics = evaluator.calculate_metrics(acc_list)
                        data_handler.save_partial_results(processed_data, metrics, photo2answer, output_path, file_name)
                except Exception as e:
                    # Best-effort: report the failure and move on to the next file.
                    print(f"Error processing file {file_name} in setting {setting}, data type {data_type}: {e}")

In [3]:
# Run configuration: which model to evaluate and how to place it on devices
model_name = "llavanext"
model_size = "34b"
model_path = "/scratch/rqa8sm/ROPE/llava-v1.6-34b-hf"
device_map = "balanced"

# Dataset root and the directory that receives the evaluation outputs
data_base_path = "/scratch/rqa8sm/ROPE/ROPE"
output_base_path = "/scratch/rqa8sm/ROPE/output-experiments"

# Launch the evaluation with the configuration above
main(
    model_name=model_name,
    model_size=model_size,
    model_path=model_path,
    device_map=device_map,
    data_base_path=data_base_path,
    output_base_path=output_base_path,
)

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

Settings:   0%|          | 0/4 [00:00<?, ?it/s]
[A

[A[A

[A[A

predicted_class:  ['bookcase', 'bookcase', 'bookcase', 'bookcase', 'bookcase']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_photo2answer.json




[A[A

predicted_class:  ['rock', 'rock', 'rock', 'rock', 'rock']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_photo2answer.json




[A[A

predicted_class:  ['rock', 'rock', 'rock', 'rock', 'rock']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_photo2answer.json




[A[A

predicted_class:  ['building', 'window', 'window', 'window', 'window']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_photo2answer.json




[A[A

[A[A
[A

predicted_class:  ['book', 'book', 'book', 'book', 'book']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/AAAAB/AAAAB_photo2answer.json




[A[A

[A[A

predicted_class:  ['bookcase', 'bookcase', 'bookcase', 'bookcase', 'bookcase']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_photo2answer.json




[A[A

predicted_class:  ['rock', 'rock', 'rock', 'rock', 'rock']
Results saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_data.json
Metrics saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_metrics.json
Photo2Answer mapping saved to /scratch/rqa8sm/ROPE/output-experiments/llavanext34b/default/train/BAAAA/BAAAA_photo2answer.json
