# CKPT File Investigation

This notebook is designed to investigate and troubleshoot issues with loading CKPT files.

In [13]:
pip install -r /TensorKiko/requirements.txt

ERROR: Could not open requirements file: [Errno 2] No such file or directory: '/TensorKiko/requirements.txt'
Note: you may need to restart the kernel to use updated packages.


In [None]:
import torch
import pickle
import os
import struct

def check_file(ckpt_path):
    """Report whether ``ckpt_path`` exists and, if it does, how large it is.

    Prints a "not found" line and returns ``False`` when the path does not
    exist. Otherwise prints the path followed by its size in MB (two decimal
    places) and returns ``True``.
    """
    if os.path.exists(ckpt_path):
        print(f"File found: {ckpt_path}")
        size_in_mb = os.path.getsize(ckpt_path) / (1024 * 1024)
        print(f"File size: {size_in_mb:.2f} MB")
        return True

    print(f"File not found: {ckpt_path}")
    return False

def try_load_ckpt(file_path, encoding=None):
    """Try to deserialize a checkpoint with ``torch.load`` onto the CPU.

    Args:
        file_path: Path of the checkpoint file.
        encoding: Optional pickle encoding. When given, the file is opened in
            binary mode and the encoding is forwarded to ``torch.load``;
            otherwise ``torch.load`` is handed the path directly.

    Returns:
        Whatever ``torch.load`` produced, or ``None`` if loading raised. The
        outcome is also reported on stdout.

    Note:
        ``torch.load`` is pickle-based, so only use this on files you trust.
    """
    try:
        if not encoding:
            loaded = torch.load(file_path, map_location="cpu")
        else:
            with open(file_path, 'rb') as stream:
                loaded = torch.load(stream, map_location="cpu", encoding=encoding)
    except Exception as err:
        print(f"Failed to load the checkpoint. Error: {str(err)}")
        return None
    else:
        print("Successfully loaded the checkpoint.")
        return loaded

def inspect_file_header(file_path, num_bytes=100):
    """Print the leading bytes of a file, raw and as printable ASCII.

    Reads up to ``num_bytes`` bytes from ``file_path`` in binary mode, prints
    the ``bytes`` repr, then prints a rendering in which every byte outside
    the range 32..126 is replaced by ``'.'``.
    """
    with open(file_path, 'rb') as stream:
        leading = stream.read(num_bytes)

    # Build the ASCII view up front; non-printable bytes become dots.
    rendered = []
    for byte in leading:
        rendered.append(chr(byte) if 32 <= byte < 127 else '.')

    print(f"First {num_bytes} bytes of the file:")
    print(leading)
    print("\nAs ASCII:")
    print(''.join(rendered))

def try_manual_unpickle(file_path):
    """Try to read ``file_path`` with the plain ``pickle`` module.

    Returns the unpickled object, or ``None`` if opening or unpickling raised.
    The outcome is reported on stdout either way.

    Warning:
        ``pickle.load`` can execute arbitrary code embedded in the file; only
        point this at checkpoints from a trusted source.
    """
    try:
        with open(file_path, 'rb') as stream:
            payload = pickle.load(stream)
    except Exception as err:
        print(f"Failed to unpickle the file. Error: {str(err)}")
        return None
    else:
        print("Successfully unpickled the file.")
        return payload

def investigate_ckpt(ckpt_path):
    """Run every diagnostic in this cell against one checkpoint and print the findings.

    Steps, in order:
      1. Confirm the file exists (``check_file``); stop if it does not.
      2. Try ``torch.load`` with default settings, then again with the
         ``latin1`` pickle encoding if the first attempt failed.
      3. Dump the first bytes of the file (``inspect_file_header``).
      4. Try a plain ``pickle`` load (``try_manual_unpickle``).
      5. Summarize what ``torch.load`` returned, if anything.

    Args:
        ckpt_path: Path of the checkpoint file to investigate.

    Returns:
        None. All results are reported on stdout.
    """
    if not check_file(ckpt_path):
        return

    print("\nAttempting to load with default settings...")
    state_dict = try_load_ckpt(ckpt_path)

    if state_dict is None:
        print("\nAttempting to load with 'latin1' encoding...")
        state_dict = try_load_ckpt(ckpt_path, encoding='latin1')

    print("\nInspecting file header:")
    inspect_file_header(ckpt_path)

    print("\nAttempting manual unpickling:")
    # Run purely for the diagnostics it prints; the returned object is not used.
    try_manual_unpickle(ckpt_path)

    if state_dict is None:
        print("\nUnable to load the checkpoint. You may need to implement a custom loading function based on the file structure.")
        return

    print("\nCheckpoint loaded successfully. Here's a summary of its contents:")

    # torch.load returns whatever object was saved, which is not necessarily a
    # mapping (it can be a whole module or a bare tensor). Report the type
    # instead of crashing on a missing .items().
    if not callable(getattr(state_dict, "items", None)):
        print(f"Loaded object is not a dictionary: {type(state_dict)}")
        return

    for key, value in state_dict.items():
        if isinstance(value, torch.Tensor):
            print(f"{key}: Tensor of shape {value.shape}")
        else:
            print(f"{key}: {type(value)}")

# Cell driver: run the full diagnostic pass on a single checkpoint file.
# The path is relative, so it is looked up from the current working directory.
if __name__ == "__main__":
    ckpt_path = 'clip_vit_l14_vision_model_f16.ckpt'  # Replace with your file path
    investigate_ckpt(ckpt_path)

In [None]:
import sqlite3
import torch
import io

def _quote_sqlite_identifier(identifier):
    """Return ``identifier`` double-quoted for SQL, with embedded quotes doubled."""
    return '"' + str(identifier).replace('"', '""') + '"'


def investigate_sqlite_ckpt(file_path):
    """Inspect a checkpoint that is suspected to be an SQLite database.

    Lists the tables, then for the first table prints its columns and the
    Python types of one sample row. If that table has a column named
    ``weight``, ``value`` or ``tensor`` (case-insensitive), one value from it
    is passed through ``torch.load`` as a probe.

    The database is opened read-only, so a missing path is reported as an
    error instead of silently creating an empty file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        None. All findings and errors are reported on stdout.
    """
    from pathlib import Path  # local import: this cell does not import pathlib

    print(f"Investigating SQLite file: {file_path}")

    conn = None
    try:
        # A plain sqlite3.connect(path) would create the file if it is absent;
        # a file: URI with mode=ro fails instead and never writes to disk.
        db_uri = Path(file_path).resolve().as_uri() + "?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True)
        cursor = conn.cursor()

        # Get list of tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        print("\nTables found in the database:")
        for table in tables:
            print(table[0])

        if not tables:
            return

        # Only the first table reported by sqlite_master is examined.
        table_name = tables[0][0]
        quoted_table = _quote_sqlite_identifier(table_name)
        print(f"\nInvestigating table: {table_name}")

        # Column information; each row is (cid, name, type, notnull, default, pk).
        cursor.execute(f"PRAGMA table_info({quoted_table})")
        columns = cursor.fetchall()
        print("\nColumns:")
        for column in columns:
            print(f"  {column[1]} ({column[2]})")

        # Fetch a sample row
        cursor.execute(f"SELECT * FROM {quoted_table} LIMIT 1")
        sample_row = cursor.fetchone()

        if not sample_row:
            print("No data found in the table.")
            return

        print("\nSample row:")
        for i, value in enumerate(sample_row):
            print(f"  {columns[i][1]}: {type(value)}")

        # Look for a column whose name suggests it holds serialized tensor data.
        tensor_column = next(
            (col[1] for col in columns if col[1].lower() in ['weight', 'value', 'tensor']),
            None,
        )

        if tensor_column:
            print(f"\nAttempting to load a tensor from the '{tensor_column}' column...")
            quoted_column = _quote_sqlite_identifier(tensor_column)
            cursor.execute(f"SELECT {quoted_column} FROM {quoted_table} LIMIT 1")
            tensor_data = cursor.fetchone()[0]

            try:
                # NOTE: torch.load is pickle-based; only probe files you trust.
                tensor = torch.load(io.BytesIO(tensor_data))
                print(f"Successfully loaded a tensor of shape: {tensor.shape}")
            except Exception as e:
                print(f"Failed to load tensor: {str(e)}")

    except Exception as e:
        print(f"Error investigating the SQLite file: {str(e)}")
    finally:
        # Release the handle on every path, including errors and early returns.
        if conn is not None:
            conn.close()

# Cell driver: inspect the same checkpoint as an SQLite database.
# The path is relative, so it is looked up from the current working directory.
if __name__ == "__main__":
    ckpt_path = 'clip_vit_l14_vision_model_f16.ckpt'  # Replace with your file path
    investigate_sqlite_ckpt(ckpt_path)

In [None]:
import sqlite3
import torch
import io
import struct
import numpy as np

def parse_tensor_data(data_blob, dim):
    """Decode a raw blob into a tensor of shape ``dim`` by trying several formats.

    The decoders are tried in this order and the first one that succeeds wins:
      1. native-endian float32 buffer reshaped to ``dim``
      2. native-endian float16 buffer reshaped to ``dim``
      3. a ``torch.save`` payload (``dim`` is not applied in this case)
      4. ``struct``-unpacked native floats reshaped to ``dim``

    The reshape is what selects between (1) and (2): a buffer whose element
    count does not match ``dim`` fails and the next decoder is tried.
    NOTE(review): other 2-byte formats (e.g. bfloat16) cannot be told apart
    from float16 by size alone -- confirm the writer's dtype.

    Args:
        data_blob: Bytes-like object holding the serialized tensor.
        dim: Target shape, as accepted by ``reshape``.

    Returns:
        A ``torch.Tensor`` that owns its own memory.

    Raises:
        ValueError: If no decoder succeeds; the message lists why each failed.
    """
    # np.frombuffer returns a read-only view of the immutable bytes object and
    # torch.from_numpy shares that memory. Copy after the reshape so writes to
    # the tensor cannot corrupt the blob, and so nothing is copied unless the
    # shape check has already passed.
    decoders = [
        ("float32 buffer",
         lambda: torch.from_numpy(np.frombuffer(data_blob, dtype=np.float32).reshape(dim).copy())),
        ("float16 buffer",
         lambda: torch.from_numpy(np.frombuffer(data_blob, dtype=np.float16).reshape(dim).copy())),
        # NOTE: torch.load is pickle-based; only use on blobs from a trusted file.
        ("torch.load",
         lambda: torch.load(io.BytesIO(data_blob))),
        ("struct floats",
         lambda: torch.tensor(struct.unpack(f'{len(data_blob)//4}f', data_blob)).reshape(dim)),
    ]

    failures = []
    for label, decode in decoders:
        try:
            return decode()
        except Exception as exc:
            # Keep the reason so the final error explains what was tried.
            failures.append(f"{label}: {exc}")

    raise ValueError("Unable to parse tensor data (" + "; ".join(failures) + ")")

def load_sqlite_ckpt(file_path):
    """Load tensors from an SQLite-based checkpoint into a name -> tensor dict.

    Reads ``name``, ``dim`` and ``data`` from the ``tensors`` table. Each
    ``dim`` blob is unpacked as a sequence of big-endian signed 64-bit
    integers and each ``data`` blob is decoded with ``parse_tensor_data``.
    NOTE(review): the big-endian int64 layout of ``dim`` is an assumption of
    this code -- confirm it against the tool that wrote the file.

    The database is opened read-only, so a missing path is reported as an
    error instead of silently creating an empty file.

    Args:
        file_path: Path of the SQLite checkpoint.

    Returns:
        A dict of the tensors that could be decoded (rows that fail are
        reported on stdout and skipped), or ``None`` if the database could not
        be opened or queried.
    """
    from pathlib import Path  # local import: this cell does not import pathlib

    print(f"Loading SQLite-based checkpoint: {file_path}")

    state_dict = {}
    conn = None

    try:
        # A plain sqlite3.connect(path) would create the file if it is absent;
        # a file: URI with mode=ro fails instead and never writes to disk.
        db_uri = Path(file_path).resolve().as_uri() + "?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True)
        cursor = conn.cursor()

        cursor.execute("SELECT name, dim, data FROM tensors")

        # Iterate the cursor rather than fetchall() so the raw blobs are not
        # all held in memory next to the decoded tensors.
        for name, dim_blob, data_blob in cursor:
            try:
                # Parse dimension (inside the per-row guard so one malformed
                # row does not abort the whole load).
                dim = struct.unpack('!' + 'q' * (len(dim_blob) // 8), dim_blob)

                # Attempt to load tensor data
                tensor = parse_tensor_data(data_blob, dim)

                # Verify shape
                if tuple(tensor.shape) != dim:
                    print(f"Warning: Shape mismatch for {name}. Expected {dim}, got {tensor.shape}")

                state_dict[name] = tensor
            except Exception as e:
                print(f"Error loading tensor {name}: {str(e)}")

        print(f"Successfully loaded {len(state_dict)} tensors")
        return state_dict

    except Exception as e:
        print(f"Error loading the SQLite-based checkpoint: {str(e)}")
        return None
    finally:
        # Release the handle on every path, including errors.
        if conn is not None:
            conn.close()

# Cell driver: load the SQLite-based checkpoint and list what was recovered.
# The path is relative, so it is looked up from the current working directory.
if __name__ == "__main__":
    ckpt_path = 'clip_vit_l14_vision_model_f16.ckpt'  # Replace with your file path
    state_dict = load_sqlite_ckpt(ckpt_path)
    
    # load_sqlite_ckpt returns None on failure and may return an empty dict;
    # both are falsy, so the listing is printed only when tensors were loaded.
    if state_dict:
        print("\nCheckpoint contents:")
        for name, tensor in state_dict.items():
            print(f"{name}: {tensor.shape}")

If all previous attempts fail, you may need to implement a custom loading function based on the file structure. This would require more in-depth analysis of the file format.