In [4]:
import torch
from huggingface_hub import hf_hub_download

# Fetch the file from the Hugging Face Hub; the returned value is the path
# handed to torch.load below.
file_path = hf_hub_download(
    repo_id="thunlp/LLaMA3-Instruct-8B-FR-Spec",
    filename="freq_32768.pt",
)

# Load the object onto the CPU. torch.load unpickles the file, and this file
# comes from a remote repository, so unpickling is restricted with
# weights_only=True instead of allowing arbitrary pickled objects.
# NOTE(review): assumes the installed torch accepts `weights_only` — confirm.
# The name `tensor` is kept as-is, although the recorded output of this cell
# reports the loaded object as a plain Python list.
tensor = torch.load(file_path, map_location='cpu', weights_only=True)

# Inspect the loaded object: its type, its size, and both ends of the sequence.
print(f"Type: {type(tensor)}")
print(f"Length: {len(tensor)}")
print(f"First 10 elements: {tensor[:10]}")
print(f"Last 10 elements: {tensor[-10:]}")

# Check if all elements are integers
print(f"All elements are integers: {all(isinstance(x, int) for x in tensor)}")

# Scan for the extremes and build the set of values once; every check below
# reuses them instead of re-scanning the whole sequence each time.
min_value = min(tensor)
max_value = max(tensor)
tensor_set = set(tensor)

# Get some statistics
print(f"Min value: {min_value}")
print(f"Max value: {max_value}")
print(f"Number of unique values: {len(tensor_set)}")

# Contiguous here means: equal, element by element, to min..max inclusive
# (so also sorted, with no gaps and no duplicates).
is_contiguous = tensor == list(range(min_value, max_value + 1))
print(f"Is the sequence contiguous? {is_contiguous}")

# Count the values in [min, max] that do not occur in the sequence.
expected_range = set(range(min_value, max_value + 1))
missing_values = expected_range - tensor_set
print(f"Missing values in range: {len(missing_values)} out of {len(expected_range)}")
if len(missing_values) <= 20:  # Only show if not too many
    # At most 20 values reach this branch, so no further truncation is needed.
    print(f"Some missing values: {sorted(missing_values)}")

# Check if the sequence is sorted (ascending; compares against a sorted copy)
is_sorted = tensor == sorted(tensor)
print(f"Is the sequence sorted? {is_sorted}")

Type: <class 'list'>
Length: 32768
First 10 elements: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Last 10 elements: [122098, 123282, 124232, 125341, 126029, 126058, 126437, 126459, 128000, 128009]
All elements are integers: True
Min value: 0
Max value: 128009
Number of unique values: 32768
Is the sequence contiguous? False
Missing values in range: 95242 out of 128010
Is the sequence sorted? True
