# transformers

In [1]:
!pip install -r requirements.txt



In [2]:
import transformers
import torch

In [3]:
# Show which transformers release is installed in this environment.
transformers.__version__

'4.48.3'

## 数据集

In [4]:
# Delete the locally cached GLUE data under the Hugging Face datasets cache directory.
!rm -rf ~/.cache/huggingface/datasets/glue

In [5]:
from datasets import load_dataset

# Load the "mrpc" configuration of the "glue" dataset; the result is a
# DatasetDict with train / validation / test splits (see the repr below).
raw_datasets = load_dataset("glue", "mrpc")
raw_datasets

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})

In [6]:
# List the local cache directory for glue/mrpc.
!ls -al ~/.cache/huggingface/datasets/glue/mrpc/0.0.0

total 12
drwxr-xr-x 3 root root 4096 Feb 25 06:41 .
drwxr-xr-x 3 root root 4096 Feb 25 06:41 ..
drwxr-xr-x 2 root root 4096 Feb 25 06:41 bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c
-rw-r--r-- 1 root root    0 Feb 25 06:41 bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c_builder.lock
-rw-r--r-- 1 root root    0 Feb 25 06:41 bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c.incomplete_info.lock


In [7]:
# Inspect the first training example (a dict with sentence1, sentence2, label, idx).
raw_datasets["train"][0]

{'sentence1': 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
 'sentence2': 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
 'label': 1,
 'idx': 0}

In [8]:
# Inspect the second training example.
raw_datasets["train"][1]

{'sentence1': "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
 'sentence2': "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
 'label': 0,
 'idx': 1}

In [9]:
# Inspect the second example of the test split.
raw_datasets["test"][1]

{'sentence1': "The world 's two largest automakers said their U.S. sales declined more than predicted last month as a late summer sales frenzy caused more of an industry backlash than expected .",
 'sentence2': 'Domestic sales at both GM and No. 2 Ford Motor Co. declined more than predicted as a late summer sales frenzy prompted a larger-than-expected industry backlash .',
 'label': 1,
 'idx': 1}

In [10]:
# Keep a handle on the train split alone for closer inspection.
raw_train_datasets = raw_datasets["train"]
raw_train_datasets

Dataset({
    features: ['sentence1', 'sentence2', 'label', 'idx'],
    num_rows: 3668
})

In [11]:
# Column types of the train split; 'label' is a ClassLabel (not_equivalent / equivalent).
raw_train_datasets.features

{'sentence1': Value(dtype='string', id=None),
 'sentence2': Value(dtype='string', id=None),
 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),
 'idx': Value(dtype='int32', id=None)}

## 使用 AutoTokenizer 处理数据

In [12]:
from transformers import AutoTokenizer

# Load the tokenizer published with the "google-bert/bert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
tokenizer

BertTokenizerFast(name_or_path='google-bert/bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [13]:
# Passing two strings encodes them as a single sentence pair: one input_ids
# sequence, with token_type_ids telling the two sentences apart.
inputs = tokenizer(
    'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
    "The world 's two largest automakers said their U.S. sales declined more than predicted last month as a late summer sales frenzy caused more of an industry backlash than expected .",
)
inputs

{'input_ids': [101, 2572, 3217, 5831, 5496, 2010, 2567, 1010, 3183, 2002, 2170, 1000, 1996, 7409, 1000, 1010, 1997, 9969, 4487, 23809, 3436, 2010, 3350, 1012, 102, 1996, 2088, 1005, 1055, 2048, 2922, 8285, 12088, 2056, 2037, 1057, 1012, 1055, 1012, 4341, 6430, 2062, 2084, 10173, 2197, 3204, 2004, 1037, 2397, 2621, 4341, 21517, 3303, 2062, 1997, 2019, 3068, 25748, 2084, 3517, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

### tokenizer 输出

* input_ids:两个句子拼接成一个序列(并加上 `[CLS]`、`[SEP]` 特殊标记)之后的 token id
* token_type_ids:标记 input_ids 中每个 token 属于第一个句子(0)还是第二个句子(1)
* attention_mask:标记每个位置是真实 token(1)还是填充(0);这里没有填充,所以全部为 1

In [14]:
# Map the ids back to token strings to see the special tokens and word pieces.
tokenizer.convert_ids_to_tokens(inputs["input_ids"])

['[CLS]',
 'am',
 '##ro',
 '##zi',
 'accused',
 'his',
 'brother',
 ',',
 'whom',
 'he',
 'called',
 '"',
 'the',
 'witness',
 '"',
 ',',
 'of',
 'deliberately',
 'di',
 '##stor',
 '##ting',
 'his',
 'evidence',
 '.',
 '[SEP]',
 'the',
 'world',
 "'",
 's',
 'two',
 'largest',
 'auto',
 '##makers',
 'said',
 'their',
 'u',
 '.',
 's',
 '.',
 'sales',
 'declined',
 'more',
 'than',
 'predicted',
 'last',
 'month',
 'as',
 'a',
 'late',
 'summer',
 'sales',
 'frenzy',
 'caused',
 'more',
 'of',
 'an',
 'industry',
 'backlash',
 'than',
 'expected',
 '.',
 '[SEP]']

In [15]:
def tokenizer_function(example):
    """Tokenize the sentence pair(s) held in ``example``.

    Args:
        example: Mapping providing "sentence1" and "sentence2" entries.

    Returns:
        The output of the notebook-level ``tokenizer`` called on the two
        entries with truncation enabled.
    """
    first_sentence = example["sentence1"]
    second_sentence = example["sentence2"]
    return tokenizer(first_sentence, second_sentence, truncation=True)

In [16]:
# Run tokenizer_function over every split (batched mode); the result keeps the
# original columns and gains input_ids, token_type_ids and attention_mask.
tokenized_datasets = raw_datasets.map(tokenizer_function, batched=True)
tokenized_datasets

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1725
    })
})

In [17]:
# A tokenized training example: the original columns plus the tokenizer outputs.
tokenized_datasets["train"][1]

{'sentence1': "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
 'sentence2': "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
 'label': 0,
 'idx': 1,
 'input_ids': [101,
  9805,
  3540,
  11514,
  2050,
  3079,
  11282,
  2243,
  1005,
  1055,
  2077,
  4855,
  1996,
  4677,
  2000,
  3647,
  4576,
  1999,
  2687,
  2005,
  1002,
  1016,
  1012,
  1019,
  4551,
  1012,
  102,
  9805,
  3540,
  11514,
  2050,
  4149,
  11282,
  2243,
  1005,
  1055,
  1999,
  2786,
  2005,
  1002,
  6353,
  2509,
  2454,
  1998,
  2853,
  2009,
  2000,
  3647,
  4576,
  2005,
  1002,
  1015,
  1012,
  1022,
  4551,
  1999,
  2687,
  1012,
  102],
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1

In [18]:
# Keep a handle on the tokenized train split.
tokenized_train_datasets = tokenized_datasets["train"]
tokenized_train_datasets

Dataset({
    features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 3668
})

In [19]:
# Schema after tokenization: the three new columns are stored as integer Sequence features.
tokenized_train_datasets.features

{'sentence1': Value(dtype='string', id=None),
 'sentence2': Value(dtype='string', id=None),
 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),
 'idx': Value(dtype='int32', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

## PyTorch DataLoaders

In [20]:
from transformers import DataCollatorWithPadding

# Collator built from the tokenizer; its repr below shows padding=True and
# return_tensors='pt'.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
data_collator

DataCollatorWithPadding(tokenizer=BertTokenizerFast(name_or_path='google-bert/bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
), padding=True, max_length=None, pad_to_multiple_of=None, return_tensors='pt')

In [21]:
# The bare string below is only a sketch of the dataset as a table (column
# names across the top, row indices down the side); it has no effect.
"""
   'sentence1' 'sentence2' 'label' 'idx' 'input_ids' 'token_type_ids' 'attention_mask'
0
1
2
3
"""

# Display the tokenized train split that the sketch describes.
tokenized_datasets["train"]

Dataset({
    features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 3668
})

In [22]:
# Selecting a column by name gives all of its values: one entry per row.
len(tokenized_datasets["train"]["sentence1"])

3668

In [23]:
# Selecting a row by index gives one example as a dict of column -> value.
tokenized_datasets["train"][0]

{'sentence1': 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
 'sentence2': 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
 'label': 1,
 'idx': 0,
 'input_ids': [101,
  2572,
  3217,
  5831,
  5496,
  2010,
  2567,
  1010,
  3183,
  2002,
  2170,
  1000,
  1996,
  7409,
  1000,
  1010,
  1997,
  9969,
  4487,
  23809,
  3436,
  2010,
  3350,
  1012,
  102,
  7727,
  2000,
  2032,
  2004,
  2069,
  1000,
  1996,
  7409,
  1000,
  1010,
  2572,
  3217,
  5831,
  5496,
  2010,
  2567,
  1997,
  9969,
  4487,
  23809,
  3436,
  2010,
  3350,
  1012,
  102],
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
 

In [24]:
# First 20 training rows; the slice is a mapping from column name to the list
# of that column's values, which is dumped in full here.
samples = tokenized_datasets["train"][:20]
for column_name, column_values in samples.items():
    print("k:", column_name)
    print("v:", column_values)
print("-----" * 5)

# Keep every column except the raw sentences and the row index.
excluded_columns = ["sentence1", "sentence2", "idx"]
samples = {
    column_name: column_values
    for column_name, column_values in samples.items()
    if column_name not in excluded_columns
}
for column_name, column_values in samples.items():
    print("k:", column_name)
    print("v:", column_values)
print("-----" * 5)

# The bare string below is a no-op sketch of input_ids: rows of different lengths.
"""
[
    [101, 2572, 3217, ..., 5831, 5496, 1012, 102],
    [101, 9805, 3540, ..., 11514, 1022, 4551, 1999, 2687, 1012, 102],
    ...
    [101, 1996, 2047, 2424, 1012, 102]
]
"""
# Number of token ids in each of the selected examples.
list(map(len, samples["input_ids"]))

k: sentence1
v: ['Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .', 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .', 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .', 'Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .', 'The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .', 'The DVD-CCA then appealed to the state Supreme Court .', 'That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .', 'Shares of Genentech , a much larger company with several products on the market , rose more than 2 pe

[50,
 59,
 47,
 67,
 59,
 50,
 62,
 32,
 45,
 60,
 51,
 47,
 42,
 61,
 53,
 44,
 53,
 79,
 57,
 70]

In [25]:
# Collate the 20 examples into one batch: sequences are padded to a common
# length and returned as tensors, and the 'label' column comes back under the
# key 'labels' (see the output below).
batch = data_collator(samples)
batch

{'input_ids': tensor([[  101,  2572,  3217,  ...,     0,     0,     0],
        [  101,  9805,  3540,  ...,     0,     0,     0],
        [  101,  2027,  2018,  ...,     0,     0,     0],
        ...,
        [  101, 20682,  2050,  ...,  2545,  1012,   102],
        [  101,  1000,  1045,  ...,     0,     0,     0],
        [  101,  1996,  2047,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'labels': tensor([1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1])}

In [26]:
# Shape of every tensor in the batch; the sequence dimension (79) equals the
# longest of the 20 per-example lengths.
{k: v.shape for k, v in batch.items()}

{'input_ids': torch.Size([20, 79]),
 'token_type_ids': torch.Size([20, 79]),
 'attention_mask': torch.Size([20, 79]),
 'labels': torch.Size([20])}

## 训练

In [27]:
from transformers import TrainingArguments

# NOTE(review): the first positional parameter of TrainingArguments is
# presumably output_dir, so "first-trainer" here would name the output
# directory rather than the run — confirm against the API reference.
# This object is replaced by the keyword-argument version in the next cell.
training_args = TrainingArguments("first-trainer")
training_args

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.NO,
eval_us

In [28]:
# Training configuration with explicit keywords: the run is named
# "first-trainer", checkpoints go under ./results, logs under ./logs, and
# report_to is given an empty list of reporting integrations.
training_args = TrainingArguments(
    run_name="first-trainer", output_dir="./results", logging_dir="./logs", report_to=[]
)
training_args

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.NO,
eval_us

In [29]:
from transformers import AutoModelForSequenceClassification

# Load the pretrained BERT checkpoint with a 2-label sequence-classification
# head. The classifier weights are not in the checkpoint and are newly
# initialized (see the warning below), so the model must be trained before use.
model = AutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased", num_labels=2
)
model

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [30]:
from transformers import Trainer

# Build the Trainer from the model, the training configuration, the tokenized
# train/validation splits and the padding collator defined in earlier cells.
# The tokenizer is passed as `processing_class`: in the transformers release
# used here (4.48.3) the `tokenizer` keyword of Trainer.__init__ is deprecated
# and emits a FutureWarning.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
)
trainer

  trainer = Trainer(


<transformers.trainer.Trainer at 0x78ef3dfbcb10>

In [31]:
import os

# NOTE(review): CUDA_VISIBLE_DEVICES is normally read when CUDA is first
# initialized. torch was imported and the model and trainer were already built
# in earlier cells, so this assignment may come too late to affect device
# selection — confirm, and consider setting it in the first cell instead.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Run the training loop; the displayed value is the TrainOutput (global step,
# training loss and runtime metrics).
trainer.train()

Step,Training Loss
500,0.5194
1000,0.272


TrainOutput(global_step=1377, training_loss=0.31675101053740723, metrics={'train_runtime': 210.4013, 'train_samples_per_second': 52.3, 'train_steps_per_second': 6.545, 'total_flos': 405114969714960.0, 'train_loss': 0.31675101053740723, 'epoch': 3.0})

In [32]:
# List the checkpoint directories present in the output directory after training.
!ls -al ./results/

total 20
drwxr-xr-x 5 root root 4096 Feb 25 06:45 .
drwxr-xr-x 1 root root 4096 Feb 25 06:40 ..
drwxr-xr-x 2 root root 4096 Feb 25 06:44 checkpoint-1000
drwxr-xr-x 2 root root 4096 Feb 25 06:45 checkpoint-1377
drwxr-xr-x 2 root root 4096 Feb 25 06:43 checkpoint-500


In [33]:
# Contents of one checkpoint: model weights, optimizer/scheduler/RNG state,
# tokenizer files, trainer state and the training arguments.
!ls -al ./results/checkpoint-500

total 1284184
drwxr-xr-x 2 root root      4096 Feb 25 06:43 .
drwxr-xr-x 5 root root      4096 Feb 25 06:45 ..
-rw-r--r-- 1 root root       739 Feb 25 06:43 config.json
-rw-r--r-- 1 root root 437958648 Feb 25 06:43 model.safetensors
-rw-r--r-- 1 root root 876038394 Feb 25 06:43 optimizer.pt
-rw-r--r-- 1 root root     14244 Feb 25 06:43 rng_state.pth
-rw-r--r-- 1 root root      1064 Feb 25 06:43 scheduler.pt
-rw-r--r-- 1 root root       125 Feb 25 06:43 special_tokens_map.json
-rw-r--r-- 1 root root      1221 Feb 25 06:43 tokenizer_config.json
-rw-r--r-- 1 root root    711494 Feb 25 06:43 tokenizer.json
-rw-r--r-- 1 root root       930 Feb 25 06:43 trainer_state.json
-rw-r--r-- 1 root root      5240 Feb 25 06:43 training_args.bin
-rw-r--r-- 1 root root    231508 Feb 25 06:43 vocab.txt


In [34]:
# Save the model to ./SaveModel. The listing in the next cell shows only
# config.json and model.safetensors, i.e. no tokenizer files are written here.
model.save_pretrained("./SaveModel")

In [35]:
# Check which files save_pretrained produced.
!ls -al ./SaveModel

total 427708
drwxr-xr-x 2 root root      4096 Feb 25 06:45 .
drwxr-xr-x 1 root root      4096 Feb 25 06:45 ..
-rw-r--r-- 1 root root       739 Feb 25 06:45 config.json
-rw-r--r-- 1 root root 437958648 Feb 25 06:45 model.safetensors
