In [27]:
import os, sys
import json
from pathlib import Path
import random
from typing import Any, Dict, List
import glob
from google.colab import drive
%cd /content
content = Path('/content')
skol = content / 'drive/My Drive/SKOL'
piggyatbaqaqi = skol / 'github.com/piggyatbaqaqi'
drive.mount(str(content / "drive"), force_remount=True)
cache_path = content / 'cache'
ollama_cache_path = content / 'ollama_cache'
nb_path = content / 'packages_mistral'
if not os.path.exists(nb_path):
  nb_path.symlink_to(skol / 'packages_mistral')
skol_client = content / 'skol'
if not os.path.exists(skol_client):
  skol_client.symlink_to(piggyatbaqaqi / 'skol')
if not os.path.exists(cache_path):
  cache_path.symlink_to(skol / 'pip_cache')
if not os.path.exists(ollama_cache_path):
  ollama_cache_path.symlink_to(skol / 'ollama_cache')
os.environ['OLLAMA_MODELS'] = str(ollama_cache_path)
ist691 = skol_client / 'IST691'

sys.path.insert(0, str(nb_path))
sys.path.insert(0, str(piggyatbaqaqi / 'skol'))

/content
Mounted at /content/drive


In [17]:
# You only need to run this once per machine
try:
  import bitsandbytes
except ImportError:
  !pip install  --cache-dir=$cache_path --target=$nb_path bitsandbytes
try:
  import transformers
except ImportError:
  !pip install  --cache-dir=$cache_path --target=$nb_path transformers
try:
  import peft
except ImportError:
  !pip install  --cache-dir=$cache_path --target=$nb_path peft
try:
  import accelerate
except ImportError:
  !pip install  --cache-dir=$cache_path --target=$nb_path accelerate
try:
  import datasets
  import ipywidgets
  import fsspec
except ImportError:
  !pip install --cache-dir=$cache_path --target=$nb_path datasets scipy ipywidgets
  !pip install --cache-dir=$cache_path --target=$nb_path fsspec==2023.9.2


### Set up git clients

In [18]:
if not os.path.exists(piggyatbaqaqi):
  %mkdir -p $piggyatbaqaqi
if not os.path.exists(piggyatbaqaqi / 'skol'):
  %cd $piggyatbaqaqi
  !git clone https://github.com/piggyatbaqaqi/skol.git
sys.path.insert(0, piggyatbaqaqi / 'skol')
if not os.path.exists(piggyatbaqaqi / 'dr-drafts-mycosearch'):
  %cd $piggyatbaqaqi
  !git clone https://github.com/piggyatbaqaqi/dr-drafts-mycosearch.git
workdir = skol / 'IST691'
%cd $workdir

/content/drive/My Drive/SKOL/IST691


### Set up SKOL-specific code

In [19]:
from finder import read_files, parse_annotated, target_classes
from label import Label
from taxon import Taxon, group_paragraphs

SEED=12345
default_label = Label('Misc-exposition')
keep_labels = [Label('Description'), Label('Nomenclature')]

In [20]:
raw_directory_path = skol / 'raw_2025_02_05/'
ann_directory_path = skol / 'annotated_2025_02_27/journals'

In [21]:
# Function that reports all the txt files under a Google Drive folder path
def listFiles(folder: str) -> List[str]:
  # List all files in the folder
  try:
      files = [file for file in glob.glob(f'{folder}/**/*.txt*', recursive=True) if 'Sydowia' not in file]
      return files
  except FileNotFoundError:
      print(f"Folder '{folder}' not found.")
  except PermissionError:
      print(f"Permission denied to access folder '{folder}'.")

In [22]:
# check files in annotated directory
training_files = listFiles(ann_directory_path)
training_files[:10]

['/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol057/n1.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol054/n1.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s17.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s29.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s30.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s7.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s21.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s13.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s1.txt.ann',
 '/content/drive/My Drive/SKOL/annotated_2025_02_27/journals/Mycotaxon/Vol118/s46.txt.ann']

In [23]:
paragraphs = list(parse_annotated(read_files(random.sample(training_files, 20))))
relabeled = list(target_classes(default=default_label, keep=keep_labels, paragraphs=paragraphs))

In [24]:
prompt = '''Please extract features, subfeatures, optional subsubfeatures, and values from the following species description.
Format the output as JSON.
The top level of the JSON is feature names. The next level in is subfeature names . The optional next level in is subsubfeature names.
The innermost layer is lists of string-valued values.
Lists are only present at the innermost level of the JSON.
Feature values that are comma-separated strings should be broken down into separate values.
Translate Latin paragraphs to English.
'''

### The JSON keys are by feature, further broken down by subfeature (make sure to distinguish Type from Shape) and further broken down by optional subsubfeature, with lists of string values at the innermost layer.

description = """Fungus anamorphicus. Coloniae in substrato naturali eﬀusae, nigrae. Mycelium
superﬁciale, ex hyphis ramosis, septatis, pallide brunneis vel brunneis, laevibus, 1.5–3
μm crassis compositum. Conidiophora nulla vel brevis, 1–3-septata, brunnea vel
atrobrunnea, 11–28 × 4.5–5 μm. Cellula conidiogena monoblastica, determinatae,
solitaria, simplicia, lageniformia vel ampulliformia, brunnea vel atrobrunnea, laevia,
4.5–6.5 × 3.5–5 μm, ad apicem 3–4.5 μm crassa et truncatae. Conidiorum secessio
schizolytica. Conidia holoblastica, solitaria, acrogena, recta vel curvata, obclavata vel
obclavata-rostrata, atrobrunnea vel brunnea, laevia, 13–19-distoseptata, 130–190 μm
longa, 7–9 μm crassa, apicem versus ad 2–3 μm attenuata; cellula apicalis rotundata;
cellula basalis cylindrica vel conico-truncata, ad basim 3.5–4.5 μm crassa; Appendicibus
lateralibus 0–2, brunneae, septata, cylindricae, surgentibus ex cellulla e apicem 2nd vel
3rd.

Anamorphic fungi. Colonies on natural substrate eﬀuse, black. Mycelium
superﬁcial, composed of branched, septate, pale brown to brown, smoothwalled hyphae, 1.5–3 μm thick. Conidiophores absent or short, 1–3-septate,
brown to dark brown, 11–28 × 4.5–5 μm. Conidiogenous cells monoblastic,
determinate, solitary, simple, lageniform or ampulliform, brown to dark brown,
smooth, 4.5–6.5 × 3.5–5 μm, 3–4.5 μm wide at the truncate apex. Conidial
secession schizolytic. Conidia holoblastic, solitary, acrogenous, straight or
curved, obclavate to obclavate-rostrate, dark brown to brown, smooth, 13–19distoseptate, 130–190 μm long, 7–9 μm thick in the broadest part, tapering
to 2–3 μm near the apex; apical cells rounded; basal cell cylindrical, truncate,
3.5–4.5 μm wide; lateral appendages 0–2, brown, septate, cylindrical, arising
from the 2nd or 3rd cells from the apex.
"""

In [25]:
def load_json_training(filename: str) -> List[Dict[str, Any]]:
  retval = []
  state = 'START'  # 'description', 'result'
  with open(filename, "r", encoding="utf-8") as file:
    lines = []
    description = ''
    for line in file:
      if line.startswith('Send to LLM:'):
        if state == "result":
          result = ''.join(lines)
          try:
            result_dict = json.loads(result)
          except json.JSONDecodeError as err:
            print(f'Err: {err}\n{result}')
          retval.append({'description': description, 'result': json.dumps(result_dict)})
        lines = []
        state = 'description'
      elif line.startswith('Result:'):
        if state == "description":
          description = ''.join(lines)
          lines = []
        state = 'result'
      else:
        lines.append(line)
    if state == 'result' and len(lines) > 0:
      result = ''.join(lines)
      try:
        result_dict = json.loads(result)
      except json.JSONDecodeError as err:
        print(f'Err: {err}\n{result}')
      retval.append({'description': description, 'result': json.dumps(result_dict)})
  return retval


In [28]:
json_training = load_json_training(workdir / 'json_training.txt')
print(json_training[0])

{'description': '\n\nSaprobic on stems. Ascomata, superﬁcial, thyriothecial; in section conical,\nrelatively small, opening with a minute ﬂat or slightly papillate ostiole. Asci 8spored, bitunicate, ﬁssitunicate, cylindrical or obclavate. Ascospores, 1-septate,\nlight brown.\nAnamorphs: None reported for the genus (Hyde et al. 2011).\n\n\n', 'result': '{"habit": ["saprobic"], "habitat": ["on stems"], "ascomata": ["superficial", "thyriothecial", "conical", "relatively small", "opening with a minute flat or slightly papillate ostiole"], "asci": {"number of spores": ["8-spored"], "shape": ["bitunicate", "fissitunicate", "cylindrical or obclavate"]}, "ascospores": {"septation": ["1-septate"], "color": ["light brown"]}, "anamorphs": {"presence": ["none reported for the genus (hyde et al. 2011)"]}}'}


In [32]:
import datasets

dataset = datasets.Dataset.from_list(json_training)

new_dataset = datasets.Dataset.train_test_split(dataset,int(1))
temp_dataset = new_dataset["train"]
test_dataset = new_dataset["test"]
new_dataset2 = datasets.Dataset.train_test_split(temp_dataset,int(1))
train_dataset = new_dataset2["train"]
eval_dataset = new_dataset2["test"]

print(train_dataset,eval_dataset,test_dataset)

Dataset({
    features: ['description', 'result'],
    num_rows: 14
}) Dataset({
    features: ['description', 'result'],
    num_rows: 1
}) Dataset({
    features: ['description', 'result'],
    num_rows: 1
})


In [30]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)


In [33]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [35]:
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    model_max_length=1024,
    padding_side="left",
    add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token


In [36]:

def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=1024,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

In [39]:
def generate_and_tokenize_prompt(data_point):
    full_prompt =f"""{prompt}
Target sentence:
{data_point["description"]}
Result:
{data_point["result"]}
"""
    return tokenize(full_prompt)


In [41]:

tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
tokenized_test_dataset = test_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

In [45]:
print(tokenized_train_dataset[4]['input_ids'])
print(len(tokenized_train_dataset[4]['input_ids']))
print("Target Sentence: " + test_dataset[0]['description'])
print("Result: " + test_dataset[0]['result'] + "\n")


[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [49]:
eval_prompt = f"""{prompt}
Target sentence:
{description}
Result:
"""



In [51]:
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=2048, pad_token_id=2)[0], skip_special_tokens=True))


Please extract features, subfeatures, optional subsubfeatures, and values from the following species description.
Format the output as JSON.
The top level of the JSON is feature names. The next level in is subfeature names . The optional next level in is subsubfeature names.
The innermost layer is lists of string-valued values.
Lists are only present at the innermost level of the JSON.
Feature values that are comma-separated strings should be broken down into separate values.
Translate Latin paragraphs to English.

Target sentence:
Fungus anamorphicus. Coloniae in substrato naturali eﬀusae, nigrae. Mycelium
superﬁciale, ex hyphis ramosis, septatis, pallide brunneis vel brunneis, laevibus, 1.5–3
μm crassis compositum. Conidiophora nulla vel brevis, 1–3-septata, brunnea vel
atrobrunnea, 11–28 × 4.5–5 μm. Cellula conidiogena monoblastica, determinatae,
solitaria, simplicia, lageniformia vel ampulliformia, brunnea vel atrobrunnea, laevia,
4.5–6.5 × 3.5–5 μm, ad apicem 3–4.5 μm crassa et tr

In [None]:
from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )




In [None]:
from peft import LoraConfig, get_peft_model
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
print_trainable_parameters(model)
model = accelerator.prepare_model(model)



trainable params: 21260288 || all params: 3773331456 || trainable%: 0.5634354746703705


In [None]:
print(model)


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj)

In [None]:
if torch.cuda.device_count() > 1: # If more than 1 GPU
    model.is_parallelizable = True
    model.model_parallel = True


In [None]:
import transformers
from datetime import datetime
project = "skol-finetune"
base_model_name = "mistral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name
tokenizer.pad_token = tokenizer.eos_token
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=5,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        max_steps=1000,
        learning_rate=2.5e-5, # Want about 10x smaller than the Mistral learning rate
        logging_steps=50,
        bf16=True,
        optim="paged_adamw_8bit",
        logging_dir="./logs",        # Directory for storing logs
        save_strategy="steps",       # Save the model checkpoint every logging step
        save_steps=50,                # Save checkpoints every 50 steps
        eval_strategy="steps", # Evaluate the model every logging step
        eval_steps=50,               # Evaluate and save checkpoints every 50 steps
        do_eval=True,                # Perform evaluation at the end of training
        report_to="wandb",           # Comment this out if you don't want to use weights & baises
        run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"          # Name of the W&B run (optional)
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()



No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpiggy-yarroll[0m ([33mpiggy-yarroll-carnegie-mellon-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
50,0.7581,0.278718
100,0.2524,0.226216
150,0.2153,0.198129
200,0.1885,0.18198
250,0.1782,0.174704
300,0.1714,0.170898
350,0.1674,0.168553
400,0.1698,0.166767
450,0.1598,0.164575
500,0.168,0.162663


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=1000, training_loss=0.19578335189819335, metrics={'train_runtime': 4018.6546, 'train_samples_per_second': 1.991, 'train_steps_per_second': 0.249, 'total_flos': 1.7525216609776435e+17, 'train_loss': 0.19578335189819335, 'epoch': 1.567398119122257})

In [None]:
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from peft import PeftModel
ft_model = PeftModel.from_pretrained(base_model, "mistral-skol-finetune/checkpoint-1000")
ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2)[0], skip_special_tokens=True))


Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
Target sentence:
Earlier, you stated that you didn't have strong feelings about PlayStation's Little Big Adventure. Is your opinion true for all games which don't have multiplayer?
Meaning representation:
 Given that you were indifferent about Little Big Adventure, do you feel the same about other games that don't have multiplayer?
Target sen

In [None]:
ft_model.save_pretrained(str(ist691 / 'mistral-skol-finetune'))
tokenizer.save_pretrained(str(ist691 / 'mistral-skol-finetune'))

/content
Mounted at /content/drive




('/content/drive/My Drive/SKOL/IST691/mistral-viggo-finetune/tokenizer_config.json',
 '/content/drive/My Drive/SKOL/IST691/mistral-viggo-finetune/special_tokens_map.json',
 '/content/drive/My Drive/SKOL/IST691/mistral-viggo-finetune/tokenizer.model',
 '/content/drive/My Drive/SKOL/IST691/mistral-viggo-finetune/added_tokens.json',
 '/content/drive/My Drive/SKOL/IST691/mistral-viggo-finetune/tokenizer.json')