In [1]:
import os
import torch
import zipfile
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from accelerate import init_empty_weights
from datasets import Dataset
 
# Set environment variable to manage memory fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 
# Specify directories and the path to the zip file
# save_dir = os.path.expanduser("llama_webtext/")
offload_dir = os.path.expanduser("llama_offload/")
# data_zip_path = "llama_webtext.7z"
# data_extract_path = os.path.expanduser("llama_webtext/data")
 
# Create directories if they don't exist
# os.makedirs(save_dir, exist_ok=True)
os.makedirs(offload_dir, exist_ok=True)
# os.makedirs(data_extract_path, exist_ok=True)
 
# Extract only the specified JSON file from the zip archive
target_file = "task024_cosmosqa_answer_generation.json"
# with zipfile.ZipFile(data_zip_path, 'r') as zip_ref:
#     zip_ref.extract(target_file, data_extract_path)
 
# Load tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
tokenizer.pad_token = tokenizer.eos_token
 
# Load the model with accelerate's offloading and device map auto-setup
with init_empty_weights():
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3-8B",
        device_map="auto",
        offload_folder=offload_dir,
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )
 
# Configure LoRA with reduced rank
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
 
# Load and process the JSON data from the extracted file


  from .autonotebook import tqdm as notebook_tqdm
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
  _ = torch.tensor([0], device=i)
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.45s/it]


## Question Answering Fine Tuning Ahead:

In [2]:
target_file = r"task024_cosmosqa_answer_generation.json"
with open(target_file, 'r', encoding='utf-8-sig') as f:
    json_data = json.load(f)

# Extract text data (assuming a structure where the data you want is under 'Instances')

instances = json_data['Instances'][0:2223]
input_texts = [str(instance['input']) for instance in instances]  # Convert to string if not already
output_texts = [str(instance['output'][0]) if instance['output'] else "" for instance in instances]  # Handle missing output
print(input_texts, output_texts)




In [3]:
 
# Convert the texts into a Hugging Face Dataset
ds = Dataset.from_dict({'input': input_texts, 'output': output_texts})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["input"], examples["output"], truncation=True, padding="max_length", max_length=512)

# Apply tokenization and set format
tokenized_datasets = ds.map(tokenize_function, batched=True, remove_columns=["input", "output"])
tokenized_datasets.set_format("torch")

# Split dataset into train and eval
train_size = int(0.9 * len(tokenized_datasets))
train_dataset = tokenized_datasets.select(range(train_size))
eval_dataset = tokenized_datasets.select(range(train_size, len(tokenized_datasets)))

# Define data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Map: 100%|██████████| 2223/2223 [00:00<00:00, 5146.14 examples/s]


In [4]:
# Define training arguments
save_dir="finetuned-weights"
training_args = TrainingArguments(
    output_dir=save_dir,
    evaluation_strategy="steps",
    eval_steps=500,
    logging_steps=500,
    save_steps=1000,
    save_total_limit=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    report_to="none",
    fp16=torch.cuda.is_available(),
)
 
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)
 
# Train the model
trainer.train()
 
# Evaluate the model
with torch.no_grad():
    eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")
 
# Save the model and tokenizer

model.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
tokenizer.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
 
# Test the model on a sample input
input_text = "Hello, what is the meaning of life?"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
 
with torch.no_grad():
    outputs = model.generate(inputs["input_ids"], max_length=50, num_return_sequences=1)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated output: {output_text}")
 
# Clear CUDA cache
torch.cuda.empty_cache()

  trainer = Trainer(
  attn_output = torch.nn.functional.scaled_dot_product_attention(
 33%|███▎      | 500/1500 [5:38:29<11:28:15, 41.30s/it]

{'loss': 2.7404, 'grad_norm': 3.805873155593872, 'learning_rate': 3.3400000000000005e-05, 'epoch': 1.0}


                                                       
 33%|███▎      | 500/1500 [5:59:29<11:28:15, 41.30s/it]

{'eval_loss': 2.6262338161468506, 'eval_runtime': 1259.934, 'eval_samples_per_second': 0.177, 'eval_steps_per_second': 0.022, 'epoch': 1.0}


 67%|██████▋   | 1000/1500 [11:41:58<5:38:47, 40.66s/it] 

{'loss': 2.4685, 'grad_norm': 3.4315085411071777, 'learning_rate': 1.6733333333333335e-05, 'epoch': 2.0}


                                                        
 67%|██████▋   | 1000/1500 [12:02:58<5:38:47, 40.66s/it]

{'eval_loss': 2.520962715148926, 'eval_runtime': 1259.7224, 'eval_samples_per_second': 0.177, 'eval_steps_per_second': 0.022, 'epoch': 2.0}


100%|██████████| 1500/1500 [17:57:44<00:00, 42.11s/it]    

{'loss': 2.4205, 'grad_norm': 3.515841007232666, 'learning_rate': 6.666666666666667e-08, 'epoch': 3.0}


                                                      
100%|██████████| 1500/1500 [18:19:43<00:00, 42.11s/it]

{'eval_loss': 2.516984701156616, 'eval_runtime': 1319.3768, 'eval_samples_per_second': 0.169, 'eval_steps_per_second': 0.021, 'epoch': 3.0}


100%|██████████| 1500/1500 [18:19:44<00:00, 43.99s/it]


{'train_runtime': 65984.1098, 'train_samples_per_second': 0.091, 'train_steps_per_second': 0.023, 'train_loss': 2.543170084635417, 'epoch': 3.0}


100%|██████████| 28/28 [20:53<00:00, 44.75s/it]


Evaluation results: {'eval_loss': 2.516984701156616, 'eval_runtime': 1300.1216, 'eval_samples_per_second': 0.172, 'eval_steps_per_second': 0.022, 'epoch': 3.0}


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated output: Hello, what is the meaning of life? Is it to be happy, to make others happy, to be sad, to make others sad, to be in love, to make others in love, to be in hate, to make others in


## Question Generation Finetuning Ahead:

In [5]:
target_file = r"task074_squad1.1_question_generation.json"
with open(target_file, 'r', encoding='utf-8-sig') as f:
    json_data = json.load(f)

# Extract text data (assuming a structure where the data you want is under 'Instances')

instances = json_data['Instances'][0:2223]
input_texts = [str(instance['input']) for instance in instances]  # Convert to string if not already
output_texts = [str(instance['output'][0]) if instance['output'] else "" for instance in instances]  # Handle missing output
print(input_texts, output_texts)




In [6]:
 
# Convert the texts into a Hugging Face Dataset
ds = Dataset.from_dict({'input': input_texts, 'output': output_texts})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["input"], examples["output"], truncation=True, padding="max_length", max_length=512)

# Apply tokenization and set format
tokenized_datasets = ds.map(tokenize_function, batched=True, remove_columns=["input", "output"])
tokenized_datasets.set_format("torch")

# Split dataset into train and eval
train_size = int(0.9 * len(tokenized_datasets))
train_dataset = tokenized_datasets.select(range(train_size))
eval_dataset = tokenized_datasets.select(range(train_size, len(tokenized_datasets)))

# Define data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Map: 100%|██████████| 2223/2223 [00:00<00:00, 4939.62 examples/s]


In [None]:
# Define training arguments
save_dir="finetuned-weights"
training_args = TrainingArguments(
    output_dir=save_dir,
    evaluation_strategy="steps",
    eval_steps=500,
    logging_steps=500,
    save_steps=1000,
    save_total_limit=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    report_to="none",
    fp16=torch.cuda.is_available(),
)
 
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)
 
# Train the model
trainer.train()
 
# Evaluate the model
with torch.no_grad():
    eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")
 
# Save the model and tokenizer

model.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
tokenizer.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
 
# Test the model on a sample input
input_text = "Hello, what is the meaning of life?"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
 
with torch.no_grad():
    outputs = model.generate(inputs["input_ids"], max_length=50, num_return_sequences=1)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated output: {output_text}")
 
# Clear CUDA cache
torch.cuda.empty_cache()

  trainer = Trainer(
 33%|███▎      | 500/1500 [5:58:37<11:33:51, 41.63s/it]

{'loss': 1.8981, 'grad_norm': 2.8968536853790283, 'learning_rate': 3.35e-05, 'epoch': 1.0}



 33%|███▎      | 500/1500 [6:20:35<11:33:51, 41.63s/it]

{'eval_loss': 1.8068513870239258, 'eval_runtime': 1317.7651, 'eval_samples_per_second': 0.169, 'eval_steps_per_second': 0.021, 'epoch': 1.0}


 67%|██████▋   | 1000/1500 [12:05:05<5:40:25, 40.85s/it] 

{'loss': 1.7843, 'grad_norm': 1.962471604347229, 'learning_rate': 1.6833333333333334e-05, 'epoch': 2.0}



 67%|██████▋   | 1000/1500 [12:26:42<5:40:25, 40.85s/it]

{'eval_loss': 1.7890751361846924, 'eval_runtime': 1297.381, 'eval_samples_per_second': 0.172, 'eval_steps_per_second': 0.022, 'epoch': 2.0}


100%|██████████| 1500/1500 [18:11:32<00:00, 41.15s/it]    

{'loss': 1.7536, 'grad_norm': 2.97499418258667, 'learning_rate': 1.6666666666666668e-07, 'epoch': 3.0}



100%|██████████| 1500/1500 [18:32:52<00:00, 41.15s/it]

{'eval_loss': 1.7843841314315796, 'eval_runtime': 1280.3864, 'eval_samples_per_second': 0.174, 'eval_steps_per_second': 0.022, 'epoch': 3.0}


100%|██████████| 1500/1500 [18:32:53<00:00, 44.52s/it]


{'train_runtime': 66773.2403, 'train_samples_per_second': 0.09, 'train_steps_per_second': 0.022, 'train_loss': 1.8120063069661458, 'epoch': 3.0}


100%|██████████| 28/28 [20:39<00:00, 44.27s/it]


Evaluation results: {'eval_loss': 1.7843841314315796, 'eval_runtime': 1284.7724, 'eval_samples_per_second': 0.174, 'eval_steps_per_second': 0.022, 'epoch': 3.0}


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generated output: Hello, what is the meaning of life? Is it to be happy, to make others happy, to be successful, to be loved? Is it to be famous, to be rich, to be powerful? Is it to have a family,


  trainer = Trainer(
  7%|▋         | 110/1500 [1:21:57<17:40:35, 45.78s/it]

KeyboardInterrupt: 

## Sentiment Analysis

In [4]:
import os
import torch
import zipfile
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from accelerate import init_empty_weights
from datasets import Dataset

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 
offload_dir = os.path.expanduser("llama_offload/")
 
os.makedirs(offload_dir, exist_ok=True)

target_file = "task024_cosmosqa_answer_generation.json"
 
# Load tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
tokenizer.pad_token = tokenizer.eos_token
 
# Load the model with accelerate's offloading and device map auto-setup
with init_empty_weights():
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3-8B",
        device_map="auto",
        offload_folder=offload_dir,
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )
 
# Configure LoRA with reduced rank
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
  _ = torch.tensor([0], device=i)
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.47s/it]


In [5]:
target_file = r"task1312_amazonreview_polarity_classification.json"
with open(target_file, 'r', encoding='utf-8-sig') as f:
    json_data = json.load(f)


instances = json_data['Instances'][0:2223]
input_texts = [str(instance['input']) for instance in instances]  # Convert to string if not already
output_texts = [str(instance['output'][0]) if instance['output'] else "" for instance in instances]  # Handle missing output
print(input_texts, output_texts)




In [6]:
 
# Convert the texts into a Hugging Face Dataset
ds = Dataset.from_dict({'input': input_texts, 'output': output_texts})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["input"], examples["output"], truncation=True, padding="max_length", max_length=512)

# Apply tokenization and set format
tokenized_datasets = ds.map(tokenize_function, batched=True, remove_columns=["input", "output"])
tokenized_datasets.set_format("torch")

# Split dataset into train and eval
train_size = int(0.9 * len(tokenized_datasets))
train_dataset = tokenized_datasets.select(range(train_size))
eval_dataset = tokenized_datasets.select(range(train_size, len(tokenized_datasets)))

# Define data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Map: 100%|██████████| 2223/2223 [00:00<00:00, 6073.96 examples/s]


In [7]:
# Define training arguments
save_dir="finetuned-weights"
training_args = TrainingArguments(
    output_dir=save_dir,
    evaluation_strategy="steps",
    eval_steps=500,
    logging_steps=500,
    save_steps=1000,
    save_total_limit=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    report_to="none",
    fp16=torch.cuda.is_available(),
)
 
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)
 
# Train the model
trainer.train()
 
# Evaluate the model
with torch.no_grad():
    eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")
 
# Save the model and tokenizer

model.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
tokenizer.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
 
# Test the model on a sample input
input_text = "Hello, what is the meaning of life?"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
 
with torch.no_grad():
    outputs = model.generate(inputs["input_ids"], max_length=50, num_return_sequences=1)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated output: {output_text}")
 
# Clear CUDA cache
torch.cuda.empty_cache()

  trainer = Trainer(
  attn_output = torch.nn.functional.scaled_dot_product_attention(
 33%|███▎      | 500/1500 [4:27:42<8:59:27, 32.37s/it] 

{'loss': 2.9708, 'grad_norm': 2.651029586791992, 'learning_rate': 3.3400000000000005e-05, 'epoch': 1.0}


                                                      
 33%|███▎      | 500/1500 [4:47:57<8:59:27, 32.37s/it]

{'eval_loss': 2.92049503326416, 'eval_runtime': 1214.9482, 'eval_samples_per_second': 0.184, 'eval_steps_per_second': 0.023, 'epoch': 1.0}


 67%|██████▋   | 1000/1500 [9:46:43<5:13:04, 37.57s/it]  

{'loss': 2.7134, 'grad_norm': 3.778517484664917, 'learning_rate': 1.6800000000000002e-05, 'epoch': 2.0}


                                                       
 67%|██████▋   | 1000/1500 [10:08:09<5:13:04, 37.57s/it]

{'eval_loss': 2.764974594116211, 'eval_runtime': 1286.8725, 'eval_samples_per_second': 0.173, 'eval_steps_per_second': 0.022, 'epoch': 2.0}


100%|██████████| 1500/1500 [15:05:53<00:00, 34.30s/it]    

{'loss': 2.6276, 'grad_norm': 3.564077615737915, 'learning_rate': 1.3333333333333334e-07, 'epoch': 3.0}


                                                      
100%|██████████| 1500/1500 [15:26:46<00:00, 34.30s/it]

{'eval_loss': 2.7620482444763184, 'eval_runtime': 1252.6598, 'eval_samples_per_second': 0.178, 'eval_steps_per_second': 0.022, 'epoch': 3.0}


100%|██████████| 1500/1500 [15:26:46<00:00, 37.07s/it]


{'train_runtime': 55606.7823, 'train_samples_per_second': 0.108, 'train_steps_per_second': 0.027, 'train_loss': 2.7705938313802085, 'epoch': 3.0}


100%|██████████| 28/28 [20:12<00:00, 43.29s/it]


Evaluation results: {'eval_loss': 2.7620482444763184, 'eval_runtime': 1256.2983, 'eval_samples_per_second': 0.178, 'eval_steps_per_second': 0.022, 'epoch': 3.0}


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated output: Hello, what is the meaning of life? Is there a meaning to life? How do you know what you are doing is right? Why is there evil in the world? What is the purpose of life? What is the meaning of life?


In [9]:
input_text = "' I hate going to the movies!' - What is the sentiment of this sentence that i just gave"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
 
with torch.no_grad():
    outputs = model.generate(inputs["input_ids"], max_length=50, num_return_sequences=1)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated output: {output_text}")
 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generated output: ' I hate going to the movies!' - What is the sentiment of this sentence that i just gave you? It is negative. This is an example of a positive sentence: "I love going to the movies!" This sentence is negative: "
