In [1]:
!pip show torch | grep Version # Prints the installed torch version; its +cuXXX suffix is the CUDA build, used to choose the xformers wheel index when installing xformers

Version: 2.5.1+cu121


In [2]:
!pip install textstat

Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.17.0 textstat-0.7.4


## Some basic EDA of human-written vs AI-written text

In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import textstat

# Fetch the NLTK data packages used below: tokenizer models, the POS tagger and WordNet.
for nltk_resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger_eng', 'wordnet'):
    nltk.download(nltk_resource)

def analyze_text(text):
    """Compute simple lexical, readability and part-of-speech statistics for a text.

    Args:
        text: The raw string to analyse.

    Returns:
        dict with the keys ``token_count``, ``unique_words``, ``lexical_diversity``,
        ``sophisticated_word_ratio``, ``flesch_reading_ease``,
        ``flesch_kincaid_grade`` and ``pos_distribution`` (POS tag -> count).
        Both ratios are 0.0 when the text yields no tokens.
    """
    # Tokenization. Punctuation marks are tokens too, so they count in every total.
    tokens = word_tokenize(text)
    token_count = len(tokens)

    # Part of Speech Tagging
    pos_tags = nltk.pos_tag(tokens)

    # Lexical Diversity (case-sensitive: "The" and "the" are distinct entries)
    unique_words = set(tokens)

    # Vocabulary Sophistication
    def is_sophisticated_word(word):
        # Heuristic: a token with more than one WordNet synset counts as "sophisticated".
        return len(wordnet.synsets(word)) > 1

    sophisticated_count = sum(1 for word in tokens if is_sophisticated_word(word))

    def ratio_of_tokens(count):
        # Guard against ZeroDivisionError for empty / whitespace-only input.
        return count / token_count if token_count else 0.0

    # Readability Metrics
    flesch_reading_ease = textstat.flesch_reading_ease(text)
    flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)

    return {
        'token_count': token_count,
        'unique_words': len(unique_words),
        'lexical_diversity': ratio_of_tokens(len(unique_words)),
        'sophisticated_word_ratio': ratio_of_tokens(sophisticated_count),
        'flesch_reading_ease': flesch_reading_ease,
        'flesch_kincaid_grade': flesch_kincaid_grade,
        'pos_distribution': dict(nltk.FreqDist(tag for word, tag in pos_tags))
    }

# Example texts: one AI-written and one human-written paragraph on the same topic.
text_ai = "Deforestation is the process of cutting down trees to use their wood for different purposes, such as building houses or making paper. This process has a negative effect on the environment and on the ecosystem. The most common negative effect is the loss of biodiversity, which means the loss of different species of animals and plants."

text_human = "Deforestation is cutting down trees to use the wood as a variety of things like creating house or paper. This process is detrimental to the environment and ecosystem. Losing biodiversity (the variety of different animals and plants) is the most common and its adverse effect tantamount to say the least."

# Text 1 is the AI sample, Text 2 the human sample.
for heading, sample_text in (("Text 1 Analysis:", text_ai), ("\nText 2 Analysis:", text_human)):
    print(heading, analyze_text(sample_text))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Text 1 Analysis: {'token_count': 61, 'unique_words': 43, 'lexical_diversity': 0.7049180327868853, 'sophisticated_word_ratio': 0.5901639344262295, 'flesch_reading_ease': 52.49, 'flesch_kincaid_grade': 10.6, 'pos_distribution': {'NN': 13, 'VBZ': 4, 'DT': 8, 'IN': 8, 'VBG': 2, 'RP': 1, 'NNS': 6, 'TO': 1, 'VB': 1, 'PRP$': 1, 'JJ': 6, ',': 2, 'CC': 3, '.': 3, 'RBS': 1, 'WDT': 1}}

Text 2 Analysis: {'token_count': 55, 'unique_words': 41, 'lexical_diversity': 0.7454545454545455, 'sophisticated_word_ratio': 0.5454545454545454, 'flesch_reading_ease': 54.52, 'flesch_kincaid_grade': 9.8, 'pos_distribution': {'NN': 12, 'VBZ': 3, 'VBG': 3, 'RP': 1, 'NNS': 4, 'TO': 3, 'VB': 2, 'DT': 7, 'IN': 4, 'CC': 4, '.': 3, 'JJ': 4, '(': 1, ')': 1, 'RBS': 1, 'PRP$': 1, 'JJS': 1}}


In [4]:
import nltk
import spacy
from textstat import textstat
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Load spaCy English model
# Loaded once at module level; AdvancedTextAnalyzer uses this shared `nlp` object to parse
# every text it is given.
nlp = spacy.load('en_core_web_sm')

class AdvancedTextAnalyzer:
    """Compute spaCy-based linguistic metrics and TF-IDF features for a set of texts.

    Every text is parsed once with the module-level ``nlp`` pipeline when the analyzer
    is constructed; the metric methods reuse those parsed documents.
    """

    def __init__(self, texts):
        """Store ``texts`` (a sequence of strings) and parse each one with ``nlp``."""
        self.texts = texts
        self.docs = [nlp(text) for text in texts]

    def linguistic_complexity_metrics(self):
        """Return one metrics dict per text, in the same order as ``self.texts``.

        Each dict holds the token count, the number of distinct lower-cased tokens,
        the named-entity count, the average dependency depth, the share of "complex"
        sentences, two textstat readability scores and the coarse POS distribution.
        """
        metrics = []
        for text, doc in zip(self.texts, self.docs):
            metrics.append({
                'token_count': len(doc),
                'unique_words': len({token.text.lower() for token in doc}),
                'named_entities_count': len(list(doc.ents)),
                'dependency_depth': self._avg_dependency_depth(doc),
                'grammatical_complexity': self._grammatical_complexity(doc),
                'readability': {
                    'flesch_reading_ease': textstat.flesch_reading_ease(text),
                    'flesch_kincaid_grade': textstat.flesch_kincaid_grade(text)
                },
                'pos_distribution': self._pos_distribution(doc)
            })
        return metrics

    def _avg_dependency_depth(self, doc):
        """Return the mean depth of the tokens in ``doc``, or 0 when there are none."""
        depths = [self._token_depth(token) for token in doc]
        return np.mean(depths) if depths else 0

    def _token_depth(self, token, depth=0):
        """Return ``depth`` plus the number of head links from ``token`` up to its root.

        A root token is its own head. The chain is walked iteratively so that very
        deep chains cannot exhaust Python's recursion limit.
        """
        # Compare with == (not `is`), matching the original root test.
        while not (token.head == token):
            token = token.head
            depth += 1
        return depth

    def _grammatical_complexity(self, doc):
        """Return the fraction of sentences whose root has more than two direct children.

        Returns 0 when the document has no sentences.
        """
        # Materialise the sentences once instead of iterating doc.sents twice.
        sentences = list(doc.sents)
        if not sentences:
            return 0
        complex_sentences = sum(
            1 for sent in sentences if len(list(sent.root.children)) > 2
        )
        return complex_sentences / len(sentences)

    def _pos_distribution(self, doc):
        """Return a dict mapping each ``token.pos_`` value in ``doc`` to its count."""
        pos_counts = {}
        for token in doc:
            pos_counts[token.pos_] = pos_counts.get(token.pos_, 0) + 1
        return pos_counts

    def semantic_analysis(self, top_n=5):
        """Return the highest-weighted TF-IDF terms for each text.

        The vectorizer is fitted on all texts of this analyzer together.

        Args:
            top_n: Maximum number of terms reported per text (default 5).

        Returns:
            One dict per text with ``top_semantic_features`` (a list of
            ``(term, weight)`` pairs, highest weight first) and
            ``semantic_richness`` (the number of pairs in that list).
        """
        # TF-IDF for semantic richness
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(self.texts)
        feature_names = vectorizer.get_feature_names_out()

        semantic_metrics = []
        for row_index in range(tfidf_matrix.shape[0]):
            weights = tfidf_matrix[row_index].toarray()[0]
            # Drop zero-weight terms: they belong to the shared vocabulary but do not
            # occur in this text, so they must not be reported as its top features.
            present_terms = [
                (term, weight) for term, weight in zip(feature_names, weights) if weight > 0
            ]
            top_features = sorted(
                present_terms, key=lambda pair: pair[1], reverse=True
            )[:top_n]

            semantic_metrics.append({
                'top_semantic_features': top_features,
                'semantic_richness': len(top_features)
            })

        return semantic_metrics

# Texts for analysis. The human text comes first here, whereas the earlier analyze_text
# cell printed the AI text as "Text 1", so each printed line names its source explicitly.
texts = [text_human, text_ai]
text_labels = ["human", "AI"]

analyzer = AdvancedTextAnalyzer(texts)
linguistic_complexity = analyzer.linguistic_complexity_metrics()
semantic_analysis = analyzer.semantic_analysis()

print("Linguistic Complexity Metrics:")
for i, (label, metric) in enumerate(zip(text_labels, linguistic_complexity), 1):
    print(f"Text {i} ({label}):", metric)

print("\nSemantic Analysis:")
for i, (label, semantic) in enumerate(zip(text_labels, semantic_analysis), 1):
    print(f"Text {i} ({label}):", semantic)

Linguistic Complexity Metrics:
Text 1: {'token_count': 55, 'unique_words': 41, 'named_entities_count': 0, 'dependency_depth': 2.672727272727273, 'grammatical_complexity': 1.0, 'readability': {'flesch_reading_ease': 54.52, 'flesch_kincaid_grade': 9.8}, 'pos_distribution': {'NOUN': 16, 'AUX': 3, 'VERB': 5, 'ADP': 6, 'PART': 2, 'DET': 7, 'CCONJ': 4, 'PUNCT': 5, 'ADJ': 5, 'ADV': 1, 'PRON': 1}}
Text 2: {'token_count': 61, 'unique_words': 42, 'named_entities_count': 0, 'dependency_depth': 3.819672131147541, 'grammatical_complexity': 1.0, 'readability': {'flesch_reading_ease': 52.49, 'flesch_kincaid_grade': 10.6}, 'pos_distribution': {'NOUN': 18, 'AUX': 2, 'DET': 8, 'ADP': 9, 'VERB': 6, 'PART': 1, 'PRON': 2, 'ADJ': 6, 'PUNCT': 5, 'CCONJ': 3, 'ADV': 1}}

Semantic Analysis:
Text 1: {'top_semantic_features': [('the', 0.48649278110604555), ('and', 0.2918956686636273), ('is', 0.2918956686636273), ('to', 0.2918956686636273), ('variety', 0.2734994516764199)], 'semantic_richness': 5}
Text 2: {'top_se

# Llama finetuning starts here

*Don't worry about the dependency conflicts that pip reports during installation; they will not hinder our project.*

In [None]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "trl<0.9.0" peft accelerate bitsandbytes
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121 #cuda 12.1

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-6fqkg2my/unsloth_4cc1952b7eaf48798ceaa432eb198802
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-6fqkg2my/unsloth_4cc1952b7eaf48798ceaa432eb198802
  Resolved https://github.com/unslothai/unsloth.git to commit 8558bc92b06f9128499484ef737fa71b966ffc23
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2024.11.8 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2024.11.8-py3-none-any.whl.metadata (16 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.gi

Collecting trl<0.9.0
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/245.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
  Attempting uninstall: trl
    Found existing installation: trl 0.12.1
    Uninstalling trl-0.12.1:
      Successfully uninstalled trl-0.12.1
Successfully installed trl-0.8.6
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting xformers
  Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl (16.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.7/16.7 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xformers
Successfully installed xformers-0.0.28.post3


In [None]:
from unsloth import FastLanguageModel
import torch
import os
from transformers import TextStreamer
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# 1. Configuration
# Maximum sequence length; passed to the model loaders and to the SFTTrainer.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth choose the dtype automatically — confirm
# against the Unsloth documentation.
dtype = None
# Passed as load_in_4bit to FastLanguageModel.from_pretrained.
load_in_4bit = True
# Alpaca-style prompt template. The three {} slots are filled positionally with the
# instruction, the input and the response; the response slot is left empty for generation.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

In [None]:
# Essay prompt that fills the instruction slot of alpaca_prompt in the generation cells.
instruction = "Write a creative and engaging essay based on the given prompt: The effects of deforestation and alternatives to fill the gap of the resources and land."
# Empty value for the prompt's "Input" slot. NOTE: this name shadows Python's built-in
# input(); it is kept because the generation cells reference it by this name.
input = ""

In [None]:
# Load the pre-quantized 4-bit Llama 3.1 8B base checkpoint and its tokenizer via Unsloth.
# The Hugging Face token is read from the HF_TOKEN environment variable (None when unset).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=os.getenv("HF_TOKEN")
)

# Switch to inference mode for the baseline generation that precedes fine-tuning.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaExtendedRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): L

In [None]:
# Render the Alpaca prompt with an empty response slot so the model writes the essay,
# then tokenize it and move the tensors to the GPU.
inputs = tokenizer(
    [alpaca_prompt.format(instruction, input, "")],
    return_tensors="pt",
).to("cuda")

# Sampling settings for the baseline (pre-fine-tuning) generation.
generation_config = dict(
    max_new_tokens=1000,
    do_sample=True,
    top_p=0.9,  # Nucleus sampling
    temperature=0.7,  # Adjust for diversity
    repetition_penalty=1.2,  # Penalize repetitive tokens
    no_repeat_ngram_size=2,  # Avoid repeating n-grams
)

# Stream the text to the cell output while it is generated; the returned ids are discarded.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, **generation_config)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Write a creative and engaging essay based on the given prompt: The effects of deforestation and alternatives to fill the gap of the resources and land.
### Input:

### Response:
Deforestation refers to the destruction or removal of forests from the Earth's surface for human purposes such as farming, logging, mining, etc., which has caused significant ecological changes in our environment leading us towards environmental disasters like global warming, floods, droughts, soil erosion, loss of biodiversity among many other things. It also impacts local communities by affecting their way of life through increased poverty levels due to lack food security resulting from reduced access opportunities provided by forest goods & services e.g., timber production; water purification via filtration systems crea

## Loading and shaping dataset in alpaca format for fine-tuning

In [None]:
# End-of-sequence token, appended to every training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples, inp, out):
    """Render a batch of examples as Alpaca-formatted training strings.

    Args:
        examples: Batched mapping of column name -> list of values.
        inp: Name of the column used for the instruction slot.
        out: Name of the column used for the response slot.

    Returns:
        ``{"text": [...]}`` with one formatted string per example, each ending in
        ``EOS_TOKEN``.
    """
    # The input slot stays empty; for multi-step/input problems (code, maths, ...) it
    # would carry the extra context.
    rendered = [
        alpaca_prompt.format(prompt_text, "", response_text) + EOS_TOKEN
        for prompt_text, response_text in zip(examples[inp], examples[out])
    ]
    return {"text": rendered}

In [None]:
# Upper bound on the number of examples kept from each source dataset.
MAX_EXAMPLES_PER_DATASET = 100_000

# Load datasets
dataset = load_dataset("sentence-transformers/eli5", split="train") # reddit explain like I am 5 dataset
dataset1 = load_dataset("gustavecortal/diverse_french_news", split="train") # French news dataset
dataset2 = load_dataset("spanish-ir/messirve", "no_country", split="train") # Spanish qa (web) dataset

# Truncate each dataset to its first MAX_EXAMPLES_PER_DATASET rows (no shuffling here).
dataset = dataset.select(range(min(MAX_EXAMPLES_PER_DATASET, len(dataset))))
dataset1 = dataset1.select(range(min(MAX_EXAMPLES_PER_DATASET, len(dataset1))))

# Filter dataset2 for match_score == 1 (for the best answers) and truncate
dataset2 = dataset2.filter(lambda example: example["match_score"] == 1)
dataset2 = dataset2.select(range(min(MAX_EXAMPLES_PER_DATASET, len(dataset2))))

# Apply formatting: map each dataset's prompt/response columns onto the Alpaca template.
dataset = dataset.map(formatting_prompts_func, batched=True, fn_kwargs={"inp": "question", "out": "answer"})
dataset1 = dataset1.map(formatting_prompts_func, batched=True, fn_kwargs={"inp": "title", "out": "text"})
dataset2 = dataset2.map(formatting_prompts_func, batched=True, fn_kwargs={"inp": "query", "out": "docid_text"})

README.md:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

summarization_train.csv:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

summarization_validation.csv:   0%|          | 0.00/33.2M [00:00<?, ?B/s]

summarization_test.csv:   0%|          | 0.00/33.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/335407 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/20.7k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/85.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/28.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/356040 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/101359 [00:00<?, ? examples/s]

Filter:   0%|          | 0/356040 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [None]:
from datasets import concatenate_datasets

# Merge the three formatted datasets, then shuffle with a fixed seed so the mix of
# sources is the same on every run.
combined_dataset = concatenate_datasets([dataset, dataset1, dataset2]).shuffle(seed=3407)

## Define LoRA config and build trainer

In [None]:
# Attach LoRA adapters to the loaded base model. The name `model` is rebound to the
# adapter-wrapped model, which is what the trainer and the save cell use afterwards.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],  # attention and MLP projections
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,  # same seed value as the dataset shuffle and the trainer
)

Unsloth 2024.11.10 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Use bf16 when the GPU supports it, otherwise fall back to fp16.
use_bf16 = is_bfloat16_supported()

# Optimisation settings: 100 steps with a per-device batch of 2 and 4 accumulation steps.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=100,
    learning_rate=1e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Supervised fine-tuning on the "text" column of the combined dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=combined_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

Map (num_proc=2):   0%|          | 0/300000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


## *A custom loss function we created to counter uniformity. It needs more R&D to make the whole process more sophisticated, so it is left commented out; for now we are getting good enough results without it.*

In [None]:
# from transformers import GPT2LMHeadModel
# import torch

# # Custom loss with diversity penalty
# class DiversityPenaltyLoss:
#     def __init__(self, base_loss):
#         self.base_loss = base_loss

#     def __call__(self, logits, labels):
#         # Compute base loss
#         loss = self.base_loss(logits.view(-1, logits.size(-1)), labels.view(-1))

#         # Compute token diversity (entropy-based penalty)
#         probs = torch.nn.functional.softmax(logits, dim=-1)
#         token_entropy = -torch.sum(probs * torch.log(probs + 1e-10), dim=-1).mean()
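#         # TODO: entropy is at most log(vocab_size), so dividing by logits.size(-1) (the vocab size itself) keeps this penalty very close to 1.0; normalise by log(vocab_size) if a penalty in [0, 1] is intended.
#         penalty = 1.0 - token_entropy / logits.size(-1)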

#         # Add penalty to the loss
#         total_loss = loss + 0.1 * penalty  # Adjust penalty weight as needed
#         return total_loss

# # Update trainer with the custom loss
# trainer.args.loss_fn = DiversityPenaltyLoss(torch.nn.CrossEntropyLoss())

## Training

In [None]:
# Before training: record the GPU memory that is already reserved.
start_memory = torch.cuda.memory_reserved(0)
start_memory_gb = start_memory / (1024 ** 3)  # Convert to GB

# Print initial GPU memory stats
gpu_properties = torch.cuda.get_device_properties(0)
total_memory_gb = gpu_properties.total_memory / (1024 ** 3)
print(f"GPU: {gpu_properties.name}, Total Memory: {total_memory_gb:.2f} GB")
print(f"Initial Memory Reserved: {start_memory_gb:.2f} GB")

# Train the model and record training stats
trainer_stats = trainer.train()

# After training: read the PEAK reserved memory. memory_reserved() only reports what is
# reserved at the moment of the call, which under-reports the peak whenever the caching
# allocator has released memory; max_memory_reserved() tracks the high-water mark.
end_memory = torch.cuda.max_memory_reserved(0)
end_memory_gb = end_memory / (1024 ** 3)  # Convert to GB

# Memory attributable to training: peak minus what was reserved before training began.
memory_used_gb = end_memory_gb - start_memory_gb
memory_used_percentage = (end_memory / gpu_properties.total_memory) * 100

# Print training stats and memory usage
train_time_seconds = trainer_stats.metrics['train_runtime']
train_time_minutes = train_time_seconds / 60
print(f"Training Time: {train_time_seconds:.2f} seconds ({train_time_minutes:.2f} minutes)")
print(f"Peak Memory Reserved: {end_memory_gb:.2f} GB")
print(f"Memory Used for Training: {memory_used_gb:.2f} GB")
print(f"Memory Used Percentage: {memory_used_percentage:.2f}%")

GPU: Tesla T4, Total Memory: 14.75 GB
Initial Memory Reserved: 6.08 GB


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 300,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 100
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.9994
2,2.3393
3,2.2847
4,2.3066
5,2.1535
6,2.0667
7,1.8901
8,2.1022
9,2.055
10,2.0468


Training Time: 1298.32 seconds (21.64 minutes)
Peak Memory Reserved: 8.13 GB
Memory Used for Training: 2.04 GB
Memory Used Percentage: 55.12%


In [None]:
# Save the fine-tuned model
# NOTE(review): `model` is the adapter-wrapped model at this point, so this presumably
# writes only the LoRA adapter weights rather than a merged checkpoint — confirm.
model.save_pretrained("finetuned_model")
# Store the tokenizer files in the same directory so both can be reloaded from one path.
tokenizer.save_pretrained("finetuned_model")

('finetuned_model/tokenizer_config.json',
 'finetuned_model/special_tokens_map.json',
 'finetuned_model/tokenizer.json')

In [None]:
# Load the fine-tuned model
# Reloads from the local "finetuned_model" directory with the same sequence length,
# dtype and 4-bit settings that were used for the base model.
finetuned_model, finetuned_tokenizer = FastLanguageModel.from_pretrained(
    model_name="finetuned_model",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=os.getenv("HF_TOKEN")
)
# Switch the reloaded model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(finetuned_model)

==((====))==  Unsloth 2024.11.10: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [None]:
# Same prompt as the baseline run, tokenized with the fine-tuned model's tokenizer and
# moved to the GPU; the response slot is left empty so the model writes the essay.
inputs = finetuned_tokenizer(
    [alpaca_prompt.format(instruction, input, "")],
    return_tensors="pt",
).to("cuda")

# Sampling settings for the fine-tuned model (larger token budget than the baseline run).
generation_config = dict(
    max_new_tokens=1500,
    do_sample=True,
    top_p=0.9,  # Nucleus sampling
    temperature=0.7,  # Adjust for diversity
    repetition_penalty=1.2,  # Penalize repetitive tokens
    no_repeat_ngram_size=2,  # Avoid repeating n-grams
)

# Stream the text to the cell output while it is generated; the returned ids are discarded.
text_streamer = TextStreamer(finetuned_tokenizer)
_ = finetuned_model.generate(**inputs, streamer=text_streamer, **generation_config)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Write a creative and engaging essay based on the given prompt: The effects of deforestation and alternatives to fill the gap of the resources and land.
### Input:

### Response:
Deforestation can lead to soil erosion because it removes vegetation from the ground which would otherwise protect the soil. Deforested areas are also more likely to be exposed to wind or water runoff which will erode away at the top layer of soil over time. Soil loss occurs when rainwater does not seep into the earth but instead runs off quickly after rainfall due to bare patches left behind by cleared forests (e.g., roads). This causes flooding in nearby rivers leading upriver communities downstream who depend upon them for drinking water as well as irrigation during dry seasons; this problem becomes exacerbated where th

## *Download the model (LoRA adapters) locally*

In [None]:
# import shutil
# from google.colab import files

# # Path to the directory you want to zip
# dir_path = '/content/finetuned_model'  # Replace with your directory path
# zip_path = '/content/finetuned_model.zip'

# # Zip the directory
# shutil.make_archive(zip_path.replace('.zip', ''), 'zip', dir_path)

# # Download the zipped file
# files.download(zip_path)

## *Save the LoRA adapters to drive*

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the saved model directory can be copied into it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Destination folder on the mounted Drive for the fine-tuned model files.
gdrive_dir = '/content/drive/My Drive/llama3.1fintuned'
# exist_ok=True makes this a no-op when the folder already exists, so the message below
# is printed whether or not the directory was newly created.
os.makedirs(gdrive_dir, exist_ok=True)
print(f"Directory created at: {gdrive_dir}")

Directory created at: /content/drive/My Drive/llama3.1fintuned


In [None]:
import shutil
# Local directory written by the save cell.
source_folder = '/content/finetuned_model'
# dirs_exist_ok=True lets this cell be re-run: without it copytree raises FileExistsError
# once the destination exists. Existing files with the same name are overwritten.
shutil.copytree(source_folder, os.path.join(gdrive_dir, 'llama_fintuned'), dirs_exist_ok=True)
