# Test Model

In [None]:
%%capture
# Install the released Unsloth package first so that pip also pulls in its dependencies.
!pip install unsloth
# Then replace it with the latest nightly build from GitHub; --no-deps leaves the
# dependencies installed by the previous command untouched.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

## Definition of the Alpaca Prompt Template for Generating Contextual Responses

In [None]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

## Configuration of Model Parameters for Efficient Inference

In [None]:
# Maximum sequence length handed to the model loader. Any value can be chosen;
# Unsloth applies RoPE scaling internally.
max_seq_length = 2048
# Tensor dtype for the loader. None means auto-detection
# (float16 on Tesla T4 / V100, bfloat16 on Ampere and newer).
dtype = None
# Passed to the loader as `load_in_4bit`.
load_in_4bit = True

## Loading and Configuring Pre-trained Language Model for Inference with FastLanguageModel

In [None]:
from unsloth import FastLanguageModel

# Load the fine-tuned summarization checkpoint (the model that was used for
# training) together with its tokenizer, using the settings from the
# configuration cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Nourhen2001/SummarizeReviews_model_v2",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Enable Unsloth's native 2x faster inference mode. The trailing semicolon keeps
# the cell from displaying the call's return value.
FastLanguageModel.for_inference(model);


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


## Generating Customer Review Summaries Using Pre-trained Language Model with Tokenizer and TextStreamer

In [None]:

from transformers import TextStreamer

# Pieces of the Alpaca prompt for this example.
instruction = "Summarize customer reviews."
review_input = "Product: Bluetooth Headphones - SoundBeats | Positive: 'Excellent sound quality at a great price.', 'Fast delivery and helpful customer support during setup.', 'Great value for money.' | Negative: 'Could be cheaper for the build quality.', 'Delivery was delayed a few days, but customer service was responsive."

# The third argument is the blank "output" slot, left empty for generation.
prompt = alpaca_prompt.format(instruction, review_input, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Stream the generation to the cell output as it is produced; the returned
# value of generate() is not needed here.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=1000)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Summarize customer reviews.

### Input:
Product: Bluetooth Headphones - SoundBeats | Positive: 'Excellent sound quality at a great price.', 'Fast delivery and helpful customer support during setup.', 'Great value for money.' | Negative: 'Could be cheaper for the build quality.', 'Delivery was delayed a few days, but customer service was responsive.

### Response:
SoundBeats Bluetooth headphones offer great value for money with excellent sound quality, fast delivery, and good customer support. Some customers feel the build quality could be better, and delivery was delayed for a few.<|eot_id|>


**THE RECOMMENDATION MODEL**


In [None]:
from unsloth import FastLanguageModel


In [None]:
%%capture
# Load a model for recommendations (could be same/different from your summarization model)
rec_model, rec_tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-instruct-v0.2",  # Small & fast for recommendations
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)


FastLanguageModel.for_inference(rec_model)

In [None]:
def generate_recommendations(summary, return_full_text=False):
    """Generate business recommendations for a customer-feedback summary.

    A few-shot prompt containing two worked examples is built around
    ``summary``, tokenized with ``rec_tokenizer`` and completed by
    ``rec_model`` (at most 256 new tokens).

    Args:
        summary: Feedback text inserted after "Now generate recommendations for:".
        return_full_text: When True, return the decoded prompt followed by the
            completion (the previous behaviour of this function). When False
            (default), return only the text generated after the prompt.

    Returns:
        The decoded model output as a string.
    """
    recommendation_prompt = """I will give you customer feedback and you respond with 3 business recommendations.

Example 1:
Feedback: "Great battery life but uncomfortable fit"
Recommendations:
1. Redesign ear cushions for better comfort while maintaining battery
2. Offer multiple ear tip sizes in the package
3. Market the battery life as a key differentiator

Example 2:
Feedback: "Fast shipping but product arrived damaged"
Recommendations:
1. Improve packaging materials to prevent damage
2. Implement quality checks before shipping
3. Create a faster replacement policy for damaged items

Now generate recommendations for:
{}""".format(summary)

    inputs = rec_tokenizer([recommendation_prompt], return_tensors="pt").to("cuda")
    outputs = rec_model.generate(**inputs, max_new_tokens=256)

    if return_full_text:
        return rec_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # generate() returns the prompt tokens followed by the newly generated ones,
    # so decoding the whole sequence would echo the entire few-shot prompt.
    # Skip the prompt tokens and decode only the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    return rec_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

# Example: feed a hand-written summary to the recommendation model and show the result.
summary = (
    "SoundBeats headphones offer excellent sound quality at a reasonable price, with fast delivery and helpful customer support. "
    "However, some feel the build quality could be better for the price, and delivery was delayed for a few days."
)
recommendations = generate_recommendations(summary)
print(recommendations)

I will give you customer feedback and you respond with 3 business recommendations.

Example 1:
Feedback: "Great battery life but uncomfortable fit"
Recommendations:
1. Redesign ear cushions for better comfort while maintaining battery
2. Offer multiple ear tip sizes in the package
3. Market the battery life as a key differentiator

Example 2:
Feedback: "Fast shipping but product arrived damaged"
Recommendations:
1. Improve packaging materials to prevent damage
2. Implement quality checks before shipping
3. Create a faster replacement policy for damaged items

Now generate recommendations for:
SoundBeats headphones offer excellent sound quality at a reasonable price, with fast delivery and helpful customer support. However, some feel the build quality could be better for the price, and delivery was delayed for a few days.

Recommendations:
1. Upgrade materials used in the headphone construction to improve perceived value
2. Implement more robust shipping methods to ensure timely deliver

In [None]:
# Step 1: summarize the raw reviews with the summarization model.
review_instruction = "Summarize customer reviews."
review_text = "Product: Bluetooth Headphones - SoundBeats | Positive: 'Excellent sound quality at a great price.', 'Fast delivery and helpful customer support during setup.', 'Great value for money.' | Negative: 'Could be cheaper for the build quality.', 'Delivery was delayed a few days, but customer service was responsive."
summary_prompt = alpaca_prompt.format(review_instruction, review_text, "")
inputs = tokenizer([summary_prompt], return_tensors="pt").to("cuda")

summary = model.generate(**inputs, max_new_tokens=1000)
summary_text = tokenizer.decode(summary[0], skip_special_tokens=True)

# The decoded text still contains the prompt; keep only what follows the last
# "### Response:" header.
clean_summary = summary_text.rsplit("### Response:", 1)[-1].strip()

# Step 2: pass the cleaned summary to the recommendation model.
print("Summary from Model 1:\n", clean_summary)
print("\nRecommendations from Model 2:\n", generate_recommendations(clean_summary))


Summary from Model 1:
 SoundBeats Bluetooth headphones are praised for their sound quality and value, though some feel the build quality and delivery could be improved.

Recommendations from Model 2:
 I will give you customer feedback and you respond with 3 business recommendations.

Example 1:
Feedback: "Great battery life but uncomfortable fit"
Recommendations:
1. Redesign ear cushions for better comfort while maintaining battery
2. Offer multiple ear tip sizes in the package
3. Market the battery life as a key differentiator

Example 2:
Feedback: "Fast shipping but product arrived damaged"
Recommendations:
1. Improve packaging materials to prevent damage
2. Implement quality checks before shipping
3. Create a faster replacement policy for damaged items

Now generate recommendations for:
SoundBeats Bluetooth headphones are praised for their sound quality and value, though some feel the build quality and delivery could be improved.

Recommendations:
1. Upgrade materials used in the he

In [None]:
%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Load the first model (summarization).
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Nourhen2001/SummarizeReviews_model_v2",
    max_seq_length=1024,
    dtype=None,
    load_in_4bit=True,  # use 4-bit weights
    device_map="auto",  # let the loader place the model across GPU and CPU
    # NOTE: `llm_int8_enable_fp32_cpu_offload` must not be passed here. It is a
    # bitsandbytes quantization-config option, not a loader argument; passing it
    # raised "TypeError: LlamaForCausalLM.__init__() got an unexpected keyword
    # argument 'llm_int8_enable_fp32_cpu_offload'".
)

FastLanguageModel.for_inference(model)

# Load the second model (recommendations).
rec_model, rec_tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-instruct-v0.2",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(rec_model)

# Define prompt formats
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

recommendation_prompt = """I will give you customer feedback and you respond with 3 business recommendations.

Example 1:
Feedback: "Great battery life but uncomfortable fit"
Recommendations:
1. Redesign ear cushions for better comfort while maintaining battery life.
2. Offer multiple ear tip sizes in the package.
3. Market the battery life as a key differentiator.

Example 2:
Feedback: "Fast shipping but product arrived damaged"
Recommendations:
1. Improve packaging materials to prevent damage.
2. Implement quality checks before shipping.
3. Create a faster replacement policy for damaged items.

Now generate recommendations for:
{}"""

def generate_summary(reviews):
    """Summarize customer reviews with the summarization model.

    The reviews are placed in the input slot of ``alpaca_prompt`` under the
    instruction "Summarize customer reviews.", tokenized with ``tokenizer`` and
    completed by ``model`` (at most 1000 new tokens).

    Args:
        reviews: Review text to summarize.

    Returns:
        The decoded text that follows the last "### Response:" marker, with
        surrounding whitespace removed.
    """
    prompt = alpaca_prompt.format("Summarize customer reviews.", reviews, "")
    encoded = tokenizer([prompt], return_tensors="pt").to("cuda")

    generated = model.generate(**encoded, max_new_tokens=1000)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # The decoded text includes the prompt; keep only the part after the
    # final response header.
    _, _, response = decoded.rpartition("### Response:")
    return response.strip()

def generate_recommendations(summary, return_full_text=False):
    """Generate business recommendations for a customer-feedback summary.

    ``summary`` is inserted into the few-shot ``recommendation_prompt``
    template, tokenized with ``rec_tokenizer`` and completed by ``rec_model``
    (at most 256 new tokens).

    Args:
        summary: Feedback text to generate recommendations for.
        return_full_text: When True, return the decoded prompt followed by the
            completion (the previous behaviour of this function). When False
            (default), return only the text generated after the prompt.

    Returns:
        The decoded model output as a string.
    """
    inputs = rec_tokenizer(
        [recommendation_prompt.format(summary)], return_tensors="pt"
    ).to("cuda")

    outputs = rec_model.generate(**inputs, max_new_tokens=256)
    if return_full_text:
        return rec_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # generate() returns the prompt tokens followed by the newly generated ones,
    # so decoding the whole sequence would echo the entire few-shot prompt.
    # Skip the prompt tokens and decode only the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    return rec_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

# Sample input: one product with quoted positive and negative review snippets,
# separated by "|" and spread over three lines.
customer_reviews = """Product: Bluetooth Headphones - SoundBeats | Positive: 'Excellent sound quality at a great price.',
'Fast delivery and helpful customer support during setup.', 'Great value for money.' |
Negative: 'Could be cheaper for the build quality.', 'Delivery was delayed a few days, but customer service was responsive.'"""

# Step 1: Generate a summary of the reviews with the summarization model.
summary_result = generate_summary(customer_reviews)
print("Summary from Model 1:\n", summary_result)

# Step 2: Feed that summary to the recommendation model.
recommendations_result = generate_recommendations(summary_result)
print("\nRecommendations from Model 2:\n", recommendations_result)


TypeError: LlamaForCausalLM.__init__() got an unexpected keyword argument 'llm_int8_enable_fp32_cpu_offload'

In [None]:
# Show the current GPU status (model, temperature, power, memory in use) via the NVIDIA CLI.
!nvidia-smi


Thu Mar 27 23:33:44 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   73C    P0             33W /   70W |   11174MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                