In [1]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install azure-ai-textanalytics
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

In [2]:
import os
import re

from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [4]:
def classify_text(query):
    """Classify a single text with the Azure custom single-label classifier.

    Configuration is read from environment variables so that no secret lives
    in the notebook:

    * ``AZURE_LANGUAGE_KEY`` (required) - the resource key.
    * ``AZURE_LANGUAGE_ENDPOINT`` - defaults to the endpoint used so far.
    * ``AZURE_LANGUAGE_PROJECT`` - defaults to ``'ClassifyLab'``.
    * ``AZURE_LANGUAGE_DEPLOYMENT`` - defaults to ``'MyDeployment'``.

    Args:
        query: The text to classify; it is sent as a one-document batch.

    Returns:
        A 2-tuple. On success ``(category, confidence_score)``. On any
        failure ``(None, message)`` where ``message`` describes the problem.
        The function always returns a 2-tuple, so callers can unpack safely.
    """
    # SECURITY: the key used to be hard-coded here. It must be considered
    # leaked and rotated in the Azure portal; supply the new one via env var.
    ai_key = os.environ.get('AZURE_LANGUAGE_KEY')
    ai_endpoint = os.environ.get(
        'AZURE_LANGUAGE_ENDPOINT',
        'https://myclassification51.cognitiveservices.azure.com/',
    )
    project_name = os.environ.get('AZURE_LANGUAGE_PROJECT', 'ClassifyLab')
    deployment_name = os.environ.get('AZURE_LANGUAGE_DEPLOYMENT', 'MyDeployment')

    if not ai_key:
        return None, (
            "Azure Language key is not configured; "
            "set the AZURE_LANGUAGE_KEY environment variable."
        )

    try:
        # Create client using endpoint and key
        credential = AzureKeyCredential(ai_key)
        ai_client = TextAnalyticsClient(endpoint=ai_endpoint, credential=credential)

        # Submit the query as a one-document batch and wait for completion
        operation = ai_client.begin_single_label_classify(
            [query],
            project_name=project_name,
            deployment_name=deployment_name,
        )
        document_results = operation.result()

        # Return on the first document that carries either a label or an error
        for classification_result in document_results:
            if classification_result.kind == "CustomDocumentClassification":
                # Guard against a successful result that carries no labels
                if classification_result.classifications:
                    classification = classification_result.classifications[0]
                    return classification.category, classification.confidence_score
            elif classification_result.is_error:
                return None, classification_result.error.message

    except Exception as ex:
        # Best-effort by design: report the failure instead of raising
        return None, str(ex)

    # Previously this path returned a bare None, which broke tuple unpacking
    return None, "No classification result was returned for the query."

In [5]:
# Load the model and its tokenizer from the given model identifier. Per the
# recorded output below, this run downloaded model.safetensors (5.70G) and
# adapter_model.safetensors (168M), so the first execution is slow.
model, tokenizer = FastLanguageModel.from_pretrained("mohamed517/Arabic-fine-Tuning-LLaMA-Model")

==((====))==  Unsloth 2024.11.6: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2024.11.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [6]:
# Rebind `tokenizer` to the result of get_chat_template with the "llama-3"
# template. The mapping names the message keys ("from"/"value") and role
# labels ("human"/"gpt") that the messages built in `generation` use.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
)

In [7]:
def generation(question, max_new_tokens = 400):
    """Generate the model's reply to a user message.

    The message is first classified with `classify_text`; the resulting
    category is embedded in the Arabic system prompt so the reply can be
    tailored to the user's state.

    Args:
        question: The user's message.
        max_new_tokens: Upper bound on generated tokens (default 400, the
            value that was previously hard-coded).

    Returns:
        The list returned by `tokenizer.batch_decode` for the generated
        sequence. Special tokens are not stripped here; `adjustPattern`
        parses them.
    """
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

    # On failure classify_text returns (None, error_message). Without this
    # guard the literal text "None" was interpolated into the system prompt.
    category, _detail = classify_text(question)
    if category is None:
        category = "غير محدده"  # neutral label meaning "undetermined"

    context = f"انت معالج بالذكاء الاصطناعي قيد التدريب ومهمتك هي تقديم دعم عاطفي مدروس ومخصص لكل مستخدم بناء علي حالته النفسيه الحاليه ستقوم بالاستماع بعنايه الي مخاوفهم ومشاعرهم مع مراعاه ان الحاله النفسيه للمريض هي {category} قم بتقديم استجابات ملاءمه للوضع الذي يمر به استخدم معرفتك بمختلف المناهج العلاجيه لتقديم تقنيات ومحادثات داعمه بلهجه ودوده ومتفهمه تذكر انك مورد للدعم العاطفي والارشاد ولست بديلا عن المعالج البشري تعامل مع المريض بصدق واحترام وكن مرنا في محادثاتك لتتناسب مع حالته النفسيه وحاول التخفيف عنه بطريقه لطيفه ومتوازنه"

    # Create the messages list with the context and user input
    messages = [
        {"from": "system", "value": context},
        {"from": "human", "value": question},
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")

    outputs = model.generate(input_ids = inputs, max_new_tokens = max_new_tokens, use_cache = True)
    model_answer = tokenizer.batch_decode(outputs)

    return model_answer

In [8]:
def adjustPattern(model_answer):
    """Split a decoded Llama-3 chat transcript into user and bot messages.

    Args:
        model_answer: Either the list returned by `tokenizer.batch_decode`
            (only the first element is parsed) or the decoded string itself.

    Returns:
        ``{"user": [...], "bot": [...]}`` with the stripped message texts in
        transcript order. System messages are skipped. An empty or missing
        input yields two empty lists.
    """
    output = {"user": [], "bot": []}

    # Accept a bare string as well as the usual one-element list
    if isinstance(model_answer, str):
        model_answer_str = model_answer
    elif model_answer:
        model_answer_str = model_answer[0]
    else:
        return output

    # A complete turn: header with the role, then content up to <|eot_id|>
    pattern = re.compile(r'<\|start_header_id\|>(.*?)<\|end_header_id\|>(.*?)<\|eot_id\|>', re.DOTALL)
    role_to_key = {"user": "user", "assistant": "bot"}  # other roles are skipped

    tail_start = 0
    for match in pattern.finditer(model_answer_str):
        key = role_to_key.get(match.group(1).strip().lower())
        if key is not None:
            output[key].append(match.group(2).strip())
        tail_start = match.end()

    # When generation stops at the token limit, the last assistant turn has
    # no closing <|eot_id|> and the pattern above misses it. Look for it in
    # the text after the last complete turn. Slice past the whole header so
    # the role word "assistant" does not leak into the content.
    assistant_header = '<|start_header_id|>assistant<|end_header_id|>'
    tail = model_answer_str[tail_start:]
    header_index = tail.find(assistant_header)
    if header_index != -1:
        assistant_content = tail[header_index + len(assistant_header):].strip()
        if assistant_content:
            output["bot"].append(assistant_content)

    return output

In [21]:
# Sample user message (Arabic) used to exercise the classify-then-generate pipeline.
question = "علي طول مضايقه ونفسيتي تعبانه وبس ابكي ما ابغي اقابل احد وعلي طول اخاصم اطفالي واصرخ عليهم واروح الغرفه لوحدي مع العلم انه في الليل ازعل علي تصرفاتي فما الحل"

# `generation` returns the decoded output as a list; it is parsed in the next cell.
model_answer = generation(question)

In [22]:
# Split the raw decoded generation into per-role message lists
parsed_output = adjustPattern(model_answer)

# Print a heading for every role, then each message padded by blank lines
for role in parsed_output:
    contents = parsed_output[role]
    print(f"{role.capitalize()} response:")
    for content in contents:
        print(f"\n{content}\n")

User response:

علي طول مضايقه ونفسيتي تعبانه وبس ابكي ما ابغي اقابل احد وعلي طول اخاصم اطفالي واصرخ عليهم واروح الغرفه لوحدي مع العلم انه في الليل ازعل علي تصرفاتي فما الحل

Bot response:

الطول النفسي والجسدي يحتاجان الي تقيم طبي لمعرفه الحاله النفسيه بشكل دقيق وبالتاكيد يحتاج الي علاج نفسي مكثف ومخصص لكل حاله

