<a href="https://colab.research.google.com/github/ramapathakota05/TensorFlow/blob/master/hf_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install necessary libraries
!pip install transformers datasets

import pandas as pd
from datasets import Dataset
from transformers import LlamaForSequenceClassification, LlamaTokenizer, Trainer, TrainingArguments
import torch

# Build a small synthetic NHS-style table: three numeric features per patient
# plus two binary outcome columns.
data = {
    "age": [25, 34, 45, 50, 29, 55, 65, 70, 80, 40],
    "blood_glucose_level": [90, 150, 180, 200, 85, 220, 250, 160, 300, 130],
    "bmi": [22.5, 28.0, 30.5, 33.0, 24.0, 35.5, 29.0, 32.5, 28.0, 27.0],
    "has_diabetes": [0, 1, 1, 1, 0, 1, 1, 1, 1, 1],  # 1 for diabetes, 0 for no diabetes
    "requires_further_tests": [0, 1, 1, 1, 0, 1, 1, 1, 1, 1]  # 1 for further tests, 0 for no further tests
}

df = pd.DataFrame(data)

# tokenize_function reads a "text" column and set_format asks for a "label"
# column, but the raw table has neither. Render each row with the same template
# used for the inference prompt so training and prediction inputs look alike.
df["text"] = [
    f"Age: {age}, Blood Glucose Level: {glucose}, BMI: {bmi}"
    for age, glucose, bmi in zip(
        data["age"], data["blood_glucose_level"], data["bmi"]
    )
]
# NOTE(review): "has_diabetes" is used as the classification target; it is
# identical to "requires_further_tests" in this sample -- confirm the intended target.
df["label"] = df["has_diabetes"]
# Wrap the frame as a Hugging Face dataset and hold out 20% of the rows for evaluation.
dataset = Dataset.from_pandas(df).train_test_split(test_size=0.2)
train_dataset, test_dataset = dataset['train'], dataset['test']

# Load the tokenizer and model
# NOTE(review): 'meta-llama/Llama-2-7b-hf' appears to be a gated Hub repository;
# the "Can't load tokenizer" OSError recorded with this notebook is consistent
# with an unauthenticated session -- confirm access was granted and a Hugging
# Face token is configured before running this cell.
MODEL_NAME = 'meta-llama/Llama-2-7b-hf'
MAX_SEQ_LENGTH = 64  # the prompts in this notebook are a single short sentence

tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
model = LlamaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Later cells call the tokenizer with padding="max_length" and train with a
# batch size of 2. Both need a padding token, so reuse EOS when none is defined
# and tell the classification head which id marks padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

# padding="max_length" / truncation=True fall back to the tokenizer's
# model_max_length when no max_length is passed; pin it to a small, known value
# so every example is padded to the same length.
tokenizer.model_max_length = MAX_SEQ_LENGTH

# Tokenize the datasets
def tokenize_function(examples):
    """Encode the ``"text"`` entries of a batch with the module-level tokenizer.

    Args:
        examples: Batch mapping that provides a ``"text"`` entry.

    Returns:
        Whatever the tokenizer returns for those texts, padded to the maximum
        length and truncated.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Encode both splits in batches.
train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# Expose only the model inputs and the target, as PyTorch tensors.
TORCH_COLUMNS = ('input_ids', 'attention_mask', 'label')
for _split in (train_dataset, test_dataset):
    _split.set_format(type='torch', columns=list(TORCH_COLUMNS))

# Training configuration: checkpoints and logs go under ./results, evaluation
# runs once per epoch, and both loops use two examples per device.
# NOTE(review): newer transformers releases may expect `eval_strategy` instead
# of `evaluation_strategy` -- confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
)

# Bind the model, its training configuration and the two dataset splits
# into a single Trainer instance.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
)

# Train and evaluate the model
trainer.train()

# evaluate() returns the metrics; it is not the last expression of the cell, so
# the notebook would not display it on its own. Keep the result and print it.
eval_metrics = trainer.evaluate()
print(eval_metrics)

# Function to make predictions
def predict(new_data):
    """Return class probabilities for ``new_data`` from the module-level model.

    Args:
        new_data: Text handed to the module-level ``tokenizer``.

    Returns:
        A tensor with the softmax of the model's logits over the last dimension.
    """
    inputs = tokenizer(new_data, return_tensors="pt", padding="max_length", truncation=True)
    # The tokenizer output and the model can end up on different devices (for
    # example after training on an accelerator), which makes the forward pass
    # fail; move every encoded tensor to wherever the model currently is.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

    # Inference only: switch off dropout and skip gradient bookkeeping.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return probabilities

# Score one unseen patient: describe the measurements as a prompt, then print
# the class probabilities returned by predict().
new_patient = dict(age=60, blood_glucose_level=240, bmi=31.0)

_age = new_patient['age']
_glucose = new_patient['blood_glucose_level']
_bmi = new_patient['bmi']
new_data = f"Age: {_age}, Blood Glucose Level: {_glucose}, BMI: {_bmi}"

print(predict(new_data))




OSError: Can't load tokenizer for 'meta-llama/Llama-2-7b-hf'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'meta-llama/Llama-2-7b-hf' is the correct path to a directory containing all relevant files for a LlamaTokenizer tokenizer.