In [None]:
!pip install -U peft==0.13.2 bitsandbytes==0.44.1 datasets==3.0.1 wandb==0.18.5 scipy==1.13.1 google-cloud-secret-manager

In [None]:
from datetime import datetime
import sys

import torch
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_kbit_training,
)
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, BitsAndBytesConfig


In [None]:
import os

from google.colab import userdata

# Make the Hugging Face access token available as the HF_TOKEN environment
# variable, whichever source it comes from.
try:
  # First choice: the secret stored in this user's Colab secrets.
  # The value is exported (not discarded) so both branches leave the
  # environment in the same state.
  os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")
except userdata.SecretNotFoundError:
  # Fallback: read the shared class token from Google Cloud Secret Manager.
  print("HuggingFace Token not found, looking in caltech class project")
  from google.cloud import secretmanager
  client = secretmanager.SecretManagerServiceClient()
  response = client.access_secret_version(request={"name": "projects/240830225929/secrets/HF_TOKEN/versions/1"})
  os.environ["HF_TOKEN"] = response.payload.data.decode("UTF-8")


In [None]:
# Hub id of the base checkpoint; used for both the model and its tokenizer so
# the two can never drift apart.
base_model = "codellama/CodeLlama-7b-hf"

# Load the base model with 8-bit bitsandbytes quantization; device placement is
# delegated to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype=torch.float16,
    device_map="auto",
)

# Tokenizer that matches the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [None]:
import os
import subprocess

from google.cloud import storage

# Download one training checkpoint from Cloud Storage into a local directory so
# the adapter can be loaded from disk afterwards.

# Create a Cloud Storage client
client = storage.Client()

# Get the current username: the first non-empty stdout line printed by
# `gcloud config get-value account`.
gcloud_result = subprocess.run(
    ["gcloud", "config", "get-value", "account"],
    capture_output=True,
    text=True,
    check=True,
)
account_lines = [line.strip() for line in gcloud_result.stdout.splitlines() if line.strip()]
if not account_lines:
    # Fail here with a clear message instead of an IndexError / a bogus GCS path.
    raise RuntimeError(
        "gcloud returned no active account; authenticate with gcloud before running this cell."
    )
username = account_lines[0]

# Number used verbatim in the `checkpoint-<N>` folder name below.
epoch = 340

# Define the bucket name and the directory to download from
bucket_name = 'caltech-class'
checkpoint_directory = f'lab02/{username}/checkpoint-{epoch}/' # Path in GCS
output_dir = './sql-code-llama/'  # Local directory to save files

# Get the GCS bucket
bucket = client.get_bucket(bucket_name)

# Create the local directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# List every object under the checkpoint prefix and download it
blobs = bucket.list_blobs(prefix=checkpoint_directory)

downloaded_count = 0
for blob in blobs:
    # Names ending in '/' are directory placeholders, not files
    if blob.name.endswith('/'):
        continue

    # Mirror the object's path relative to the checkpoint prefix under output_dir
    relative_path = os.path.relpath(blob.name, checkpoint_directory)
    local_file_path = os.path.join(output_dir, relative_path)

    # Create any necessary local subdirectories
    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

    # Download the file
    blob.download_to_filename(local_file_path)
    print(f"Downloaded {blob.name} to {local_file_path}")
    downloaded_count += 1

if downloaded_count == 0:
    # Without this check a wrong user/checkpoint number only surfaces later as
    # a confusing failure when the adapter is loaded from an empty directory.
    raise FileNotFoundError(
        f"No files found under gs://{bucket_name}/{checkpoint_directory}"
    )


In [None]:


# With more than one visible GPU, flag the model as already parallelized so
# that Trainer does not attempt its own DataParallelism on top.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"multi cuda devices #{gpu_count}")
    model.is_parallelizable = model.model_parallel = True


In [None]:
from peft import PeftModel

# Attach the adapter checkpoint found in `output_dir` to the loaded model;
# `model` is rebound to the resulting PeftModel wrapper.
# NOTE(review): re-running this cell would wrap an already-wrapped model —
# reload the base model first if this needs to be executed again.
model = PeftModel.from_pretrained(
    model=model,
    model_id=output_dir,
)

In [None]:
# Single text-to-SQL prompt used as a smoke test of the loaded model.
eval_prompt = """You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.
### Input:
Which Class has a Frequency MHz larger than 91.5, and a City of license of hyannis, nebraska?

### Context:
CREATE TABLE table_name_12 (class VARCHAR, frequency_mhz VARCHAR, city_of_license VARCHAR)

### Response:
"""

# Tokenize the prompt and move the tensors to the device the model reports,
# rather than a hard-coded "cuda", since placement was left to device_map="auto".
model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)

# Switch to eval mode and generate without tracking gradients.
model.eval()
with torch.no_grad():
    generated_ids = model.generate(**model_input, max_new_tokens=100)[0]

# The decoded text contains the prompt followed by the generated continuation.
print(tokenizer.decode(generated_ids, skip_special_tokens=True))
