In [None]:
!pip install google-cloud-aiplatform google-cloud-storage google-auth google-auth-oauthlib jsonlines

Collecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Downloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)
Installing collected packages: jsonlines
Successfully installed jsonlines-4.0.0


In [None]:
import os
import time
import vertexai
from vertexai.preview.tuning import sft
from google.colab import drive
from google.cloud import storage
from google.cloud import aiplatform
from google.oauth2 import service_account
from vertexai.generative_models import GenerativeModel

In [None]:
# Attach Google Drive to the Colab runtime at /content/drive
drive.mount("/content/drive")

Mounted at /content/drive


#### Defining the Service Account Key, Input Data, and Output (Training Data) Paths

In [None]:
# File locations used by the rest of the notebook -- fill these in before running
service_account_key_path = ""  # Service account key file
input_json_path = ""           # Input JSON file
jsonl_output_path = ""         # Output JSONL file

In [None]:
# Set the environment variable for service account credentials.
# The value is whatever `service_account_key_path` currently holds.
# NOTE(review): if that path is still the empty placeholder, credential lookup
# will presumably fail later -- confirm it is filled in before running this cell.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_key_path

#### Define Project ID, Region, and GCS Bucket URI

In [None]:
# Define Project ID, Region, and GCS Bucket URI
PROJECT_ID = ""  # Replace with your Google Cloud Project ID
REGION = "us-central1"  # Desired region for Vertex AI
BUCKET_NAME = f"{PROJECT_ID}-sft-gemini-demo"  # Bucket name is derived from the project ID
BUCKET_URI = f"gs://{BUCKET_NAME}"  # gs:// URI form of the same bucket

In [None]:
# Point the Vertex AI SDK at the configured project and region
# (credentials come from the environment variable set earlier)
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

#### Converting and Preparing Training Data

In [None]:
# Convert the instruction/output JSON file into the JSONL layout used for tuning
def prepare_code_generation_jsonl(input_json_path, output_jsonl_path, limit=50):
    """Write the first `limit` records of a JSON file as chat-style JSONL rows.

    The input file is expected to hold a JSON array of objects, each with an
    "instruction" and an "output" key. Every record that is kept becomes one
    line of the output file, shaped as
    {"messages": [{"role": "user", "content": <instruction>},
                  {"role": "model", "content": <output>}]}.

    Args:
        input_json_path: Path of the source JSON file (read as UTF-8).
        output_jsonl_path: Path of the JSONL file to create or overwrite
            (written as UTF-8, non-ASCII characters are kept as-is).
        limit: Maximum number of records to convert, counted from the start
            of the array. Pass None to convert every record.

    Raises:
        KeyError: If a record within the limit lacks "instruction" or
            "output". The output file is not touched in that case.
    """
    import json

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(input_json_path, 'r', encoding='utf-8') as f:
        original_data = json.load(f)

    # Build every row before opening the output file, so a malformed record
    # raises here instead of leaving a half-written JSONL file behind.
    formatted_examples = []
    for i, example in enumerate(original_data):
        if limit is not None and i >= limit:
            break
        formatted_examples.append({
            "messages": [
                {"role": "user", "content": example["instruction"]},
                {"role": "model", "content": example["output"]}
            ]
        })

    # One JSON object per line; ensure_ascii=False keeps the text readable.
    with open(output_jsonl_path, 'w', encoding='utf-8') as writer:
        for formatted_example in formatted_examples:
            writer.write(json.dumps(formatted_example, ensure_ascii=False) + "\n")

# Convert the input file into the training JSONL, keeping at most 263 examples.
# NOTE(review): 263 is presumably the size of the source dataset -- confirm,
# or hoist it into a named constant next to the other settings.
prepare_code_generation_jsonl(input_json_path, jsonl_output_path, limit=263)

#### Initializing the GCS Client, Creating the Bucket, and Uploading the Data

In [None]:
# Create a Cloud Storage client for PROJECT_ID and get a handle to the bucket
# named BUCKET_NAME. Nothing is uploaded in this cell.
client = storage.Client(project=PROJECT_ID)
bucket = client.bucket(BUCKET_NAME)

# Create the bucket in REGION if it does not exist yet.
# Nothing is printed when the bucket is already there.
if not bucket.exists():
    bucket.create(location=REGION)
    print(f"Bucket created: {BUCKET_URI}")

In [None]:
# Object name the training file gets inside the bucket
gcs_blob_path = 'fine_tune_data_gemini.jsonl'
# Local JSONL file to send
local_file_path = jsonl_output_path

# Upload the local file to the bucket and report where it ended up
blob = bucket.blob(gcs_blob_path)
blob.upload_from_filename(local_file_path)
print(f"File uploaded to gs://{BUCKET_NAME}/{gcs_blob_path}")

File uploaded to gs://sonorous-pact-405102-sft-gemini-demo/fine_tune_data_gemini.jsonl


#### Defining the Model and Running the Fine-Tuning Job

In [None]:
# Define the model ID and training dataset URI
MODEL_ID = "gemini-1.0-pro-002"
TRAINING_DATA_URI = f"gs://{BUCKET_NAME}/{gcs_blob_path}"

# Seconds to wait between two status checks of the tuning job
POLL_INTERVAL_SECONDS = 60

# Start the fine-tuning job
sft_tuning_job = sft.train(
    source_model=MODEL_ID,
    train_dataset=TRAINING_DATA_URI
)

# Monitor tuning job progress. The message is printed before waiting, so no
# "in progress" line appears after the refresh that sees the job has ended.
while not sft_tuning_job.has_ended:
    print("Tuning job in progress...")
    time.sleep(POLL_INTERVAL_SECONDS)
    sft_tuning_job.refresh()

# "Ended" also covers failed and cancelled jobs: stop here instead of reporting
# completion and printing a model/endpoint that does not exist.
if not sft_tuning_job.has_succeeded:
    raise RuntimeError(
        "Tuning job ended without succeeding "
        f"(state: {sft_tuning_job.state}, error: {sft_tuning_job.error})"
    )

# Display tuning job information after successful completion
print("Tuning job completed.")
print("Tuning Job Info:", sft_tuning_job.to_dict())
print(f"Tuned Model name: {sft_tuning_job.tuned_model_name}")
print(f"Endpoint name: {sft_tuning_job.tuned_model_endpoint_name}")

INFO:vertexai.tuning._tuning:Creating SupervisedTuningJob
INFO:vertexai.tuning._tuning:SupervisedTuningJob created. Resource name: projects/311149115011/locations/us-central1/tuningJobs/8249116052160512000
INFO:vertexai.tuning._tuning:To use this SupervisedTuningJob in another session:
INFO:vertexai.tuning._tuning:tuning_job = sft.SupervisedTuningJob('projects/311149115011/locations/us-central1/tuningJobs/8249116052160512000')
INFO:vertexai.tuning._tuning:View Tuning Job:
https://console.cloud.google.com/vertex-ai/generative/language/locations/us-central1/tuning/tuningJob/8249116052160512000?project=311149115011


Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...


Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job in progress...
Tuning job i

#### Generating outputs

In [None]:
# Define the full model path based on the tuning job info
MODEL_NAME = "" # Define model Endpoint name here

# Load the fine-tuned model
model = GenerativeModel(model_name=MODEL_NAME)

# Sample prompts, similar to the training data
prompts = [
    "Create a header for a minimalist personal website, focusing on clean typography and a simple navigation bar.",
    "Design a bold and striking header for a creative agency website, incorporating a strong visual element and a prominent call-to-action.",
    "Develop a functional and user-friendly header for an e-commerce website, including a logo, search bar, shopping cart icon, and clear navigation.",
    "Create a visually appealing header for a blog website, featuring a large hero image, a tagline, and a simple navigation bar.",
]

# Send each prompt to the model and print the generated response.
# Errors are handled per prompt, so one failing request does not silently
# skip the remaining prompts, and the message says which prompt failed.
for prompt in prompts:
    try:
        response = model.generate_content(prompt)
        print(f"Prompt: {prompt}\nGenerated Response:\n{response.text}\n")
    except Exception as e:
        print(f"Error generating response for prompt {prompt!r}:", e)
    print("-" * 50)

Prompt: Create a header for a minimalist personal website, focusing on clean typography and a simple navigation bar.
Generated Response:
<html>
<head>
<style>
.section-1 {
  	width: 100%;
  	background: #ffffff;
  	display: flex;
  	flex-direction: column;
  	align-items: center;
  	justify-content: center;
  	padding: 10px;
  	position: relative;
  	z-index: 0;
}
.link {
  	font-size: 20px;
  	color: #000000;
}
.nav-link {
  	display: flex;
  	flex-direction: row;
  	align-items: center;
  	justify-content: center;
  	gap: 32px;
}
.nav-container {
  	display: flex;
  	flex-direction: row;
  	align-items: center;
  	justify-content: center;
  	gap: 24px;
}
.container {
  	display: flex;
  	flex-direction: row;
  	align-items: flex-start;
  	justify-content: flex-start;
  	gap: 56px;
}
.div {
  	position: absolute;
  	top: calc(50% - 39.5px);
  	left: calc(50% - 167px);
  	width: 374px;
  	height: 89px;
  	background-image: url(Ellipse.png);
  	background-size: 100%;
  	background-posit