<a href="https://colab.research.google.com/github/nareshpreviu/vertex_ai_model/blob/main/Copy_of_CSV_TrainedModel_Upload_Delopy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run Colab's interactive user authentication, then point the gcloud CLI at
# the project that the rest of the notebook works in.
from google.colab import auth
auth.authenticate_user()
# PROJECT_ID is interpolated into the `!gcloud` commands below and in later cells.
PROJECT_ID = 'chatbotapp-47182'
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [None]:
from google.cloud import storage

# Look up the bucket that holds the training data and report its location,
# so the region chosen for later steps can be checked against it.
bucket_name = 'trained-data-bucket'
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)

print(f'Bucket {bucket_name} is in region: {bucket.location}')


Bucket trained-data-bucket is in region: ASIA-SOUTHEAST1


In [None]:
# Define the service account name and email
SERVICE_ACCOUNT_NAME = 'prediction-model-data'
SERVICE_ACCOUNT_EMAIL = f'{SERVICE_ACCOUNT_NAME}@{PROJECT_ID}.iam.gserviceaccount.com'

# Create the service account
!gcloud iam service-accounts create {SERVICE_ACCOUNT_NAME} --display-name "Model Vertex AI Service Account"

# Grant necessary roles
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/aiplatform.admin
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/storage.admin
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/storage.objectViewer
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/storage.objectCreator
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/storage.objectAdmin
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/iam.serviceAccountUser
!gcloud projects add-iam-policy-binding {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT_EMAIL} --role=roles/iam.serviceAccountTokenCreator


Created service account [prediction-model-data].
Updated IAM policy for project [chatbotapp-47182].
bindings:
- members:
  - serviceAccount:model-service-account@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data-model@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-test-data@chatbotapp-47182.iam.gserviceaccount.com
  role: roles/aiplatform.admin
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-cc.iam.gserviceaccount.com
  role: roles/aiplatform.customCodeServiceAgent
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-vm.iam.gserviceaccount.com
  role: roles/aiplatform.note

In [None]:
# Create a new JSON key for the service account and write it to ~/key.json
# (later cells read it through GOOGLE_APPLICATION_CREDENTIALS).
# NOTE(review): this is a private credential stored on the runtime's disk;
# every run of this cell creates another key — delete keys that are no longer needed.
!gcloud iam service-accounts keys create ~/key.json --iam-account {SERVICE_ACCOUNT_EMAIL}

created key [7fcac10df777ccacee1c4ca60578a4b359990a4a] of type [json] as [/root/key.json] for [prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com]


In [None]:
import os

# Point the Google client libraries at the service-account key created by the
# `gcloud iam service-accounts keys create ~/key.json` cell. The path is
# derived from the home directory instead of being hard-coded to /root, so it
# always names the file that command wrote (on Colab this is /root/key.json).
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.expanduser('~/key.json')

In [None]:
import io

import joblib
import pandas as pd
from google.cloud import aiplatform
from google.cloud import storage
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Function to preprocess text data
def preprocess_text(text):
    """Return ``text`` unchanged.

    Placeholder hook for text cleaning: it currently applies no
    transformation, so the caller gets back exactly the value it passed in.
    """
    # Implement your text preprocessing logic here
    return text

# Load data from CSV file in Google Cloud Storage
def load_data_from_gcs(bucket_name, file_path):
    """Download a CSV object from Google Cloud Storage into a DataFrame.

    Args:
        bucket_name: Name of the bucket that holds the object.
        file_path: Object name (path inside the bucket) of the CSV file.

    Returns:
        pandas.DataFrame parsed from the object's UTF-8 decoded contents.

    Raises:
        UnicodeDecodeError: If the object's bytes are not valid UTF-8.
        Errors raised by the storage client (for example when the object
        cannot be downloaded) and by ``pd.read_csv`` propagate unchanged.
    """
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_path)
    # download_as_string() is deprecated in google-cloud-storage;
    # download_as_bytes() is its replacement and returns the same raw bytes.
    raw_bytes = blob.download_as_bytes()
    return pd.read_csv(io.StringIO(raw_bytes.decode('utf-8')))

# Load data from CSV in Google Cloud Storage
BUCKET_NAME = 'trained-data-bucket'
FILE_PATH = 'previu_health_faq.csv'

data = load_data_from_gcs(BUCKET_NAME, FILE_PATH)

# Check for NaN values and handle them *before* preprocessing, so that
# preprocess_text only ever receives real question text. A new frame is built
# instead of mutating in place, which keeps this cell safe to re-run.
if data['Question'].isnull().any() or data['Answer'].isnull().any():
    print("Data contains NaN values. Removing NaN values...")
    data = data.dropna(subset=['Question', 'Answer']).reset_index(drop=True)

# Preprocess data
data = data.assign(Question=data['Question'].apply(preprocess_text))

# Split data into X (questions) and y (answers)
X = data['Question']
y = data['Answer']

# Bundle the TF-IDF vectorizer and the classifier into a single pipeline.
# Saving only the SVC would discard the fitted vocabulary, leaving the saved
# artifact unable to score raw question text; the pipeline keeps both steps
# together so the one file is sufficient for prediction.
vectorizer = TfidfVectorizer()

# Train a model (example using SVM)
print("Training the model...")
model = Pipeline([
    ('tfidf', vectorizer),
    ('svc', SVC(kernel='linear')),
])
model.fit(X, y)
print("Model training completed.")

# Save the trained model (vectorizer + classifier) to a file with a custom name
CUSTOM_MODEL_FILE = 'prediction_test_data_model.joblib'
joblib.dump(model, CUSTOM_MODEL_FILE)


Data contains NaN values. Removing NaN values...
Training the model...
Model training completed.


['prediction_test_data_model.joblib']

In [None]:
print("Uploading model to Google Cloud Storage...")
storage_client = storage.Client()
MODEL_PATH = f'models/{CUSTOM_MODEL_FILE}'

# Region the bucket must live in if it has to be created here. Without an
# explicit location the bucket would be created in the API's default
# location rather than alongside the existing data (ASIA-SOUTHEAST1) and the
# Vertex AI region used later in the notebook.
BUCKET_LOCATION = 'asia-southeast1'

# Ensure the bucket exists
bucket = storage_client.bucket(BUCKET_NAME)
if not bucket.exists():
    bucket = storage_client.create_bucket(BUCKET_NAME, location=BUCKET_LOCATION)

# Upload model file
blob = bucket.blob(MODEL_PATH)
blob.upload_from_filename(CUSTOM_MODEL_FILE)

print(f'Model uploaded to: gs://{BUCKET_NAME}/{MODEL_PATH}')

Uploading model to Google Cloud Storage...
Model uploaded to: gs://trained-data-bucket/models/prediction_test_data_model.joblib


In [None]:
from google.cloud import storage

# Bucket and object whose presence is being verified
BUCKET_NAME = 'trained-data-bucket'
MODEL_PATH = 'models/prediction_test_data_model.joblib'

# Storage client and a handle on the bucket
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)

# Print the name of every object stored under the models/ prefix
blobs = bucket.list_blobs(prefix='models/')
for blob in blobs:
    print(blob.name)

# Report whether the expected model object is present
blob = bucket.blob(MODEL_PATH)
if not blob.exists():
    print(f"File {MODEL_PATH} does not exist in bucket {BUCKET_NAME}.")
else:
    print(f"File {MODEL_PATH} exists in bucket {BUCKET_NAME}.")

models/prediction_test_data_model.joblib
File models/prediction_test_data_model.joblib exists in bucket trained-data-bucket.


In [None]:
# Grant IAM permissions using gsutil (run this cell in Colab)
# This is a bucket-level grant: it targets only the bucket named below.
bucket_name = 'trained-data-bucket'
# Same address as SERVICE_ACCOUNT_EMAIL built in the service-account cell,
# written out in full here.
service_account = 'prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com'

# Grant objectViewer role to the service account
!gsutil iam ch serviceAccount:{service_account}:objectViewer gs://{bucket_name}

In [None]:
# Grant Storage Admin Permissions
# NOTE: same project, member and role as the roles/storage.admin binding made
# in the service-account creation cell, with the values written out literally.
!gcloud projects add-iam-policy-binding chatbotapp-47182 \
    --member="serviceAccount:prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com" \
    --role="roles/storage.admin"

# Grant AI Platform Admin Permissions
# NOTE: likewise repeats the earlier roles/aiplatform.admin binding.
!gcloud projects add-iam-policy-binding chatbotapp-47182 \
    --member="serviceAccount:prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com" \
    --role="roles/aiplatform.admin"

# Enable Required APIs
# Vertex AI (aiplatform) and Cloud Storage are the two services this notebook calls.
!gcloud services enable aiplatform.googleapis.com
!gcloud services enable storage.googleapis.com

Updated IAM policy for project [chatbotapp-47182].
bindings:
- members:
  - serviceAccount:model-service-account@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data-model@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-test-data@chatbotapp-47182.iam.gserviceaccount.com
  role: roles/aiplatform.admin
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-cc.iam.gserviceaccount.com
  role: roles/aiplatform.customCodeServiceAgent
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-vm.iam.gserviceaccount.com
  role: roles/aiplatform.notebookServiceAgent
- members:
  - serviceAccount:se

In [None]:
# Replace PROJECT_ID and SERVICE_ACCOUNT with your actual values
# These are Python variables; the `$PROJECT_ID` / `$SERVICE_ACCOUNT` references
# in the shell commands below are filled in from them by IPython.
PROJECT_ID = 'chatbotapp-47182'
SERVICE_ACCOUNT = 'prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com'

# Add the Storage Admin role to the service account
# NOTE: same member and role as the roles/storage.admin bindings in earlier cells.
!gcloud projects add-iam-policy-binding $PROJECT_ID \
    --member="serviceAccount:$SERVICE_ACCOUNT" \
    --role="roles/storage.admin"

# Add the AI Platform Admin role to the service account
# NOTE: same member and role as the roles/aiplatform.admin bindings in earlier cells.
!gcloud projects add-iam-policy-binding $PROJECT_ID \
    --member="serviceAccount:$SERVICE_ACCOUNT" \
    --role="roles/aiplatform.admin"


Updated IAM policy for project [chatbotapp-47182].
bindings:
- members:
  - serviceAccount:model-service-account@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-model-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data-model@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-test-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-data@chatbotapp-47182.iam.gserviceaccount.com
  - serviceAccount:prediction-trained-test-data@chatbotapp-47182.iam.gserviceaccount.com
  role: roles/aiplatform.admin
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-cc.iam.gserviceaccount.com
  role: roles/aiplatform.customCodeServiceAgent
- members:
  - serviceAccount:service-24334301270@gcp-sa-aiplatform-vm.iam.gserviceaccount.com
  role: roles/aiplatform.notebookServiceAgent
- members:
  - serviceAccount:se

In [None]:
import os
from google.cloud import aiplatform, storage
from google.api_core.exceptions import NotFound

# Set your project ID and region
PROJECT_ID = 'chatbotapp-47182'
REGION = 'asia-southeast1'  # Ensure this matches your Vertex AI region
BUCKET_NAME = 'trained-data-bucket'
CUSTOM_MODEL_FILE = 'prediction_test_data_model.joblib'

# Vertex AI reads model artifacts from a Cloud Storage *directory*, and the
# pre-built scikit-learn container expects the file inside it to be named
# exactly `model.joblib`. Passing the URI of the .joblib object itself is what
# produced the "There are no files in directory" 404, so the model gets its
# own directory (named after the local file) and is stored there as
# model.joblib.
MODEL_DIR = f'models/{os.path.splitext(CUSTOM_MODEL_FILE)[0]}'
MODEL_PATH = f'{MODEL_DIR}/model.joblib'
ARTIFACT_URI = f'gs://{BUCKET_NAME}/{MODEL_DIR}'

# Set the path to your service account key file (written to ~/key.json by the
# key-creation cell; this resolves to /root/key.json on Colab)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.expanduser('~/key.json')

# Initialize Vertex AI
print("Initializing Vertex AI...")
aiplatform.init(project=PROJECT_ID, location=REGION)

# Upload model to Google Cloud Storage
print("Uploading model to Google Cloud Storage...")
storage_client = storage.Client()

# Ensure the bucket exists
bucket = storage_client.bucket(BUCKET_NAME)
if not bucket.exists():
    print(f'Bucket {BUCKET_NAME} does not exist, creating...')
    bucket.create(location=REGION)
else:
    print(f'Bucket {BUCKET_NAME} exists.')

# Upload model file
blob = bucket.blob(MODEL_PATH)
try:
    blob.upload_from_filename(CUSTOM_MODEL_FILE)
except FileNotFoundError as e:
    # Without the artifact there is nothing to register, so report the problem
    # and stop instead of continuing to a Vertex AI call that cannot succeed.
    print(f'Error uploading model file: {e}')
    raise
else:
    print(f'Model uploaded to: gs://{BUCKET_NAME}/{MODEL_PATH}')

# Verify uploaded file in Google Cloud Storage
print("Listing files in bucket for verification...")
blobs = list(bucket.list_blobs(prefix='models'))
for blob in blobs:
    print(f'{blob.name}')

# Deploy model to Vertex AI
# Model.upload() registers the artifact as a Vertex AI Model resource; this
# cell does not create an endpoint or deploy the model to one.
# NOTE(review): the sklearn-cpu.0-24 image targets scikit-learn 0.24 — confirm
# the version used for training is compatible with it before serving.
print("Deploying model to Vertex AI...")
try:
    model = aiplatform.Model.upload(
        display_name='question-answering-model',
        artifact_uri=ARTIFACT_URI,
        serving_container_image_uri='us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest',
    )
    model.wait_for_resource_creation()
    print(f'Model deployed to Vertex AI: {model.resource_name}')
except NotFound as e:
    print(f'Error deploying model to Vertex AI: {e}')
except Exception as e:
    print(f'Unexpected error deploying model to Vertex AI: {e}')


Initializing Vertex AI...
Uploading model to Google Cloud Storage...
Bucket trained-data-bucket exists.
Model uploaded to: gs://trained-data-bucket/models/prediction_test_data_model.joblib
Listing files in bucket for verification...
models/prediction_test_data_model.joblib
Deploying model to Vertex AI...
Error deploying model to Vertex AI: 404 There are no files in directory "gs://trained-data-bucket/models/prediction_test_data_model.joblib". Please check if the Cloud Storage URI is correct or copy at least one file to the directory.
