In [16]:
from google.cloud import storage
import json
import pandas as pd

bucket_name = "ai101_directory"



## Data Ingestion

In [2]:

def list_directories(bucket_name, prefix):
    """Lists pseudo-directories within a GCS bucket.

    Args:
        bucket_name: The name of the GCS bucket.
        prefix: An optional prefix used to filter objects (simulates subfolders).
    """

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    blobs = bucket.list_blobs(prefix=prefix)

    directories=set()
    for blob in blobs:
        directories.add(blob.name.split("/")[1])
    
    return directories

bucket_name = 'ai101_directory'
root_prefix = 'Faces/'  # Simulates starting in a subfolder

directory_names = list_directories(bucket_name, root_prefix)
print(directory_names)


{'Charlize Theron', 'Virat Kohli', 'Anushka Sharma', 'Hugh Jackman', 'Brad Pitt', 'Billie Eilish', 'Natalie Portman', 'Elizabeth Olsen', 'Marmik', 'Jessica Alba', 'Hrithik Roshan', 'Courtney Cox', 'Robert Downey Jr', 'Lisa Kudrow', 'Margot Robbie', 'Tom Cruise', 'Henry Cavill', 'Vijay Deverakonda', 'Kashyap', 'Alexandra Daddario', 'Akshay Kumar', 'Claire Holt', 'Zac Efron', 'Andy Samberg', 'Amitabh Bachchan', 'Dwayne Johnson', 'Camila Cabello', 'Alia Bhatt', 'Ellen Degeneres', 'Roger Federer', 'Priyanka Chopra'}


In [3]:
directory_names=list(directory_names)
directory_names

['Charlize Theron',
 'Virat Kohli',
 'Anushka Sharma',
 'Hugh Jackman',
 'Brad Pitt',
 'Billie Eilish',
 'Natalie Portman',
 'Elizabeth Olsen',
 'Marmik',
 'Jessica Alba',
 'Hrithik Roshan',
 'Courtney Cox',
 'Robert Downey Jr',
 'Lisa Kudrow',
 'Margot Robbie',
 'Tom Cruise',
 'Henry Cavill',
 'Vijay Deverakonda',
 'Kashyap',
 'Alexandra Daddario',
 'Akshay Kumar',
 'Claire Holt',
 'Zac Efron',
 'Andy Samberg',
 'Amitabh Bachchan',
 'Dwayne Johnson',
 'Camila Cabello',
 'Alia Bhatt',
 'Ellen Degeneres',
 'Roger Federer',
 'Priyanka Chopra']

In [24]:
def create_file_paths(directories):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    
    file_path_list=[]
    for directory_name in directories:
        blobs = bucket.list_blobs(prefix=root_prefix+directory_name+"/")
        for blob in blobs:
            file_path = f"gs://ai101_directory/{blob.name}"
            f#ile_path_list.append({"imageGcsUri":file_path, "displayName":directory_name})
            file_path_list.append({"image_path":file_path, "label":directory_name})
    return file_path_list

In [25]:
file_path_list=create_file_paths(directory_names)

In [26]:
file_path_list[:1]

[{'image_path': 'gs://ai101_directory/Faces/Virat Kohli/Virat Kohli_0.jpg',
  'label': 'Virat Kohli'}]

In [29]:
## create csv file for Dataset Import
pd.DataFrame(data=file_path_list).to_csv("image_data.csv", index=False)

In [30]:
from google.cloud import storage

def upload_to_cloud_storage(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print(f'File {source_file_name} uploaded to {destination_blob_name}.')

source_file_name = "image_data.csv"   
destination_blob_name = "image_data.csv"  

upload_to_cloud_storage(bucket_name, source_file_name, destination_blob_name)


File image_data.csv uploaded to image_data.csv.


## inference

In [10]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_predict_image_classification_sample]
import base64

from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict


def predict_image_classification_sample(
    project: str,
    endpoint_id: str,
    filename: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    with open(filename, "rb") as f:
        file_content = f.read()

    # The format of each instance should conform to the deployed model's prediction input schema.
    encoded_content = base64.b64encode(file_content).decode("utf-8")
    instance = predict.instance.ImageClassificationPredictionInstance(
        content=encoded_content,
    ).to_value()
    instances = [instance]
    # See gs://google-cloud-aiplatform/schema/predict/params/image_classification_1.0.0.yaml for the format of the parameters.
    parameters = predict.params.ImageClassificationPredictionParams(
        confidence_threshold=0.5,
        max_predictions=5,
    ).to_value()
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    # See gs://google-cloud-aiplatform/schema/predict/prediction/image_classification_1.0.0.yaml for the format of the predictions.
    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))


# [END aiplatform_predict_image_classification_sample]

In [11]:
predict_image_classification_sample(
    project="711757102271",
    endpoint_id="8219590488462721024",
    location="us-central1",
    filename="data/Akshay Kumar_37.jpg"
)

response
 deployed_model_id: 400835759998763008
 prediction: {'displayNames': ['Akshay Kumar'], 'confidences': [0.968981862], 'ids': ['476653958681591808']}
