# Generate a large set of random rows and upload them to Google Cloud Storage (GCS)

In [2]:
BUCKET = 'model_experimentation_2025'
REGION = 'us-central1' 

! gsutil mb -l $REGION gs://$BUCKET

Creating gs://model_experimentation_2025/...


In [8]:
import numpy as np
import pandas as pd
from google.cloud import storage

def generate_random_data(num_samples: int, seed=None) -> pd.DataFrame:
    """
    Generates a pandas dataframe of random features for the test model.

    Every value is drawn independently from a normal distribution with
    mean 0.0 and standard deviation 1.0.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for the draw. When given, a dedicated
            ``np.random.default_rng(seed)`` generator is used, so the same
            seed always produces the same frame. When ``None`` (the default),
            the values come from NumPy's global random state exactly as
            before, so existing callers are unaffected.

    Returns:
        A dataframe with ``num_samples`` rows and three float64 columns
        named ``col1``, ``col2`` and ``col3``.
    """
    shape = (num_samples, 3)
    if seed is None:
        # Keep the legacy global-state path so callers that rely on a prior
        # np.random.seed(...) call continue to get the same numbers.
        x = np.random.normal(0.0, 1.0, size=shape)
    else:
        # A local generator makes the result reproducible without touching
        # (or depending on) the global random state.
        x = np.random.default_rng(seed).normal(0.0, 1.0, size=shape)
    df = pd.DataFrame(
        x,
        columns=["col1", "col2", "col3"],
        dtype="float64",
    )
    return df


def upload_csv_file_to_gcs(bucket_name: str, source_file_name: str, destination_blob_name: str):
    """Send a CSV file from local disk to an object in a GCS bucket.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to write inside the bucket.

    Prints a confirmation line with the resulting gs:// location once the
    upload call has returned.
    """
    # Client -> bucket handle -> blob handle for the destination object.
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # Transfer the file contents, tagging the object as CSV.
    target_blob.upload_from_filename(source_file_name, content_type='text/csv')

    print(
        f"File {source_file_name} uploaded to gs://{bucket_name}/{destination_blob_name}."
    )

In [9]:
# Build the synthetic training set: 100,000 rows of random features.
random_training_df = generate_random_data(num_samples=100_000)

In [10]:
# Preview the first five rows as a sanity check on the generated columns.
random_training_df.head(n=5)

Unnamed: 0,col1,col2,col3
0,2.071094,0.356696,1.267315
1,0.885052,1.821395,-0.597404
2,-1.179673,0.920685,-1.610942
3,0.861493,1.235437,1.485277
4,0.126781,-0.34568,-0.392108


In [None]:
# Write the frame to a local CSV: header row kept, index column omitted.
file_name = "example_training.csv"
random_training_df.to_csv(file_name, header=True, index=False)

# Upload to GCS, reusing the local file name as the object name.
upload_csv_file_to_gcs(
    bucket_name=BUCKET,
    source_file_name=file_name,
    destination_blob_name=file_name,
)

File example_training.csv uploaded to gs://model_experimentation_2025/example_training.csv.
