In [None]:
!pip install keyrings.google-artifactregistry-auth

In [None]:
!pip install google-cloud-bigquery faker pandas

In [None]:
from google.cloud import bigquery

# Column layout of the feedback table as (name, BigQuery type) pairs.
# Every column is declared REQUIRED, so the field list is derived from the
# pairs instead of repeating the mode on each line.
_feedback_columns = (
    ("Email", "STRING"),
    ("Timestamp", "TIMESTAMP"),
    ("Region", "STRING"),
    ("Feedback", "STRING"),
)
schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in _feedback_columns
]

In [None]:
from faker import Faker
import random
from google.cloud import bigquery
import pandas as pd

# Shared helpers for the rest of the notebook: a Faker instance for synthetic
# values and a BigQuery client built with default arguments.
fake = Faker()
client = bigquery.Client()

# Canned review texts. generate_feedback() draws uniformly from the
# concatenation of both lists, so with five entries each the two sentiments
# are equally likely.
positive_feedback = [
    "Absolutely love this product! My skin feels amazing.",
    "Exceeded my expectations; will definitely buy again.",
    "Fantastic results; my complexion has never been better.",
    "Highly recommend to anyone looking for quality skincare.",
    "A must-have in my daily routine; very satisfied.",
]

negative_feedback = [
    "Did not work for me; caused irritation.",
    "Overpriced and underwhelming; not worth it.",
    "No noticeable difference after weeks of use.",
    "Disappointed with the results; won't repurchase.",
    "Left my skin feeling dry and uncomfortable.",
]

# Ten UK counties used as the pool for the "Region" column.
regions = [
    "Greater London", "West Midlands",
    "Greater Manchester", "West Yorkshire",
    "Kent", "Essex",
    "Merseyside", "South Yorkshire",
    "Hampshire", "Lancashire",
]

def generate_feedback():
    """Build one synthetic customer-feedback row.

    Returns:
        dict: A record with the keys "Email", "Timestamp", "Region" and
        "Feedback". The email and timestamp come from the module-level
        ``fake`` generator (``date_time_this_year`` called with its default
        arguments); the region is picked at random from ``regions`` and the
        feedback text at random from the positive and negative samples
        combined.
    """
    record = {
        "Email": fake.email(),
        "Timestamp": fake.date_time_this_year(),
    }
    record["Region"] = random.choice(regions)
    # Both sentiment lists are pooled so each sample text is equally likely.
    all_comments = positive_feedback + negative_feedback
    record["Feedback"] = random.choice(all_comments)
    return record

# Generate 1,000 feedback records
feedback_data = [generate_feedback() for _ in range(1000)]

# Convert to a DataFrame (one column per key of the generated records)
df = pd.DataFrame(feedback_data)

# Define your BigQuery dataset and table
dataset_id = 'continuous_queries_demo'
table_id = 'customer_feedback'
# Build the reference explicitly in the client's project; Client.dataset()
# is deprecated in favour of constructing a DatasetReference directly.
table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

# Create the table if it doesn't exist.
# Only a "not found" response means the table is missing; any other failure
# (credentials, permissions, network) must surface instead of being mistaken
# for a missing table and triggering a create attempt.
from google.cloud.exceptions import NotFound

try:
    client.get_table(table_ref)
except NotFound:
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)
    print(f"Table {table_id} created.")
else:
    print(f"Table {table_id} already exists.")

# Load data into BigQuery
job = client.load_table_from_dataframe(df, table_ref)
job.result()  # Wait for the job to complete
print(f"Loaded {job.output_rows} rows into {dataset_id}:{table_id}.")