In [None]:
# Authenticate the current Colab user (Colab-only helper).
# NOTE(review): presumably this is what lets the BigQuery client created later pick up credentials — confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
from google.cloud.exceptions import NotFound
from google.cloud import bigquery
import pandas as pd
import os

In [None]:
# Google Cloud project that every BigQuery call in this notebook runs against.
PROJECT_ID = "pradeep-genai"

# Single shared BigQuery client; later cells read `client.project` and use it
# for all dataset/table/insert operations.
client = bigquery.Client(project=PROJECT_ID)

In [None]:
# Sanity check: show which project the client is bound to.
print(client.project)

pradeep-genai


In [None]:
# Name of the dataset that holds all flight-reservation tables.
DATASET_NAME = "flight_reservations"

# Fully-qualified dataset ID in the form <project>.<dataset>.
dataset_id = f"{client.project}.{DATASET_NAME}"
print(dataset_id)

pradeep-genai.flight_reservations


In [None]:
# Get-or-create: reuse the dataset when it is already present, otherwise
# create it with location "US". Either way `dataset` ends up bound to the
# server-side dataset object.
try:
    dataset = client.get_dataset(dataset_id)
except NotFound:
    new_dataset = bigquery.Dataset(dataset_id)
    new_dataset.location = "US"
    dataset = client.create_dataset(new_dataset)
    print(f"Dataset {dataset_id} created.")
else:
    print(f"Dataset {dataset_id} already exists!")

Dataset pradeep-genai.flight_reservations created.


In [None]:
# Fully-qualified ID of the customers table (<dataset_id>.customers).
# NOTE: `table_id` is reassigned for every table further down, so run this
# cell right before the customers table-creation cell that reads it.
table_id = f"{dataset_id}.customers"
print(table_id)

pradeep-genai.flight_reservations.customers


In [None]:
# Schema of the customers table. Every column is REQUIRED, so the list is
# built from (column name, BigQuery type) pairs in column order.
customers_schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("customer_id", "INT64"),
        ("first_name", "STRING"),
        ("last_name", "STRING"),
        ("email", "STRING"),
        ("date_of_birth", "DATE"),
        ("created_at", "DATETIME"),
    )
]

In [None]:
# Get-or-create the customers table; `customers_table` is bound to the
# server-side table object in both branches.
try:
    customers_table = client.get_table(table_id)
except NotFound:
    pending_table = bigquery.Table(table_id, schema=customers_schema)
    customers_table = client.create_table(pending_table)
    print(f"Table {table_id} created.")
else:
    print(f"Table {table_id} already exists!")

Table pradeep-genai.flight_reservations.customers created.


In [None]:
# Fully-qualified ID of the flights table (<dataset_id>.flights).
# NOTE: `table_id` is shared with the other table cells and overwritten here,
# so run this cell right before the flights table-creation cell that reads it.
table_id = f"{dataset_id}.flights"
print(table_id)

pradeep-genai.flight_reservations.flights


In [None]:
# Schema of the flights table. Every column is REQUIRED, so the list is built
# from (column name, BigQuery type) pairs in column order.
flights_schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("flight_id", "INT64"),
        ("origin", "STRING"),
        ("destination", "STRING"),
        ("departure_datetime", "DATETIME"),
        ("arrival_datetime", "DATETIME"),
        ("carrier", "STRING"),
        ("price", "FLOAT64"),
    )
]

In [None]:
# Get-or-create the flights table; `flights_table` is bound to the
# server-side table object in both branches.
try:
    flights_table = client.get_table(table_id)
except NotFound:
    pending_table = bigquery.Table(table_id, schema=flights_schema)
    flights_table = client.create_table(pending_table)
    print(f"Table {table_id} created.")
else:
    print(f"Table {table_id} already exists!")

Table pradeep-genai.flight_reservations.flights created.


In [None]:
# Fully-qualified ID of the reservations table (<dataset_id>.reservations).
# NOTE: `table_id` is shared with the other table cells and overwritten here,
# so run this cell right before the reservations table-creation cell that reads it.
table_id = f"{dataset_id}.reservations"
print(table_id)

pradeep-genai.flight_reservations.reservations


In [None]:
# Schema of the reservations table. Every column is REQUIRED, so the list is
# built from (column name, BigQuery type) pairs in column order.
reservations_schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("reservation_id", "INT64"),
        ("customer_id", "INT64"),
        ("flight_id", "INT64"),
        ("reservation_datetime", "DATETIME"),
        ("status", "STRING"),
    )
]

In [None]:
# Get-or-create the reservations table; `reservations_table` is bound to the
# server-side table object in both branches.
try:
    reservations_table = client.get_table(table_id)
except NotFound:
    pending_table = bigquery.Table(table_id, schema=reservations_schema)
    reservations_table = client.create_table(pending_table)
    print(f"Table {table_id} created.")
else:
    print(f"Table {table_id} already exists!")

Table pradeep-genai.flight_reservations.reservations created.


In [None]:
# Fully-qualified ID of the transactions table (<dataset_id>.transactions).
# NOTE: `table_id` is shared with the other table cells and overwritten here,
# so run this cell right before the transactions table-creation cell that reads it.
table_id = f"{dataset_id}.transactions"
print(table_id)

pradeep-genai.flight_reservations.transactions


In [None]:
# Schema of the transactions table. Every column is REQUIRED, so the list is
# built from (column name, BigQuery type) pairs in column order.
transactions_schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in (
        ("transaction_id", "INT64"),
        ("reservation_id", "INT64"),
        ("amount", "FLOAT64"),
        ("transaction_datetime", "DATETIME"),
    )
]

In [None]:
# Get-or-create the transactions table; `transactions_table` is bound to the
# server-side table object in both branches.
try:
    transactions_table = client.get_table(table_id)
except NotFound:
    pending_table = bigquery.Table(table_id, schema=transactions_schema)
    transactions_table = client.create_table(pending_table)
    print(f"Table {table_id} created.")
else:
    print(f"Table {table_id} already exists!")

Table pradeep-genai.flight_reservations.transactions created.


In [None]:
# Mount Google Drive at /content/drive; the CSV files read by the following
# cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the customers CSV from the mounted Drive folder and preview the first rows.
customers_csv_path = '/content/drive/MyDrive/SQL Generation/data/customers.csv'
df = pd.read_csv(customers_csv_path)
df.head()

Unnamed: 0,customer_id,first_name,last_name,email,date_of_birth,created_at
0,1,John,Doe,john.doe@example.com,1985-02-15,2021-01-01 08:30:00
1,2,Jane,Doe,jane.doe@example.com,1987-05-23,2021-01-15 12:45:00
2,3,Alice,Johnson,alice.j@example.com,1990-08-10,2021-03-20 09:00:00
3,4,Bob,Smith,bob.smith@example.com,1982-11-12,2021-02-25 10:15:00
4,5,Charlie,Brown,charlie.b@example.com,1995-05-20,2021-06-01 11:30:00


In [None]:
# Stream the rows currently held in `df` into the customers table.
#
# insert_rows maps tuple values to schema fields by position, so select the
# DataFrame columns by schema field name first: a reordered or extra CSV
# column then cannot end up in the wrong field, and a missing column fails
# loudly with a KeyError.
schema_columns = [field.name for field in customers_table.schema]
# itertuples(index=False, name=None) yields plain tuples of native Python
# scalars without first upcasting the whole frame to one object array.
data = list(df[schema_columns].itertuples(index=False, name=None))

# insert_rows reports per-row failures through its return value instead of
# raising, so an unchecked call can silently drop rows.
insert_errors = client.insert_rows(customers_table, data)
if insert_errors:
    raise RuntimeError(f"Failed to insert rows into customers table: {insert_errors}")
insert_errors

[]

In [None]:
# Read the flights CSV from the mounted Drive folder and preview the first rows.
flights_csv_path = '/content/drive/MyDrive/SQL Generation/data/flights.csv'
df = pd.read_csv(flights_csv_path)
df.head()

Unnamed: 0,flight_id,origin,destination,departure_datetime,arrival_datetime,carrier,price
0,1,JFK,LAX,2023-11-05 09:00:00,2023-11-05 12:30:00,Delta,450
1,2,LAX,JFK,2023-11-10 15:00:00,2023-11-10 23:30:00,Delta,450
2,3,JFK,ATL,2023-11-15 07:00:00,2023-11-15 09:30:00,Southwest,300
3,4,ATL,LAX,2023-11-18 17:00:00,2023-11-18 19:30:00,Southwest,400
4,5,LAX,SEA,2023-11-22 14:00:00,2023-11-22 16:30:00,United,350


In [None]:
# Stream the rows currently held in `df` into the flights table.
#
# insert_rows maps tuple values to schema fields by position, so select the
# DataFrame columns by schema field name first: a reordered or extra CSV
# column then cannot end up in the wrong field, and a missing column fails
# loudly with a KeyError.
schema_columns = [field.name for field in flights_table.schema]
# itertuples(index=False, name=None) yields plain tuples of native Python
# scalars without first upcasting the whole frame to one object array.
data = list(df[schema_columns].itertuples(index=False, name=None))

# insert_rows reports per-row failures through its return value instead of
# raising, so an unchecked call can silently drop rows.
insert_errors = client.insert_rows(flights_table, data)
if insert_errors:
    raise RuntimeError(f"Failed to insert rows into flights table: {insert_errors}")
insert_errors

[]

In [None]:
# Read the reservations CSV from the mounted Drive folder and preview the first rows.
reservations_csv_path = '/content/drive/MyDrive/SQL Generation/data/reservations.csv'
df = pd.read_csv(reservations_csv_path)
df.head()

Unnamed: 0,reservation_id,customer_id,flight_id,reservation_datetime,status
0,1,1,1,2023-10-01 10:30:00,Confirmed
1,2,2,2,2023-10-02 11:00:00,Confirmed
2,3,3,3,2023-10-03 12:15:00,Confirmed
3,4,3,4,2023-10-05 08:30:00,Cancelled
4,5,6,4,2023-10-06 14:20:00,Confirmed


In [None]:
# Stream the rows currently held in `df` into the reservations table.
#
# insert_rows maps tuple values to schema fields by position, so select the
# DataFrame columns by schema field name first: a reordered or extra CSV
# column then cannot end up in the wrong field, and a missing column fails
# loudly with a KeyError.
schema_columns = [field.name for field in reservations_table.schema]
# itertuples(index=False, name=None) yields plain tuples of native Python
# scalars without first upcasting the whole frame to one object array.
data = list(df[schema_columns].itertuples(index=False, name=None))

# insert_rows reports per-row failures through its return value instead of
# raising, so an unchecked call can silently drop rows.
insert_errors = client.insert_rows(reservations_table, data)
if insert_errors:
    raise RuntimeError(f"Failed to insert rows into reservations table: {insert_errors}")
insert_errors

[]

In [None]:
# Read the transactions CSV from the mounted Drive folder and preview the first rows.
transactions_csv_path = '/content/drive/MyDrive/SQL Generation/data/transactions.csv'
df = pd.read_csv(transactions_csv_path)
df.head()

Unnamed: 0,transaction_id,reservation_id,amount,transaction_datetime
0,1,1,450,2023-10-01 10:31:00
1,2,2,450,2023-10-02 11:01:00
2,3,3,300,2023-10-03 12:16:00
3,5,5,350,2023-10-06 14:21:00
4,6,6,550,2023-10-10 10:01:00


In [None]:
# Stream the rows currently held in `df` into the transactions table.
#
# insert_rows maps tuple values to schema fields by position, so select the
# DataFrame columns by schema field name first: a reordered or extra CSV
# column then cannot end up in the wrong field, and a missing column fails
# loudly with a KeyError.
schema_columns = [field.name for field in transactions_table.schema]
# itertuples(index=False, name=None) yields plain tuples of native Python
# scalars without first upcasting the whole frame to one object array.
data = list(df[schema_columns].itertuples(index=False, name=None))

# insert_rows reports per-row failures through its return value instead of
# raising, so an unchecked call can silently drop rows.
insert_errors = client.insert_rows(transactions_table, data)
if insert_errors:
    raise RuntimeError(f"Failed to insert rows into transactions table: {insert_errors}")
insert_errors

[]