In [1]:
from google.cloud import storage
from google.cloud import bigquery

# Deployment settings shared by every cell below.
project_id = "aaron-nayoung"    # project that owns the destination dataset
bucket_name = "book_data_p0"    # GCS bucket holding the source CSV files
folder_name = "initial_load"    # object prefix (folder) inside the bucket
dataset_name = "books_raw"      # BigQuery dataset receiving the raw tables
region = "us"                   # location passed to query jobs

# Bind both clients to project_id explicitly so jobs run in the same project
# that the fully-qualified table ids point at, instead of whatever default
# project the ambient credentials happen to carry.
storage_client = storage.Client(project=project_id)
bq_client = bigquery.Client(project=project_id)

In [3]:
def create_load_table(file_name, table_name, schema, delimiter=","):
  """Create a BigQuery table if needed and load one CSV file from GCS into it.

  The source object is gs://<bucket_name>/<folder_name>/<file_name> and the
  destination is <project_id>.<dataset_name>.<table_name>, both built from the
  notebook-level settings.

  Args:
    file_name: Name of the CSV object inside the load folder.
    table_name: Name of the destination table inside the dataset.
    schema: Sequence of bigquery.SchemaField describing the full table. The
      LAST field is assumed to be the auto-populated load_time column; it is
      used when creating the table but left out of the load job's schema.
      The sequence passed in is not modified.
    delimiter: Field separator used in the CSV file.

  Returns:
    None. Progress is reported with print().
  """
  uri = "gs://{}/{}/{}".format(bucket_name, folder_name, file_name)
  table_id = "{}.{}.{}".format(project_id, dataset_name, table_name)

  # exists_ok=True: an already existing table is not an error, so this
  # message is printed whether or not the table was created by this call.
  table = bigquery.Table(table_id, schema=schema)
  table = bq_client.create_table(table, exists_ok=True)
  print("Created table {}".format(table.table_id))

  # Leave the trailing load_time field out of the load schema so its value is
  # auto-generated. Build a trimmed copy instead of deleting from the caller's
  # list: mutating the argument would silently drop a real column if the same
  # schema object were ever passed again.
  load_schema = list(schema)[:-1]

  job_config = bigquery.LoadJobConfig(
        schema=load_schema,
        skip_leading_rows=1,  # the first CSV row is skipped (header)
        source_format=bigquery.SourceFormat.CSV,
        write_disposition="WRITE_TRUNCATE",
        field_delimiter=delimiter
      )

  load_job = bq_client.load_table_from_uri(uri, table_id, job_config=job_config)
  load_job.result()  # wait for the load job to finish

  destination_table = bq_client.get_table(table_id)
  print("Loaded {} rows.".format(destination_table.num_rows))

# bookauthors

In [None]:
# Source CSV object and the raw table it populates.
file_name = 'bookauthors.csv'
table_name = 'bookauthors'

# Columns declared for the CSV data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("book_id", "INTEGER", "REQUIRED"),
    ("author_id", "INTEGER", "REQUIRED"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table bookauthors
Loaded 17642 rows.


# books

In [None]:
# Source CSV object and the raw table it populates.
file_name = 'books.csv'
table_name = 'books'

# Columns declared for the CSV data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("book_id", "INTEGER", "REQUIRED"),
    ("title", "STRING", "NULLABLE"),
    ("isbn13", "STRING", "NULLABLE"),
    ("language_id", "INTEGER", "REQUIRED"),
    ("num_pages", "INTEGER", "NULLABLE"),
    ("publication_date", "DATE", "NULLABLE"),
    ("publisher_id", "INTEGER", "REQUIRED"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table books
Loaded 11127 rows.


# bookslanguage

In [None]:
# Source CSV object and the raw table it populates.
file_name = 'bookslanguage.csv'
table_name = 'bookslanguage'

# Columns declared for the CSV data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("language_id", "INTEGER", "REQUIRED"),
    ("language_code", "STRING", "NULLABLE"),
    ("language_name", "STRING", "NULLABLE"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table bookslanguage
Loaded 27 rows.


# libraries

In [8]:
# Source CSV object and the raw table it populates.
file_name = 'libraries.csv'
table_name = 'libraries'

# Columns declared for the CSV data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("library_id", "INTEGER", "REQUIRED"),
    ("stabr", "STRING", "NULLABLE"),
    ("libname", "STRING", "NULLABLE"),
    ("address", "STRING", "NULLABLE"),
    ("city", "STRING", "NULLABLE"),
    ("zip", "STRING", "NULLABLE"),
    ("zip4", "STRING", "NULLABLE"),
    ("cnty", "STRING", "NULLABLE"),
    ("full_address", "STRING", "NULLABLE"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
# NOTE(review): load_time is NULLABLE here but REQUIRED in several other load cells - confirm this is intended.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="NULLABLE", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table libraries
Loaded 9215 rows.


# sales_rating

In [6]:
# Source CSV object and the raw table it populates.
file_name = 'sales_rating.csv'
table_name = 'sales_rating'

# Columns declared for the CSV data as (name, type, mode).
# Several column names contain spaces; they are kept exactly as declared.
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("sale_id", "INTEGER", "REQUIRED"),
    ("publishing_year", "STRING", "NULLABLE"),
    ("book_name", "STRING", "NULLABLE"),
    ("author", "STRING", "NULLABLE"),
    ("language_code", "STRING", "NULLABLE"),
    ("author_rating", "STRING", "NULLABLE"),
    ("book_average_rating", "FLOAT", "NULLABLE"),
    ("book_rating_count", "INTEGER", "NULLABLE"),
    ("genre", "STRING", "NULLABLE"),
    ("gross sales", "FLOAT", "NULLABLE"),
    ("publisher revenue", "FLOAT", "NULLABLE"),
    ("sale price", "FLOAT", "NULLABLE"),
    ("sales rank", "INTEGER", "NULLABLE"),
    ("publisher", "STRING", "NULLABLE"),
    ("units_sold", "INTEGER", "NULLABLE"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
# NOTE(review): load_time is NULLABLE here but REQUIRED in several other load cells - confirm this is intended.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="NULLABLE", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table sales_rating
Loaded 1070 rows.


# author

In [None]:
# Source CSV object and the raw table it populates.
file_name = 'author.csv'
table_name = 'author'

# Columns declared for the CSV data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("author_id", "INTEGER", "REQUIRED"),
    ("author_name", "STRING", "NULLABLE"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(file_name=file_name, table_name=table_name, schema=schema)

Created table author
Loaded 9235 rows.


# faker_authors

In [None]:
# Source file and the raw table it populates; this file is pipe-separated,
# so the delimiter is passed to the loader explicitly.
file_name = 'faker_authors.csv'
table_name = 'faker_authors'
delimiter = '|'

# Columns declared for the file's data as (name, type, mode).
schema = [
  bigquery.SchemaField(col_name, col_type, mode=col_mode)
  for col_name, col_type, col_mode in (
    ("author_id", "INTEGER", "REQUIRED"),
    ("date_of_birth", "DATE", "NULLABLE"),
    ("age", "INTEGER", "NULLABLE"),
    ("country", "STRING", "NULLABLE"),
    ("email", "STRING", "NULLABLE"),
  )
]
# load_time has to stay last: create_load_table leaves the final field out of the load schema.
schema.append(
  bigquery.SchemaField("load_time", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP")
)

create_load_table(
  file_name=file_name,
  table_name=table_name,
  schema=schema,
  delimiter=delimiter,
)

Created table faker_authors
Loaded 9235 rows.


# **Verify loads**

In [None]:
# List the tables in the dataset, sorted by name, as a quick check
# that each load cell above produced its table.
sql = "select table_name from {}.INFORMATION_SCHEMA.TABLES order by table_name".format(dataset_name)

query_job = bq_client.query(sql, location=region)

# Wait for the query once and iterate the rows it returned; iterating the job
# object again would request the result a second time.
results = query_job.result()

# Read the column by name and use a loop-local variable, so the table_name
# global set by the load cells is not overwritten by this check.
for row in results:
    print("table:", row["table_name"])

table: Author_info
table: author
table: bookauthors
table: books
table: bookslanguage
table: faker_authors
table: libraries


In [None]:
%%bigquery
-- Preview five rows of the author table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.author
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,author_id,author_name,load_time
0,617,Avi,2024-01-29 00:16:12.287561+00:00
1,3358,Hob,2024-01-29 00:16:12.287561+00:00
2,4733,Kara,2024-01-29 00:16:12.287561+00:00
3,5314,Livy,2024-01-29 00:16:12.287561+00:00
4,5512,Manu,2024-01-29 00:16:12.287561+00:00


In [None]:
%%bigquery
-- Preview five rows of the faker_authors table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.faker_authors
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,author_id,date_of_birth,age,country,email,load_time
0,882,2003-05-28,20,Chad,thomasashley@example.com,2024-01-29 00:16:17.574414+00:00
1,3861,2002-03-11,21,Chad,mowen@example.com,2024-01-29 00:16:17.574414+00:00
2,3213,2001-09-19,22,Chad,sarahtodd@example.org,2024-01-29 00:16:17.574414+00:00
3,1299,2000-10-31,23,Chad,zhill@example.net,2024-01-29 00:16:17.574414+00:00
4,3430,1999-12-28,24,Chad,castrowendy@example.net,2024-01-29 00:16:17.574414+00:00


In [None]:
%%bigquery
-- Preview five rows of the bookauthors table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.bookauthors
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,book_id,author_id,load_time
0,10539,1,2024-01-29 00:16:00.829852+00:00
1,8109,2,2024-01-29 00:16:00.829852+00:00
2,2792,3,2024-01-29 00:16:00.829852+00:00
3,6228,4,2024-01-29 00:16:00.829852+00:00
4,1058,5,2024-01-29 00:16:00.829852+00:00


In [None]:
%%bigquery
-- Preview five rows of the books table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.books
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,book_id,title,isbn13,language_id,num_pages,publication_date,publisher_id,load_time
0,427,The Patricia Cornwell CD Audio Treasury: All T...,9780060791216,1,0,2005-07-26,881,2024-01-29 00:16:03.575101+00:00
1,2645,Return to the Planet of the Apes #2: Escape fr...,9780345251671,1,0,1976-04-12,180,2024-01-29 00:16:03.575101+00:00
2,2953,Five Rings of Fire (Able Team #11),9780373612116,1,0,1984-02-23,776,2024-01-29 00:16:03.575101+00:00
3,3072,The Michael Crichton Collection: Jurassic Park...,9780375415807,1,0,2000-06-09,1642,2024-01-29 00:16:03.575101+00:00
4,4039,Shipwreck (Island I),9780439023313,1,0,2007-02-01,1765,2024-01-29 00:16:03.575101+00:00


In [None]:
%%bigquery
-- Preview five rows of the bookslanguage table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.bookslanguage
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,language_id,language_code,language_name,load_time
0,1,eng,English,2024-01-29 00:16:05.976573+00:00
1,2,en-US,United States English,2024-01-29 00:16:05.976573+00:00
2,3,fre,French,2024-01-29 00:16:05.976573+00:00
3,4,spa,Spanish,2024-01-29 00:16:05.976573+00:00
4,5,en-GB,British English,2024-01-29 00:16:05.976573+00:00


In [9]:
%%bigquery
-- Preview five rows of the libraries table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.libraries
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,library_id,stabr,libname,address,city,zip,zip4,cnty,full_address,load_time
0,36,AK,KEGOAYAH KOZGA LIBRARY,100 WEST 7TH AVENUE,NOME,99762,M,NOME,"100 WEST 7TH AVENUE NOME, AK 99762-M",2024-01-29 23:59:06.732594+00:00
1,44,AK,SITKA PUBLIC LIBRARY,320 HARBOR DRIVE,SITKA,99835,7553,SITKA,"320 HARBOR DRIVE SITKA, AK 99835-7553",2024-01-29 23:59:06.732594+00:00
2,4,AK,KUSKOKWIM CONSORTIUM LIBRARY,420 CHIEF EDDIE HOFFMAN HIGHWAY,BETHEL,99559,M,BETHEL,"420 CHIEF EDDIE HOFFMAN HIGHWAY BETHEL, AK 995...",2024-01-29 23:59:06.732594+00:00
3,67,AK,ANIAK PUBLIC LIBRARY,270 RIVERFRONT DRIVE,ANIAK,99557,M,BETHEL,"270 RIVERFRONT DRIVE ANIAK, AK 99557-M",2024-01-29 23:59:06.732594+00:00
4,3,AK,ANDERSON COMMUNITY LIBRARY,101 FIRST STREET,ANDERSON,99744,M,DENALI,"101 FIRST STREET ANDERSON, AK 99744-M",2024-01-29 23:59:06.732594+00:00


In [7]:
%%bigquery
-- Preview five rows of the sales_rating table; without ORDER BY the rows returned are arbitrary.
SELECT *
FROM books_raw.sales_rating
LIMIT 5

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,sale_id,publishing_year,book_name,author,language_code,author_rating,book_average_rating,book_rating_count,genre,gross sales,publisher revenue,sale price,sales rank,publisher,units_sold,load_time
0,911,2014,The Rosie Effect,Graeme Simsion,eng,Intermediate,3.56,27308,genre fiction,1225.44,735.264,11.04,1068,Macmillan,4360,2024-01-29 23:34:38.611311+00:00
1,836,1958,The Once and Future King,T.H. White,eng,Excellent,4.08,76911,nonfiction,746.7,448.02,6.55,973,Macmillan,2889,2024-01-29 23:34:38.611311+00:00
2,171,2009,Hundra????ringen som klev ut genom f????nstret...,"Jonas Jonasson, Rod Bradbury",en-GB,Intermediate,3.81,92275,genre fiction,5200.65,3120.39,8.89,183,Macmillan,585,2024-01-29 23:34:38.611311+00:00
3,617,2011,The Psychopath Test,Jon Ronson,eng,Intermediate,3.91,44097,genre fiction,492.7,295.62,3.79,701,Macmillan,49032,2024-01-29 23:34:38.611311+00:00
4,746,1988,The Case for Christ,Lee Strobel,,Excellent,4.1,67128,genre fiction,799.11,479.466,6.83,859,Macmillan,31752,2024-01-29 23:34:38.611311+00:00
