# AutoML Dataset

## Get access to Google Cloud from the notebook

### Set the project ID and Bucket URI

In [9]:
# Google Cloud project ID; the gcloud configuration and the BigQuery client
# in the cells below are both pointed at this project.
PROJECT_ID = "big-data-project1-347618"
# Left empty for now. Presumably a Cloud Storage bucket URI (gs://...) — TODO confirm
# and fill in; none of the cells visible here reads this value yet.
BUCKET_URI = ""  # TODO

### Authenticate to Google Cloud

In [10]:
# Authentication helper shipped with the google.colab package.
from google.colab import auth

# Authenticate the notebook user to Google Cloud.
auth.authenticate_user()
# Shell escape: make PROJECT_ID the active gcloud project. IPython substitutes
# the Python variable into {PROJECT_ID} before the command runs.
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


## Create BigQuery client

We are now ready to use BigQuery.

We first create the BigQuery client handle, i.e., a [Client](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html#google.cloud.bigquery.client.Client) object.

In [11]:
import google.cloud.bigquery as bq

# BigQuery client handle bound explicitly to PROJECT_ID; the dataset-creation
# cell below goes through this object.
client = bq.Client(project=PROJECT_ID)

## Create dataset in BigQuery

We create the dataset first so that we can associate tables with it. Running the code below returns a [Dataset](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.dataset.Dataset.html) object.

In [13]:
# Create the "bdcc_22" dataset. exists_ok=True makes the cell safe to re-run:
# an "already exists" error from a previous run is ignored instead of raised.
# NOTE(review): the disabled table-creation cells further down reference a
# dataset named "openimages", not "bdcc_22" — confirm which one is intended.
dataset = client.create_dataset("bdcc_22", exists_ok=True)

## Create the tables and associated schema

In [14]:
import pandas as pd

### Load class data to use

In [15]:
# Load the class list from the local file "classes.csv".
# NOTE(review): the rendered frame contains an "Unnamed: 0" column next to
# Label/Description, which suggests the CSV was saved together with its index.
# Consider pd.read_csv(..., index_col=0) if no later cell needs that column.
classes = pd.read_csv("classes.csv")
# Bare last expression so the notebook renders the frame.
classes

Unnamed: 0,Label,Description
0,/m/011k07,Tortoise
1,/m/011q46kg,Container
2,/m/012074,Magpie
3,/m/0120dh,Sea turtle
4,/m/01226z,Football
...,...,...
596,/m/0qmmr,Wheelchair
597,/m/0wdt60w,Rugby ball
598,/m/0xfy,Armadillo
599,/m/0xzly,Maracas


### Load image data to use

In [16]:
# Load the image/label pairs from the local file "image-labels.csv".
# NOTE(review): the rendered frame contains an "Unnamed: 0" column next to
# ImageId/Label, which suggests the CSV was saved together with its index.
# Consider pd.read_csv(..., index_col=0) if no later cell needs that column.
image_labels = pd.read_csv("image-labels.csv")
# Bare last expression so the notebook renders the frame.
image_labels

Unnamed: 0,ImageId,Label
0,000026e7ee790996,/m/07j7r
1,000026e7ee790996,/m/05s2s
2,000062a39995e348,/m/015p6
3,000062a39995e348,/m/05s2s
4,0000c64e1253d68f,/m/0k4j
...,...,...
394370,fffc6543b32da1dd,/m/04rky
394371,fffc6543b32da1dd,/m/09j2d
394372,fffc6543b32da1dd,/m/0cnyhnx
394373,fffc6543b32da1dd,/m/0jbk


### Load relation data to use

In [17]:
# Load the (ImageId, Label1, Relation, Label2) rows from the local file "relations.csv".
# NOTE(review): the rendered frame also contains an "Unnamed: 0" column, which
# suggests the CSV was saved together with its index. Consider
# pd.read_csv(..., index_col=0) if no later cell needs that column.
relations = pd.read_csv("relations.csv")
# Bare last expression so the notebook renders the frame.
relations

Unnamed: 0,ImageId,Label1,Relation,Label2
0,4c7a12ee5a5c9300,/m/03bt1vf,at,/m/04bcr3
1,c77c950eb460b8a7,/m/05r655,at,/m/04bcr3
2,4986d824ed80ddcc,/m/01mzpv,at,/m/01y9k5
3,f24d04be72d3fa6d,/m/01mzpv,at,/m/04bcr3
4,450f9387396d0b6b,/m/01mzpv,at,/m/04bcr3
...,...,...,...,...
2763,9536bf2f5617530f,/m/03bt1vf,wears,/m/080hkjn
2764,e0a5c22dbd7c89b1,/m/04yx4,wears,/m/01940j
2765,8dc8592b97d3ef98,/m/03bt1vf,wears,/m/080hkjn
2766,ecea6142d2061c97,/m/03bt1vf,wears,/m/080hkjn


### Create the corresponding tables in BigQuery

In [27]:
# Disabled cell (kept for reference). If re-enabled, it would create the table
# "<PROJECT_ID>.openimages.classes" with two STRING columns, Label and Description.
# NOTE(review): it targets the "openimages" dataset, while the dataset created
# earlier in this notebook is "bdcc_22" — confirm before re-enabling.
# classes_table = bq.Table(PROJECT_ID +'.openimages.classes')
# classes_table.schema = (
#  bq.SchemaField('Label', 'STRING'),
#  bq.SchemaField('Description', 'STRING')
# )
# client.create_table(classes_table)

In [28]:
# Disabled cell (kept for reference). If re-enabled, it would create the table
# "<PROJECT_ID>.openimages.image_labels" with two STRING columns.
# NOTE(review): the schema spells the first column 'ImageID', whereas the CSV
# header shown above (and the relations schema) use 'ImageId' — confirm the
# intended spelling before re-enabling.
# image_labels_table = bq.Table(PROJECT_ID +'.openimages.image_labels')
# image_labels_table.schema = (
#  bq.SchemaField('ImageID',      'STRING'),
#  bq.SchemaField('Label',      'STRING')
# )
# client.create_table(image_labels_table)

In [29]:
# Disabled cell (kept for reference). If re-enabled, it would create the table
# "<PROJECT_ID>.openimages.relations" with four STRING columns:
# ImageId, Label1, Relation and Label2.
# NOTE(review): it targets the "openimages" dataset, while the dataset created
# earlier in this notebook is "bdcc_22" — confirm before re-enabling.
# relations_table = bq.Table(PROJECT_ID +'.openimages.relations')
# relations_table.schema = (
#  bq.SchemaField('ImageId',      'STRING'),
#  bq.SchemaField('Label1',      'STRING'),
#  bq.SchemaField('Relation',      'STRING'),
#  bq.SchemaField('Label2',      'STRING')
# )
# client.create_table(relations_table)

### Insert the data into the BigQuery tables

## Define the classes for the model


In [30]:
# Class descriptions the model is restricted to, in alphabetical order.
# Every entry is a one-element tuple, so CLASSES is a list of 1-tuples
# rather than a flat list of strings.
CLASSES = [
    (description,)
    for description in (
        "Cheetah",
        "Dolphin",
        "Fox",
        "Goldfish",
        "Horse",
        "Koala",
        "Lynx",
        "Owl",
        "Penguin",
        "Scorpion",
    )
]