In [1]:
import google.auth
from google.cloud import bigquery

In [2]:
# OAuth scopes requested at authentication time. Two are needed because the
# source data sits in Google Drive while the target tables sit in BigQuery.
scopes = [
    "https://www.googleapis.com/auth/drive",  # source data
    "https://www.googleapis.com/auth/bigquery",  # target tables
]

In [3]:
# Obtain default Google credentials for the scopes listed above.
# The call hands back a (credentials, project) pair, which is unpacked here.
(credentials, project) = google.auth.default(scopes=scopes)

In [4]:
# Build the BigQuery client from the credentials and project obtained above
bq_client = bigquery.Client(
    project=project,
    credentials=credentials,
)

In [5]:
# Look up the dataset that both tables below are created in.
# Its name really is "reference" -- that is the dataset ID, not a placeholder.
dataset_name = "reference"
dataset_ref = bq_client.get_dataset(dataset_ref=dataset_name)

In [6]:
# Reference to the Additional Organizations table inside the dataset.
# The "ao_" prefix used from here on stands for "additional organizations".
ao_table_name = "additional_organizations"
ao_table_ref = dataset_ref.table(ao_table_name)

In [7]:
# Schema of the Additional Organizations table: one SchemaField per column,
# built from (column name, BigQuery type) pairs listed in column order.
ao_schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in (
        ("EIN", "INTEGER"),
        ("NAME", "STRING"),
        ("ICO", "STRING"),
        ("STREET", "STRING"),
        ("CITY", "STRING"),
        ("STATE", "STRING"),
        ("ZIP", "STRING"),
        ("GROUP", "INTEGER"),
        ("SUBSECTION", "INTEGER"),
        ("AFFILIATION", "INTEGER"),
        ("CLASSIFICATION", "INTEGER"),
        ("RULING", "INTEGER"),
        ("DEDUCTIBILITY", "INTEGER"),
        ("FOUNDATION", "INTEGER"),
        ("ACTIVITY", "INTEGER"),
        ("ORGANIZATION", "INTEGER"),
        ("STATUS", "INTEGER"),
        ("TAX_PERIOD", "INTEGER"),
        ("ASSET_CD", "INTEGER"),
        ("INCOME_CD", "INTEGER"),
        ("FILING_REQ_CD", "INTEGER"),
        ("PF_FILING_REQ_CD", "INTEGER"),
        ("ACCT_PD", "INTEGER"),
        ("ASSET_AMT", "INTEGER"),
        ("INCOME_AMT", "INTEGER"),
        ("REVENUE_AMT", "INTEGER"),
        ("NTEE_CD", "STRING"),
        ("SORT_NAME", "STRING"),
    )
]

In [8]:
# Build the in-memory BigQuery table object for Additional Organizations
# from its table reference and schema.
ao_table = bigquery.Table(ao_table_ref, schema=ao_schema)

In [9]:
# The additional-orgs data is not stored in BigQuery itself: it comes from an
# external source, here Google Sheets. Start an external-data configuration
# for that source format.
ao_external_config = bigquery.ExternalConfig(source_format="GOOGLE_SHEETS")

In [10]:
# Placeholders for the Additional Organizations spreadsheet -- replace both
# values before running the cells below.
ao_sheet_name = "INSERT SHEET NAME"
ao_sheet_url = "INSERT ADDITIONAL ORGANIZATIONS IF NECESSARY, OTHERWISE OMIT"

In [11]:
# Point the external configuration at the Additional Organizations spreadsheet.
ao_external_config.source_uris = [ao_sheet_url]
# The first row of the sheet is a header, so it is skipped when reading.
ao_external_config.options.skip_leading_rows = 1
# Restrict the table to the configured tab. Previously `ao_sheet_name` was
# defined but never applied, so the table always read the spreadsheet's
# default tab regardless of the name given.
ao_external_config.options.range = ao_sheet_name
# Attach the finished configuration so the table is created as an external table.
ao_table.external_data_configuration = ao_external_config

In [12]:
# Create the Additional Organizations table in BigQuery.
# exists_ok=True makes the call tolerate a table that already exists, so the
# cell can be re-run; the returned table is displayed as the cell output.
bq_client.create_table(table=ao_table, exists_ok=True)

Table(TableReference(DatasetReference('decent-carving-397313', 'reference'), 'additional_organizations'))

In [13]:
# Reference to the ACS table inside the same dataset (the "acs_" prefix
# matches the American Community Survey placeholder used further down).
acs_table_name = "acs_data_final"
acs_table_ref = dataset_ref.table(acs_table_name)

In [14]:
# Schema of the ACS table: one SchemaField per column, built from
# (column name, BigQuery type) pairs listed in column order.
acs_schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in (
        ("geo_id", "STRING"),
        ("name", "STRING"),
        ("unemployment_rate_percent", "FLOAT64"),
        ("labor_force_participation_rate_percent", "FLOAT64"),
        ("less_than_hs_percent", "FLOAT64"),
        ("total_population", "INT64"),
        ("median_household_income", "INT64"),
        ("gini", "FLOAT64"),
        ("gini_percentile", "FLOAT64"),
        ("gini_top_twenty", "BOOL"),
    )
]

In [15]:
# Build the in-memory BigQuery table object for the ACS data from its
# table reference and schema.
acs_table = bigquery.Table(acs_table_ref, schema=acs_schema)

In [16]:
# As with the additional-orgs table, the ACS data comes from Google Sheets,
# so start an external-data configuration for that source format.
acs_external_config = bigquery.ExternalConfig(source_format="GOOGLE_SHEETS")

In [17]:
# Placeholders for the ACS spreadsheet -- replace both values before running
# the cells below.
acs_sheet_name = "INSERT SHEET NAME"
acs_sheet_url = "INSERT AMERICAN COMMUNITY SURVEY DATA FRAME IF NECESSARY OTHERWISE OMIT"

In [18]:
# Point the external configuration at the ACS spreadsheet.
acs_external_config.source_uris = [acs_sheet_url]
# The first row of the sheet is a header, so it is skipped when reading.
acs_external_config.options.skip_leading_rows = 1
# Restrict the table to the configured tab. Previously `acs_sheet_name` was
# defined but never applied, so the table always read the spreadsheet's
# default tab regardless of the name given.
acs_external_config.options.range = acs_sheet_name
# Attach the finished configuration so the table is created as an external table.
acs_table.external_data_configuration = acs_external_config

In [19]:
# Create the ACS data final table in BigQuery.
# exists_ok=True makes the call tolerate a table that already exists, so the
# cell can be re-run; the returned table is displayed as the cell output.
bq_client.create_table(table=acs_table, exists_ok=True)

Table(TableReference(DatasetReference('decent-carving-397313', 'reference'), 'acs_data_final'))