In [5]:
from google.cloud import bigquery
import pandas as pd
import pandas_gbq
import numpy as np

# Build the data behind the bubble plot: per research field, the average
# field citation ratio (FCR) and publication count, split into publications
# with at least one US-affiliated organisation vs. all others, then export
# the result to a BigQuery table.

# Initialize BigQuery client (uses the environment's default credentials/project)
client = bigquery.Client()

# Destination of the export. Defined once so that the write call and the
# confirmation message below always refer to the same table.
DEST_PROJECT = "covid-19-task"
DEST_TABLE = "final_data.bubble_plot_data"  # "<dataset>.<table>"

# SQL query to retrieve data.
#   field_name        - name of the first entry in category_for.first_level.full
#                       (SAFE_OFFSET yields NULL instead of an error when the
#                       array has no first element, so such rows group under NULL)
#   avg_fcr           - mean of metrics.field_citation_ratio within the group
#   publication_count - number of distinct publication ids in the group
#   us_global         - 'US' if any research_org_country_names entry equals
#                       'United States', otherwise 'Global'
# Rows without a field citation ratio are excluded before aggregation.
query = """
SELECT
    category_for.first_level.full[SAFE_OFFSET(0)].name AS field_name,
    AVG(metrics.field_citation_ratio) AS avg_fcr,
    COUNT(DISTINCT id) AS publication_count,
    CASE WHEN EXISTS (SELECT 1 FROM UNNEST(research_org_country_names) as c WHERE c = 'United States') THEN 'US' ELSE 'Global' END as us_global
FROM
    `covid-19-dimensions-ai.data.publications`
WHERE metrics.field_citation_ratio IS NOT NULL
GROUP BY field_name, us_global
"""

# Run the query and store the results in a Pandas DataFrame
df = client.query(query).to_dataframe()

# Create a simplified dataframe for export (fixes the column set and order)
df_export = df[['field_name', 'avg_fcr', 'publication_count', 'us_global']]

# Save to the destination table; if_exists='replace' overwrites any table
# already present under that name.
pandas_gbq.to_gbq(df_export, DEST_TABLE, project_id=DEST_PROJECT, if_exists='replace')

# Report the fully qualified destination (project.dataset.table). The previous
# message omitted the dataset and so named a table that does not exist.
print(f"Data exported to BigQuery table: {DEST_PROJECT}.{DEST_TABLE}")

100%|██████████| 1/1 [00:00<00:00, 3669.56it/s]

Data exported to BigQuery table: covid-19-task.bubble_plot_data



