In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from utils.helpers import user_dir, repo_dir, data_dir
import hydralit_components as hc
import time
from datetime import datetime
from google.cloud import bigquery
from google.oauth2 import service_account
from google.api_core.exceptions import Conflict, NotFound, Forbidden
# import pydata_google_auth
import threading
from utils.simple_auth import *



# Schema of the table that receives RA labels for (regret, recommendation)
# video pairs. Each row below is (column name, BigQuery type, mode,
# description) and is expanded into one bigquery.SchemaField.
labelled_schema = [
    bigquery.SchemaField(column, kind, mode=mode, description=text)
    for column, kind, mode, text in (
        ("label", "STRING", "REQUIRED",
         "RA-assigned label for pair"),
        ("labeler", "STRING", "REQUIRED",
         "Which RA labeled this pair"),
        ("reason", "STRING", "NULLABLE",
         "RA-specified reason for labeling pair as violative"),
        ("other_reason", "STRING", "NULLABLE",
         "RA-specified other reason"),
        ("regret_id", "STRING", "REQUIRED",
         "YouTube video ID for regretted video"),
        ("recommendation_id", "STRING", "REQUIRED",
         "YouTube video ID for recommended video"),
        ("regret_title", "STRING", "REQUIRED",
         "Video title for regretted video"),
        ("recommendation_title", "STRING", "REQUIRED",
         "Video title for recommended video"),
        ("regret_description", "STRING", "REQUIRED",
         "Video description for regretted video"),
        ("recommendation_description", "STRING", "REQUIRED",
         "Video description for recommended video"),
        ("regret_channel", "STRING", "REQUIRED",
         "Video channel for regretted video"),
        ("recommendation_channel", "STRING", "REQUIRED",
         "Video channel for recommended video"),
        ("label_time", "DATETIME", "REQUIRED",
         "Date & time at which the data is labelled"),
        ("selection_method", "STRING", "REQUIRED",
         "How this pair was selected and by which model"),
        ("disturbing", "STRING", "REQUIRED",
         "Whether the video is disturbing, hateful, or misinformation"),
    )
]


# BigQuery table IDs in "project.dataset.table" form.
corpus_table_id = "regrets-reporter-dev.ra_can_read.pairs_sample"
labelled_table_id = "regrets-reporter-dev.ra_can_write.labelled_ra"
language_table_id = "regrets-reporter-dev.ra_can_read.langs"
model_table_id = "regrets-reporter-dev.ra_can_read.model_predictions_v1"

# Per-table flags, all initially False. table_exists() consults this mapping
# first and returns True without a lookup when a table's flag is set.
_table_created = dict.fromkeys((corpus_table_id, labelled_table_id), False)


def table_exists(bq_client, table):
    """Report whether `table` can be fetched through `bq_client`.

    Args:
        bq_client: Client object exposing ``get_table(table)``.
        table: ID of the table to look up.

    Returns:
        True if the table's flag in ``_table_created`` is set or
        ``bq_client.get_table`` succeeds; False if the lookup raises
        ``NotFound`` or ``Forbidden``.
    """
    # Use .get() so a table with no entry in _table_created falls through to
    # the real lookup instead of raising KeyError.
    if _table_created.get(table, False):
        return True
    try:
        bq_client.get_table(table)
    except (NotFound, Forbidden):
        # A table these credentials may not access is reported the same way
        # as a missing one.
        return False
    return True


def get_table(table, schema):
    """Construct a ``bigquery.Table`` for `table` with the given `schema`.

    Args:
        table: Table ID passed as the first argument to ``bigquery.Table``.
        schema: Schema passed through as the ``schema`` keyword.

    Returns:
        The newly constructed ``bigquery.Table`` object.
    """
    table_obj = bigquery.Table(table, schema=schema)
    return table_obj

In [None]:
def connect_to_db(user):
    """Create a BigQuery client for the "regrets-reporter-dev" project.

    Args:
        user: ``'admin'`` selects the ``ranu_testing`` section of the
            Streamlit secrets; any other value selects
            ``bq_service_account``.

    Returns:
        A ``bigquery.Client`` built from the selected service-account info.
    """
    # TODO: BEFORE PROD DEPLOYMENT SWITCH BACK TO USER AUTHENTICATION FOR ADMIN
    # credentials = pydata_google_auth.get_user_credentials(
    #    ['https://www.googleapis.com/auth/bigquery'],
    #    use_local_webserver=True,
    # )
    if user == 'admin':
        account_info = dict(**st.secrets.ranu_testing)
    else:
        account_info = dict(**st.secrets.bq_service_account)

    # Both kinds of user authenticate as a service account with the same scope.
    credentials = service_account.Credentials.from_service_account_info(
        account_info,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    return bigquery.Client(
        project="regrets-reporter-dev", credentials=credentials)

In [None]:
# Open an admin connection for the maintenance cells in this notebook.
bq_client = connect_to_db('admin')

# Remove every row labelled by 'ranu'. Client.query() only submits the job,
# so call .result() to block until the DELETE has finished and to raise if it
# failed; otherwise a later read could run before the rows are gone.
delete_job = bq_client.query(
    f"DELETE FROM {labelled_table_id} WHERE labeler='ranu'")
delete_job.result()

In [None]:
# Load the whole labelled table into a DataFrame: submit the SELECT, wait for
# its rows, then convert them.
_select_job = bq_client.query(f"SELECT * FROM {labelled_table_id}")
_labelled_rows = _select_job.result()
df_labelled = _labelled_rows.to_dataframe()

In [None]:
# Bare expression: shows the labelled-rows DataFrame as this cell's output.
df_labelled

In [None]:
# Build the Table object for the labelled table from its ID and schema; as a
# bare expression its value is shown as this cell's output.
get_table(labelled_table_id, labelled_schema)