## Steps to set up the Databricks connection to MySQL and BigQuery (BQ)
- Read config_table from MySQL Workbench and keep only the tables that have active_flag = 1 and load_flag = 1
- Export each BQ table to the GCS staging layer
- Update and log the status of the load in config_table

In [0]:
# Databricks widgets for the MySQL and GCP parameters. Each parameter is
# registered as a text widget with a default value and then read back
# immediately so the rest of the notebook can use plain variables.
# NOTE(review): the MySQL host and password defaults are stored here in plain
# text — consider sourcing them from a secret store instead.
def _widget_value(name, default):
    """Create the text widget *name* with *default* and return its current value."""
    dbutils.widgets.text(name, default)
    return dbutils.widgets.get(name)


mysql_host = _widget_value("mysql_host", "34.135.26.173")
mysql_port = _widget_value("mysql_port", "3306")
# The widget is called "mysql_root" but its value is used as the MySQL user.
mysql_user = _widget_value("mysql_root", "root")
mysql_db = _widget_value("mysql_db", "GCPMigrationMeta")
mysql_password = _widget_value("mysql_password", "Admin123")
bq_sa_key = _widget_value(
    "bq_sa_key",
    '/Volumes/workspace/default/csv/datamigrationproject-483310-739969975183.json',
)

In [0]:
# Echo the MySQL host value read from the "mysql_host" widget.
print(mysql_host)

In [0]:
%pip install mysql-connector-python google-cloud-bigquery

In [0]:
import json, os, datetime as dt
import mysql.connector as mc
from google.cloud import bigquery
from contextlib import contextmanager 
# contextlib's contextmanager helps us set up and clean up resources automatically

# MySQL connection settings assembled from the widget values: host and user
# are whitespace-stripped, the port is converted to an int.
MYSQL = dict(
    host=mysql_host.strip(),
    port=int(mysql_port),
    user=mysql_user.strip(),
    pwd=mysql_password,
    db=mysql_db,
)
# Service-account key path with any 'dbfs:/' occurrence rewritten to '/dbfs/'.
BQ_KEY = bq_sa_key.replace('dbfs:/', '/dbfs/')

In [0]:
# Display the connection settings for a quick sanity check, with the password
# masked so the secret is not written into the notebook output.
{key: ('********' if key == 'pwd' else value) for key, value in MYSQL.items()}

In [0]:
@contextmanager
def mysql_conn():
    """Yield an open MySQL connection built from ``MYSQL`` and always close it.

    Usage: ``with mysql_conn() as conn: ...``

    Raises:
        Whatever ``mc.connect`` raises when the connection cannot be opened
        (a message is printed first), and any exception raised inside the
        ``with`` body. Body exceptions are deliberately NOT swallowed: a
        failed query must reach the caller instead of being reported as a
        connection failure and silently ignored.
    """
    try:
        conn = mc.connect(
            host=MYSQL['host'],
            port=MYSQL['port'],
            user=MYSQL['user'],
            password=MYSQL['pwd'],
            database=MYSQL['db'],
        )
    except Exception as e:
        # This is the only place a connection failure can actually occur.
        print(f"Failed to connect to MYSQL \nHere is the error -: \n{e}")
        raise

    try:
        # yield hands the connection to the ``with`` body and pauses here;
        # execution resumes at ``finally`` once the body finishes or raises.
        yield conn
    finally:
        conn.close()

In [0]:
def fetch_eligible_tables():
    """Return the config_table rows that are flagged for loading.

    Selects table_name, source_project, source_dataset and gcs_path for every
    row with active_flag=1 and load_flag=1, ordered by table_name. Each row is
    returned as a dict because the cursor is opened with ``dictionary=True``.
    """
    query = """
          SELECT table_name, source_project, source_dataset, gcs_path
          FROM config_table
          WHERE active_flag=1 AND load_flag=1
          ORDER BY table_name
        """
    with mysql_conn() as connection:
        cursor = connection.cursor(dictionary=True)
        cursor.execute(query)
        eligible = cursor.fetchall()  # every matching row, as a list of dicts
        cursor.close()
    return eligible

In [0]:
def bq_to_gcs_status(table_name, status, err = None):
    """Record the BigQuery-to-GCS export status of one table in config_table.

    Args:
        table_name: Value matched against config_table.table_name.
        status: 'IN_PROGRESS' or 'COMPLETED' are stored as given with
            error_message cleared; any other value is stored as 'FAILED'.
        err: Optional error (exception or text) for the FAILED case. Its
            string form, truncated to 2000 characters, is stored in
            error_message; when falsy, the literal "FAILED" is stored.

    The statement is parameterized, so quotes inside the table name or the
    error text can neither break the SQL nor inject into it. The table is
    addressed through the connection's default database, the same way
    fetch_eligible_tables reads it.
    """
    if status in ('IN_PROGRESS', 'COMPLETED'):
        new_status = status
        error_message = None
    else:
        new_status = 'FAILED'
        error_message = str(err)[:2000] if err else "FAILED"

    # last_run is stamped for every status, FAILED included: the original
    # FAILED statement was syntactically invalid (unterminated quote and a
    # dangling comma before WHERE), so failures were never recorded at all.
    sql = """
        UPDATE config_table
        SET bq_to_gcs_status = %s,
            last_run = NOW(),
            error_message = %s
        WHERE table_name = %s
    """
    with mysql_conn() as conn:
        cur = conn.cursor()
        try:
            cur.execute(sql, (new_status, error_message, table_name))
            conn.commit()
        finally:
            # Release the cursor even when the UPDATE or commit fails.
            cur.close()


In [0]:
def bq_client():
    """Build a BigQuery client for the project named in the key file.

    Points GOOGLE_APPLICATION_CREDENTIALS at BQ_KEY, reads ``project_id``
    from that JSON file and passes it as the client's project.
    """
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = BQ_KEY
    with open(BQ_KEY) as key_file:
        key_data = json.load(key_file)
    project_id = key_data.get('project_id')
    return bigquery.Client(project=project_id)

In [0]:
def export_table_to_gcs(source_project, source_dataset, table_name, gcs_base):
    """
    Export a BigQuery table to a timestamped folder under a GCS path as Parquet.

    Args:
        source_project: Project part of the source table reference.
        source_dataset: Dataset part of the source table reference.
        table_name: Table part of the source table reference.
        gcs_base: Destination path prefix; a trailing '/' is tolerated.

    Returns:
        The destination URI pattern ("<gcs_base>/dt=<timestamp>/*.parquet")
        that was passed to the extract job.

    Raises:
        ValueError: If gcs_base is empty or None.
        Any exception raised by the BigQuery extract job.

    Author: Naina Johri
    """
    if not gcs_base:
        raise ValueError(f"No GCS destination path configured for {table_name}")

    client = bq_client()
    full = f"{source_project}.{source_dataset}.{table_name}"

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted value is the same wall-clock time as before (UTC + 05:30).
    # NOTE(review): the stamp is shifted to UTC+05:30 yet the format ends in
    # "Z", which conventionally denotes UTC — confirm which one is intended.
    shifted_now = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=5, minutes=30)
    ts = shifted_now.strftime("%Y%m%dT%H%M%SZ")

    # Strip trailing slashes so a base like ".../stage/" does not yield "//dt=".
    base = gcs_base.rstrip('/')
    dest_prefix = f"{base}/dt={ts}/"
    dest_uri = f"{dest_prefix}*.parquet"

    job_cfg = bigquery.job.ExtractJobConfig(
        destination_format = bigquery.DestinationFormat.PARQUET
    )

    print(f"Exporting the Data for {full} ---> {dest_uri}")
    job = client.extract_table(full, dest_uri, job_config = job_cfg)
    job.result()  # block until the extract job finishes; raises on failure
    print(f"Exported the Data for {full} ---> {dest_uri}")
    return dest_uri

In [0]:
# Driver: export every eligible table and record its status in config_table.
rows = fetch_eligible_tables()

if not rows:
    print("No table to be loaded with active_flag=1 and load_flag=1")
else:
    print(f"-------Processing {len(rows)} table(s): {[r['table_name'] for r in rows]}")

    for r in rows:
        t = r['table_name']

        try:
            bq_to_gcs_status(t, 'IN_PROGRESS')
            print(f"--------\nProcessing {t}")
            uri = export_table_to_gcs(
                r['source_project'],
                r['source_dataset'],
                t,
                r['gcs_path'],
            )

            bq_to_gcs_status(t, 'COMPLETED')
            print(f"Completed {t}")

        except Exception as err:
            # Report the failure first so it is visible even if the status
            # update below cannot be written.
            print(f"Failed {t} \n{err}")
            try:
                bq_to_gcs_status(t, 'FAILED', err=err)
            except Exception as status_err:
                # A metadata-DB problem must not abort the remaining tables.
                print(f"Could not record FAILED status for {t} \n{status_err}")
                