In [None]:
import nbformat as nbf
import shutil
import json
import re

import numpy as np
import pandas as pd
import pydbtools as pydb



In [None]:
def get_count(Name,validation_checks):
    """Count the rows of ``Name`` and append the result to ``validation_checks``.

    Parameters
    ----------
    Name : str
        Table name; the second dot-separated component (``Name.split(".")[1]``)
        is what gets recorded, so a ``database.table`` form is required.
    validation_checks : numpy.ndarray
        Array of rows collected so far, as maintained by ``add_count``.

    Returns
    -------
    numpy.ndarray
        ``validation_checks`` with one more ``[table, count]`` row.

    The table name and its count are also printed.
    """
    query = f"select count(*) as count from {Name}"
    row_count = pydb.read_sql_query(query)['count'][0]
    print (Name,row_count)
    table_only = Name.split(".")[1]
    return add_count(table_only,row_count,validation_checks)

In [None]:
def add_count(Name,Count,validation_checks):
    """Return ``validation_checks`` with a ``[Name, Count]`` row added.

    An empty array (``size == 0``) is replaced by a new 1x2 array holding the
    row; otherwise the row is stacked underneath the existing rows.
    """
    new_row = [Name,Count]
    if validation_checks.size == 0:
        # First entry: build the initial (1, 2) array.
        return np.array(new_row).reshape(1,2)
    return np.vstack([validation_checks,new_row])

In [None]:
def get_equivalence_dict():
    """Load the table-name mapping stored as JSON in ``table_equivalents.txt``.

    The file is read from the current working directory. Returns the decoded
    JSON value (the rest of this notebook treats it as a dict of
    existing-code table name -> platform table name).
    """
    with open('table_equivalents.txt','r') as mapping_file:
        return json.load(mapping_file)

In [None]:
def automatic_code_update(code):
    """Rewrite table names in ``code`` using the stored equivalence mapping.

    Every entry of the mapping returned by ``get_equivalence_dict`` is applied
    with ``replace_table``. The user is then shown the tables that matched and
    asked whether the list is complete; until they answer "Y" they are prompted
    for a further old/new name pair, which is saved with ``add_to_code_dict``
    and applied to ``code`` straight away.

    Returns the updated code string.
    """
    matched_tables = []
    for old_table, platform_table in get_equivalence_dict().items():
        code,matched_tables = replace_table(code,old_table,platform_table,matched_tables)

    # The first question differs from the follow-up asked after each addition.
    question = "Are all the tables used for creating this table listed? (Y/N): "
    while input(question+str(matched_tables)).upper()!="Y":
        old_name = input("What is the table name in the existing code (include full name e.g. \"Adoptout.table_name\"?): ")
        new_name = input("What is the table name on the platform (include full name e.g. \"fcsq.table_name\"?): ")
        add_to_code_dict(old_name,new_name)
        code,matched_tables = replace_table(code,old_name,new_name,matched_tables)
        question = "Are all tables included listed now? (Y/N): "

    print ("\n")

    return code

In [None]:
def replace_table(code,table,new_table,matched_tables):
    """Replace every case-insensitive occurrence of ``table`` in ``code``.

    Parameters
    ----------
    code : str
        Text to rewrite.
    table : str
        Table name to look for; matched literally, ignoring case.
    new_table : str
        Text inserted verbatim in place of each occurrence.
    matched_tables : list
        ``table`` is appended to this list (in place) when the replacement
        changed ``code``.

    Returns
    -------
    tuple
        ``(new_code, matched_tables)``.
    """
    # An empty pattern matches between every character and would scatter
    # new_table throughout the code, so treat it as "nothing to replace".
    if not table:
        return (code,matched_tables)

    table_in_code = re.compile(re.escape(table), re.IGNORECASE)
    # A callable replacement is inserted as-is. A plain string would be parsed
    # as a template, so a backslash or "\1" in new_table would raise or be
    # rewritten instead of appearing literally.
    new_code = table_in_code.sub(lambda _match: new_table,code)
    if new_code!=code:
        matched_tables.append(table)
    return (new_code,matched_tables)

In [None]:
def add_to_code_dict(old_name,new_name):
    """Record ``old_name -> new_name`` in ``table_equivalents.txt``.

    The current mapping is loaded with ``get_equivalence_dict``, the pair is
    added (replacing any existing entry for ``old_name``), and the whole
    mapping is written back to the file as JSON.
    """
    mapping = get_equivalence_dict()
    mapping.update({old_name: new_name})
    with open('table_equivalents.txt','w') as mapping_file:
        mapping_file.write(json.dumps(mapping))
    

In [None]:
def get_inputs():
    """Prompt for the details of the table to add.

    Returns
    -------
    tuple
        ``(table_name, number, desc, temp_flag)`` where ``table_name`` is a
        single whitespace-free token, ``number`` and ``desc`` are the raw
        answers, and ``temp_flag`` is ``True`` for "T" and ``False`` for "F".
    """
    # The name is interpolated into generated SQL, variable names and S3
    # paths, so keep asking until exactly one token is given. This rejects
    # blank answers as well as names containing spaces, and drops any
    # surrounding whitespace.
    table_name = input("table name (no spaces): ").strip()
    while len(table_name.split())!=1:
        table_name = input("table name (no spaces): ").strip()
    number = input("number: ")
    desc = input("table description: ")
    temp_flag = ""
    while temp_flag not in ('T','F'):
        temp_flag = input("Temporary table T/F: ").strip().upper()
    return table_name,number,desc,temp_flag == 'T'

In [None]:
def create_backup(notebook_name):
    """Copy ``notebook_name`` to ``automation_backup.ipynb``.

    The backup is written to the current working directory; the same target
    file name is used on every call.
    """
    shutil.copyfile(notebook_name, "automation_backup.ipynb")

In [None]:
def new_title_cell(number,name,desc):
    """Build the section-heading markdown cell for a table.

    The heading has the form ``## <number>. <name> table - <desc>`` followed by
    an ``<a name=...>`` anchor named after the table. Returns a one-element
    list so it can be concatenated with the other cell lists.
    """
    heading = f"## {number}. {name} table - {desc} <a name={name}></a>"
    return [nbf.v4.new_markdown_cell(heading)]

In [None]:
def new_drop_table_cells(name,folder_name):
    """Build the markdown + code cells that drop an existing ``fcsq`` table.

    The generated code cell issues ``DROP TABLE IF EXISTS fcsq.<name>`` and
    deletes the objects under ``fcsq_processing/<folder_name>/<name>``. It
    refers to ``pydb`` and ``bucket``, which must exist in the notebook the
    cells are added to.

    Returns ``[markdown_cell, code_cell]``.
    """
    heading = f"### Drop the {name} table if it already exists and remove its data from the S3 bucket"
    statements = [
        f'drop_{name} = "DROP TABLE IF EXISTS fcsq.{name}"',
        f'pydb.start_query_execution_and_wait(drop_{name})',
        f'bucket.objects.filter(Prefix="fcsq_processing/{folder_name}/{name}").delete();',
    ]
    return [
        nbf.v4.new_markdown_cell(heading),
        nbf.v4.new_code_cell("\n".join(statements)),
    ]

In [None]:
def new_create_table_cells(name,code,temp_flag,folder_name):
    """Build the markdown + code cells that create the table.

    The generated code cell assigns the query to ``create_<name>_table`` as a
    triple-quoted f-string and then runs it:

    * ``temp_flag`` truthy: ``code`` is used as-is and passed to
      ``pydb.create_temp_table``.
    * otherwise: ``code`` is prefixed with a ``CREATE TABLE IF NOT EXISTS
      fcsq.<name> ... AS`` header that stores Parquet data under
      ``fcsq_processing/<folder_name>/<name>``, and the query is run with
      ``pydb.start_query_execution_and_wait``.

    Returns ``[markdown_cell, code_cell]``.
    """
    heading_cell = nbf.v4.new_markdown_cell(f"### Create the {name} table")

    opening = f'create_{name}_table =f"""'
    if temp_flag:
        runner = f"pydb.create_temp_table(create_{name}_table,'{name}')"
    else:
        opening += (
            f"\nCREATE TABLE IF NOT EXISTS fcsq.{name}"
            f"\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/{folder_name}/{name}') AS"
        )
        runner = f"pydb.start_query_execution_and_wait(create_{name}_table);"

    # opening + SQL + closing triple quote, then the statement that runs it.
    cell_source = "".join([opening, code, '"""\n', runner])
    return [heading_cell, nbf.v4.new_code_cell(cell_source)]

In [None]:
def new_validation_cells(name,temp_flag):
    """Build the markdown + code cells that show the new table's row count.

    The generated code queries ``__temp__.<name>`` when ``temp_flag`` is truthy
    and ``fcsq.<name>`` otherwise, stores the result in ``<name>_count`` and
    ends with that name so the notebook displays it.

    Returns ``[markdown_cell, code_cell]``.
    """
    database = "__temp__" if temp_flag else "fcsq"
    heading_cell = nbf.v4.new_markdown_cell(f"#### {name} validation")
    statements = [
        f'{name}_count = pydb.read_sql_query("SELECT count(*) as count from {database}.{name}")',
        f'{name}_count',
    ]
    return [heading_cell, nbf.v4.new_code_cell("\n".join(statements))]

In [None]:
def add_table(name,number,desc,temp_flag,code,folder_name,notebook_name):
    """Back up the notebook and build all cells for a new table.

    Parameters
    ----------
    name, number, desc : str
        Table name, section number and description used in the heading.
    temp_flag : bool
        Truthy for a temporary table, falsy for a permanent ``fcsq`` table.
    code : str
        SQL that selects the table's contents.
    folder_name : str
        Sub-folder used in the S3 paths of a permanent table.
    notebook_name : str
        Notebook to back up with ``create_backup`` before anything is built.

    Returns
    -------
    list
        Title cell, drop cells (permanent tables only), create cells and
        validation cells, in that order. The cells are not written to the
        notebook here.
    """
    create_backup(notebook_name)
    cells = new_title_cell(number,name,desc)
    # Same truthiness test as new_create_table_cells / new_validation_cells,
    # so drop cells are emitted exactly when a permanent table is created.
    if not temp_flag:
        cells = cells+new_drop_table_cells(name,folder_name)
    cells = cells+new_create_table_cells(name,code,temp_flag,folder_name)
    cells = cells+new_validation_cells(name,temp_flag)

    return cells



In [None]:
def create_snapshots_code():
    """Prompt for table names and build a snapshot-date ``Where`` clause.

    The answer is split on commas; surrounding whitespace is removed and empty
    entries are ignored. Each remaining table contributes
    ``{database}.<table>.mojap_snapshot_date= date'{snapshot_date}'`` and the
    conditions are joined with ``AND`` inside ``Where (...)``. The
    ``{database}`` and ``{snapshot_date}`` placeholders are left in the text
    unformatted.

    Returns
    -------
    str
        The clause, or an empty string when no table names were given.
    """
    answer = input("Type the name of any tables which need snapshot dates added separated by a comma, if none leave blank")
    tables = [entry.strip() for entry in answer.split(",") if entry.strip()]
    if not tables:
        # A blank answer means no snapshot filter is needed; without this
        # check it would produce a condition on an empty table name.
        return ""
    conditions = [
        "{database}."+table+".mojap_snapshot_date= date\'{snapshot_date}\'"
        for table in tables
    ]
    return "Where ("+" AND ".join(conditions)+")"

In [None]:
# Settings for this run: the notebook that receives the generated cells and
# the folder name used in the table's S3 paths.
notebook_name = 'Domestic_Violence/Domestic_Violence_Extractions.ipynb'
folder_name = 'Domestic_Violence'

# SQL for the table being added, as written in the existing code.
code = f"""
SELECT
   *
 FROM
   DV_all_data
 WHERE NOT (YEAR = 2022 and QUARTER = 2);
"""

# Swap table names for their platform equivalents (interactive).
code = automatic_code_update(code)

# Ask for the table's name, section number, description and temp/permanent flag.
table_name, number, desc, temp_flag = get_inputs()

# Back up the notebook and build the new cells, then append them and save.
cells = add_table(table_name, number, desc, temp_flag, code, folder_name, notebook_name)

nb = nbf.read(notebook_name, as_version=4)
nb['cells'].extend(cells)
with open(notebook_name, 'w') as f:
    nbf.write(nb, f)
        



In [None]:
"""

Automatically updates numbering of queries

"""
# Renumber every "## N." heading in notebook order and, when requested, append
# a contents cell with one entry per numbered heading.
nb = nbf.read(notebook_name, as_version=4)
create_backup(notebook_name)
contents = ["## Contents"]

# Raw strings: "\d" and "\." are regex escapes, not Python string escapes.
heading_pattern = re.compile(r"## \d+\.")
anchor_pattern = re.compile(r"<a name=.+></a>")
description_pattern = re.compile(r"-.+<a")

numbering_pointer = 1
create_contents = input("Create contents page? (Y/N): ").upper()
for cell in nb['cells']:
    if cell['cell_type'] != 'markdown':
        continue
    source = cell['source']
    heading_match = heading_pattern.search(source)
    if heading_match is None:
        continue

    if create_contents == "Y":
        desc_text_search = description_pattern.search(source)
        # Drop the trailing "<a" that the pattern uses as its end marker.
        desc_text = desc_text_search.group(0)[:-2] if desc_text_search else ""
        anchor_match = anchor_pattern.search(source)
        if anchor_match:
            # Strip the leading '<a name=' (8 chars) and trailing '></a>' (5 chars).
            html_tag_name = anchor_match.group(0)[8:-5]
            contents.append(f"{numbering_pointer}. [{html_tag_name}](#{html_tag_name}) {desc_text}")
        else:
            # Heading without an anchor: list it unlinked instead of failing.
            heading_text = source[heading_match.end():].strip().split("\n")[0]
            contents.append(f"{numbering_pointer}. {heading_text}")

    # Replace only the matched "## N." marker so any text (or extra "#")
    # in front of it is kept.
    cell['source'] = (
        source[:heading_match.start()]
        + f"## {numbering_pointer}."
        + source[heading_match.end():]
    )
    numbering_pointer += 1

if create_contents == "Y":
    contents_cell = nbf.v4.new_markdown_cell("\n".join(contents))
    nb['cells'].append(contents_cell)

with open(notebook_name, 'w') as f:
    nbf.write(nb, f)

In [None]:
import re
nb = nbf.read(notebook_name, as_version=4)
list_of_SAS_tables=[]
list_of_tables = []
for i in range(len(nb['cells'])):    
    if nb['cells'][i]['cell_type'] == 'code':
            table_search = re.search(('(fcsq\..+")|(__temp__\..+)"'),nb['cells'][i]['source'])            
            if table_search!=None:
                    table_name = table_search.group(0).split(" ")[0][:-1]
                    if table_name not in list_of_tables:
                        #prefix = input(f"Which database is {table_name} from in SAS: ")
                        list_of_tables.append(table_name)
                        list_of_SAS_tables.append(prefix+table_name)

list_of_tables
list_of_SAS_tables




with open("SAS_Code.txt",'w') as f:
    f.write(SAS_Code)
f.close()


In [None]:
SAS_Code = ""
for table in list_of_tables:
    table_name = table.split(".")[1]
    line_check = f"SELECT '{table_name}',count(*) as count from {table_name}\n UNION ALL\n"
    SAS_Code+=line_check

print (SAS_Code)

In [None]:
# Query the row count of every collected table and gather the results.
validation_checks = np.array([])
for table in list_of_tables:
    validation_checks = get_count(table,validation_checks)

# checks_df is exported by the next cell, so it has to be created here.
# reshape(-1, 2) leaves an (n, 2) array unchanged and turns the empty starting
# array into shape (0, 2), so the frame can be built even with no tables.
checks_df = pd.DataFrame(validation_checks.reshape(-1, 2),columns=['Name','Count'])
checks_df

In [None]:
# Save the validation counts to S3 as CSV, without the index column.
checks_df.to_csv(
    's3://alpha-family-data/CSVs/Domestic_Violence/Domestic_Violence_Validation.csv',
    index=False,
)

In [None]:
# Display the table names collected from the notebook's code cells.
list_of_tables

In [None]:
"""

Restore backup

"""

# Read the backup copy and write it over the working notebook.
nb = nbf.read("automation_backup.ipynb", as_version=4)
with open(notebook_name, 'w') as f:
    nbf.write(nb, f)



In [None]:
# Prompt for table names and show the snapshot-date "Where (...)" clause built for them.
create_snapshots_code()