## This notebook is to ingest the ddl definition into cloudant database

**Remember:**   
the mapping /result in docker image is mapped to local folder ~/Documents/GitHub/jupyter/result  
the mapping /input in docker image is mapped to local folder ~/Documents/GitHub/jupyter/input

Link to the Cloudant database dashboard:   
https://d5e8ab56-62ce-4345-b1f2-33e670691507-bluemix.cloudant.com/dashboard.html 

The following code is to ingest the ddl into cloudant database. 

1. ftp to get the file from server stfmvs1
1. parse the ddl unload to list of ddls
1. merge the tid information from rz1 and rz3 cloudant database
1. populate the ddl information into cloudant database.

Before running this notebook, first submit the JCL in `ODMLD.PRD.RUN(ODMDDLRJ)` on the stfmvs1 server;
the result file on the server will be `C943511.RES.GENSQL.ODM` and `C943511.RES.GENSQL.ODMR` 

the MQT for ODM: `ODMLD.PRD.RUN(ODMDDMQT)` , the result is in `C943511.RES.GENSQL.ODM.MQT*`  
the MQT for ODMR: `ODMLD.PRD.RUN(ODMDDLMQ)`, the result is in `C943511.RES.GENSQL.ODMR.MQT*`  


In [None]:
import sys
sys.path.append('/odm_modules')
from common_func import cloudant_conn
from common_func import odm_ftp
import pandas as pd
import re

In [None]:
import os
# Sanity check that the Cloudant account is configured for this kernel:
# the lookup raises KeyError when CLOUDANT_USER is not set in the environment.
# NOTE(review): this echoes the account name into the saved cell output --
# clear the output before sharing or committing the notebook.
print(os.environ['CLOUDANT_USER'])

### Setup the cloudant_db_name and get the tid information which can be used to enrich the tid information

In [None]:
# Suffix appended to every Cloudant database name used in this notebook.
# Use '_test' to work against the test databases, '' for production usage.
# environ_suffix = '_test'
environ_suffix = ''

# Name of the Cloudant database the DDL records are ingested into.
cloudant_db_name = 'ddl{}'.format(environ_suffix)

cloudant_conn.cloudant_client.connect()

# Load the contents of the rz1 and rz3 databases into DataFrames (rz1 first,
# then rz3); they are merged into the DDL records further down.
df_rz1, df_rz3 = (
    pd.DataFrame(list(cloudant_conn.cloudant_client[db_template.format(environ_suffix)]))
    for db_template in ('rz1{}', 'rz3{}')
)

### unload the MQT files for both ODM and ODMR

In [None]:
# Local file names (inside /input) for the three MQT unload parts of ODMR and ODM.
odmr_mqt_file = ['odmr_mqt_ddl{}.txt'.format(part) for part in range(1, 4)]
odm_mqt_file = ['odm_mqt_ddl{}.txt'.format(part) for part in range(1, 4)]

# Fetch part N of each server dataset into the N-th local file:
# the ODM parts first, then the ODMR parts.
with odm_ftp.odm_ftp_conn('get') as odm_get_file:
    for mqt_part, mqt_local_name in enumerate(odm_mqt_file, start=1):
        odm_get_file(fm='C943511.RES.GENSQL.ODM.MQT{}'.format(mqt_part),
                     to='/input/{}'.format(mqt_local_name))
    for mqt_part, mqt_local_name in enumerate(odmr_mqt_file, start=1):
        odm_get_file(fm='C943511.RES.GENSQL.ODMR.MQT{}'.format(mqt_part),
                     to='/input/{}'.format(mqt_local_name))

In [None]:
def _read_mqt_ddl(file_name):
    """Read one MQT unload from /input and normalise it to the view-unload layout.

    * The ``--    Materialized Query Table=`` header is renamed to ``--    View=``
      so the MQT DDLs are split by the same header pattern as the views.
    * Stand-alone ``COMMIT;`` lines are removed.

    FIX: ``flags`` must be passed by keyword. The 4th positional argument of
    ``re.sub`` is ``count``; passing ``re.I`` there (value 2) limited the
    substitution to the first two matches per file and left the match
    case-sensitive.
    """
    # Context manager so the file handle is closed deterministically.
    with open('/input/{}'.format(file_name), 'r') as mqt_in:
        mqt_text = mqt_in.read()
    mqt_text = re.sub('--    Materialized Query Table=', '--    View=', mqt_text, flags=re.I)
    return re.sub(r'\n\s*COMMIT;\s*\n', '\n', mqt_text, flags=re.I)


odm_mqt_texts = [_read_mqt_ddl(file_name) for file_name in odm_mqt_file]
odmr_mqt_texts = [_read_mqt_ddl(file_name) for file_name in odmr_mqt_file]

# One text blob per system, parts separated by a newline.
odm_mqt_text = '\n'.join(odm_mqt_texts)
odmr_mqt_text = '\n'.join(odmr_mqt_texts)

### Download the view DDL files from the stfmvs1 server

In [None]:
# Names (inside /input) of the final merged DDL files written by a later cell.
odmr_file = 'odmr_ddl.txt'
odm_file = 'odm_ddl.txt'

# Names (inside /input) of the raw view unloads fetched from the server.
odmr_view_file = 'odmr_view_ddl.txt'
odm_view_file = 'odm_view_ddl.txt'

# (server dataset, local file) pairs, fetched in this order.
view_downloads = (
    ('C943511.RES.GENSQL.ODMR', odmr_view_file),
    ('C943511.RES.GENSQL.ODM', odm_view_file),
)
with odm_ftp.odm_ftp_conn('get') as odm_get_file:
    for host_dataset, view_local_name in view_downloads:
        odm_get_file(fm=host_dataset, to='/input/{}'.format(view_local_name))

# IMPORTANT    Attach MQT ddl into the file:

In [None]:
# Build the final DDL file of each system: view unload (without its stand-alone
# COMMIT; lines) followed by the normalised MQT DDL text.
#
# FIX: re.sub's 4th positional argument is `count`, not `flags`; the original
# `re.sub(..., text, re.I)` therefore replaced at most two matches and matched
# case-sensitively. The flag is now passed by keyword.
# FIX: the input files are read through a context manager (handles were leaked),
# and are read before the output file is opened for writing.
for merged_name, view_name, mqt_ddl in (
        (odm_file, odm_view_file, odm_mqt_text),
        (odmr_file, odmr_view_file, odmr_mqt_text)):
    with open('/input/{}'.format(view_name), 'r') as view_in:
        text = view_in.read()
    text = re.sub(r'\n\s*COMMIT;\s*\n', '\n', text, flags=re.I)
    with open('/input/{}'.format(merged_name), 'w') as f:
        f.write(text)
        f.write(mqt_ddl)

In [None]:
# Read both merged DDL files; context managers close the handles (the original
# bare open(...).read() calls leaked them).
with open('/input/{}'.format(odm_file), 'r') as odm_in:
    odm_text = odm_in.read()
with open('/input/{}'.format(odmr_file), 'r') as odmr_in:
    odmr_text = odmr_in.read()

# From here on odm_text holds the ODM text followed by the ODMR text.
odm_text += odmr_text

In [None]:
# Split the combined text on the "--    View=<...>" header lines.
# The resulting list also contains whatever text precedes the first header,
# so the printed count can be higher than the number of real DDL statements.
view_header = re.compile('--    View=.*?\n', flags=re.I)
ddls_odm = view_header.split(odm_text)
print("Total number of ddl are : {}".format(len(ddls_odm)))


In [None]:
def process_ddl(ddl_stmt):
    """Clean one raw DDL chunk for storage.

    Line by line:
      * lines starting with ``--`` (after leading whitespace) are dropped;
      * lines starting with ``SET`` and ending with ``;`` are dropped;
      * every remaining line is stripped of surrounding whitespace and of
        leading ``;`` characters.
    The surviving lines are joined with newlines (the statement stays
    multi-line) and the result is stripped as a whole.

    NOTE(review): a trailing ``;`` on a line is NOT removed -- only leading
    ones are. Confirm whether terminators are meant to be kept in the stored DDL.
    """
    kept = []
    for raw_line in ddl_stmt.split('\n'):
        line = raw_line.strip()
        if line.startswith('--'):
            continue  # comment line
        if line.startswith('SET') and line.endswith(';'):
            continue  # SET ...; statement
        kept.append(line.lstrip(';').strip())
    return "\n".join(kept).strip()

In [None]:
# Clean every chunk and keep only those that still contain text afterwards.
ddls_odm = [cleaned for cleaned in map(process_ddl, ddls_odm) if cleaned != '']

In [None]:
def parse_ddl(ddl):
    """Extract schema and object name from a ``CREATE VIEW|TABLE ... AS`` statement.

    The statement must start with ``CREATE VIEW`` or ``CREATE TABLE`` (the
    latter covers MQTs), followed by ``schema.name`` and, somewhere later, ``AS``.
    An optional column list between the name and ``AS`` is accepted.
    Matching is case-insensitive and spans multiple lines.

    Args:
        ddl: the cleaned DDL text of one view / MQT.

    Returns:
        dict with keys ``view_name``, ``ddl`` (the unchanged input) and ``schema``.

    Raises:
        ValueError: if ``ddl`` does not have the expected shape. (Previously this
        surfaced as an opaque ``AttributeError`` on ``None``.)
    """
    # One pattern is sufficient: the former column-list variant captured the
    # same schema/name groups, and its extra groups were never used.
    header = re.compile(r'CREATE\s+(?:VIEW|TABLE)\s+(\w+)\.(\w+).*?AS', re.I | re.S)
    found = header.match(ddl)
    if found is None:
        raise ValueError(
            "Cannot parse DDL, expected 'CREATE VIEW|TABLE schema.name ... AS': {!r}".format(ddl[:80])
        )
    schema, view_name = found.groups()
    return {"view_name": view_name, "ddl": ddl, "schema": schema}

In [None]:
# Turn every DDL string into a {"view_name", "ddl", "schema"} record.
# NOTE: ddls_odm changes from a list of strings to a list of dicts here, so
# this cell cannot be re-run without re-running the cells above it first.
ddls_odm = [parse_ddl(ddl_text) for ddl_text in ddls_odm]

In [None]:
# One row per parsed DDL record.
df = pd.DataFrame(ddls_odm)

# Stack the rz1 and rz3 frames and expose their document id as 'table_id'.
dfs = pd.concat([df_rz1, df_rz3])
dfs = dfs.rename(columns={'_id': 'table_id'})
dfs.head()

In [None]:
# Left-join the rz1/rz3 information onto the DDL records by view/table name,
# drop the join helper column and replace missing values with ''.
# NOTE: df is overwritten in place, so re-running this cell alone merges again.
df = (
    df
    .merge(
        dfs.drop(columns=['_rev', 'src', 'description', 'alter_id']),
        how='left',
        left_on='view_name',
        right_on='table_name',
    )
    .drop(columns=['table_name'])
    .fillna('')
)

In [None]:
# Push the enriched DDL records to the target Cloudant database.
# NOTE(review): presumably mode='REPLACE' with keys=['view_name'] replaces
# existing documents per view name -- confirm against common_func.cloudant_conn.
res = cloudant_conn.df_2_cloudant(
    df,
    cloudant_db_name,
    mode='REPLACE',
    keys=['view_name'],
    src_code='ddl',
)
res

In [None]:
# Show the records whose name starts with neither 'ODM' nor 'V'.
df.loc[~(df.view_name.str.startswith('ODM') | df.view_name.str.startswith('V'))]

## The following script creates the DDL for the lineage program to process

_Some defects still need to be addressed:_
1. `ODMV_TAS_TRKHIRE` cannot be parsed because of an alias problem

In [None]:
def proc_ddl(ddl_stmt):
    """Clean one raw DDL chunk for the lineage input files.

    Lines starting with ``--`` (after leading whitespace) are dropped. Every
    other line is stripped and then:
      * a blank line becomes a single space;
      * a line ending with ``.`` is kept as is;
      * any other line gets one trailing space appended.
    The lines are joined with newlines and the whole result is stripped.
    Unlike ``process_ddl``, ``SET`` statements and ``;`` are left untouched.
    """
    pieces = []
    for raw_line in ddl_stmt.split('\n'):
        if raw_line.lstrip().startswith('--'):
            continue  # comment line
        content = raw_line.strip()
        if not content:
            pieces.append(' ')
        elif content.endswith('.'):
            pieces.append(content)
        else:
            pieces.append(content + ' ')
    return "\n".join(pieces).strip()

In [None]:
from datetime import datetime

# Timestamp of this run.
now = datetime.now()
print("now =", now)

# Date part only (YYYYMMDD); used as suffix of the dated result files.
dt_string = datetime.strftime(now, "%Y%m%d")
print("date and time =", dt_string)

In [None]:
# Re-read the merged ODMR DDL file (context manager: the original leaked the handle).
with open('/input/{}'.format(odmr_file), 'r') as odmr_in:
    odmr_text = odmr_in.read()

# Split per "--    View=" header and drop whatever precedes the first header.
ddls_odmr = re.split('--    View=.*?\n', odmr_text, flags = re.I)
ddls_odmr = ddls_odmr[1:]
print("Total number of ddl are : {}".format(len(ddls_odmr)))
ddls_odmr = list(map(proc_ddl, ddls_odmr))
ddls_odmr = list(filter(lambda ddl: ddl != '', ddls_odmr))

# Special handling: skip every statement that mentions one of these names.
# ODMV_TAS_TRKHIRE cannot be parsed by the lineage program (alias problem).
# NOTE(review): the reason for excluding 'VBPS_LDA' is not documented -- confirm.
excluded_markers = ('VBPS_LDA', 'ODMPRD.ODMV_TAS_TRKHIRE')
ddls_odmr = [ddl for ddl in ddls_odmr
             if not any(marker in ddl for marker in excluded_markers)]

# A trailing empty element makes join() emit the COMMIT separator after the
# last statement as well. (A discarded, no-op filter call was removed here.)
ddls_odmr.append('')
final_odmr_ddl = '\nCOMMIT ; \n--\n'.join(ddls_odmr)

# Write a dated copy first, then the fixed-name copy.
for odmr_out_path in ('/result/odmr_views_{}.txt'.format(dt_string), '/result/odmr_views.txt'):
    with open(odmr_out_path, 'w') as f:
        f.write(final_odmr_ddl)

### From here, prepare some additional DDL for the customized extract interfaces

In [None]:
# Customized extract interfaces: interface code -> server member holding its SQL.
# Each member is downloaded to /input/<code>.sql by the next cell.
cust_exts = dict(
    KIW='ODMLD.PROD.SQL(ODMKIWQ)',
    KAR="ODMLD.PROD.SQL(ODMARK01)",
    KAP='ODMLD.PROD.SQL(ODMAPK01)',
    KIR="ODMLD.PROD.SQL(ODMUKIR)",
    KIJ="ODMLD.PROD.SQL(ODMUKIJ)",
    KIE="ODMLD.PROD.SQL(ODMUKIE)",
)
# Currently disabled entries (with the original remarks):
#    "CARD": "OPC.PRODJCL.HR(ODMULCRD)"
#    "WHILELIST": "OPC.PRODJCL.HR(ODMULWHL)"
#    "SMS": "ODMLD.PROD.SQL(ODMEYLQ1)"   Trailer line number
#    "MGT": "ODMLD.PROD.SQL(ODMMGTQ1)"   JCL?
#    "IRON": "ODMLD.PROD.SQL(ODMDERPQ)"  JCL?
cust_exts.keys()

In [None]:
# Template that wraps the SQL of one custom extract into a pseudo view named
# after the interface code, terminated like the other statements.
ddl_temp = '''CREATE VIEW ODMPRD.{} AS 
{}
COMMIT ;
--'''

# Download the SQL member of every interface to /input/<code>.sql.
# (Plain loop instead of a list comprehension used only for its side effects.)
with odm_ftp.odm_ftp_conn('get') as odm_get_file:
    for ext_code, ext_member in cust_exts.items():
        odm_get_file(fm = ext_member, to = '/input/{}.sql'.format(ext_code))

# interface code -> wrapped DDL text. Files are read through a context manager
# so the handles are closed (the original open(...).read() leaked them).
ddls = {}
for ext_code in cust_exts.keys():
    with open('/input/{}.sql'.format(ext_code), 'r') as ext_sql_in:
        ddls[ext_code] = ddl_temp.format(ext_code, ext_sql_in.read().strip())


In [None]:
# All wrapped custom-extract DDLs as one text; also appended to the ODM result
# files in a later cell.
ddl_added = '\n'.join(ddls.values())

# Keep a copy (plus a final newline) in temp.txt in the working directory.
with open('temp.txt', 'w') as f:
    f.write(ddl_added + '\n')


### Need special process for KIE interface since it contains strings like '"'  '""' etc.. very difficult. 

In [None]:
# Re-read the merged ODM DDL file (context manager: the original leaked the handle).
with open('/input/{}'.format(odm_file), 'r') as odm_in:
    odm_text = odm_in.read()

# Split per "--    View=" header and drop whatever precedes the first header.
ddls_odm = re.split('--    View=.*?\n', odm_text, flags = re.I)
ddls_odm = ddls_odm[1:]
print("Total number of ddl are : {}".format(len(ddls_odm)))
ddls_odm = list(map(proc_ddl, ddls_odm))
ddls_odm = list(filter(lambda ddl: ddl != '', ddls_odm))

# Special handling: skip every statement mentioning ODMV_TAS_TRKHIRE, which the
# lineage program cannot parse (alias problem).
ddls_odm = [ddl for ddl in ddls_odm if 'ODMV_TAS_TRKHIRE' not in ddl]

# A trailing empty element makes join() emit the COMMIT separator after the
# last statement as well. (A discarded, no-op filter call was removed here.)
ddls_odm.append('')
final_odm_ddl = '\nCOMMIT ; \n--\n'.join(ddls_odm)

# Special handling: un-quote the SYSIBM schema qualifier.
final_odm_ddl = final_odm_ddl.replace('"SYSIBM".', 'SYSIBM.')

# Write a dated copy first, then the fixed-name copy; both get the
# custom-extract DDLs appended.
for odm_out_path in ('/result/odm_views_{}.txt'.format(dt_string), '/result/odm_views.txt'):
    with open(odm_out_path, 'w') as f:
        f.write(final_odm_ddl)
        f.write(ddl_added)

### Curation Lineage STEP 2 do the following to generate the lineage information



Now, the ddl of ODM and ODMR are in the result/odm_views.txt and result/odmr_views.txt

Next, just start the docker image odm-lineage:latest, then the result will be placed in the following 2 files  
```
cd ~/Documents/GitHub/odm-lineage
./test_local.sh

odm_views.txt_parse.xlsx  
odmr_views.txt_parse.xlsx
```  

Go to the folder, open another terminal, and use the following command to monitor the processing:   
```
cd ${ODM_DAILY_PUBLIC_ROOT}/result 
tail -f lineage_log.txt
```
