In [1]:
import pandas as pd
import numpy as np 
import re, json, os , logging , random, html, datetime 
from os import listdir
from os.path import isfile, join
from lib.config import connstr
from lib.context import context
from lib.utils import * 
from lib.picklist_recommender import picklist_recommender 
from lib.script_generator import script_generator   
# Shared notebook context; its logger is switched to DEBUG verbosity.
ctx = context()
ctx.logger.setLevel(logging.DEBUG)

# Read config.json from the working directory and attach the parsed
# settings to the context (other cells read ctx.config['connstr']).
with open('config.json', 'r') as f:
    config = json.load(f)
ctx.config = config

# Suppress every warning for the remainder of the session.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Ad-hoc check of script_generator: pass a two-item sample list to
# list_to_sql and print the second value it returns.
gen=script_generator(ctx)
sql_items, sql = gen.list_to_sql( ['AAA', 'BBB'] )
print(sql)

In [2]:
 
# Form key (used as PK_FORM) -> file name of the questionnaire workbook
# that xl_to_df loads for that form.
files = {
    '2023-A-HVAARC': 'HVA Network Architecture Questionnaire v0.1.xlsx',
    '2023-A-HVAASSET': 'HVA Asset Management Questionnaire v0.1.xlsx',
    '2023-A-HVABC': 'HVA Business Continuity Management Questionnaire v0.1.xlsx',
    '2023-A-HVADPRO': 'HVA Data Protection Questionnaire v0.1.xlsx',
    '2023-A-HVAENDPRO': 'HVA Endpoint Protection Questionnaire v0.1.xlsx',
    '2023-A-HVAIDENT': 'HVA Identity and Access Management Questionnaire v0.1.xlsx',
    '2023-A-HVAMD': 'HVA Monitoring and Detection Questionnaire v0.1.xlsx',
    '2023-A-HVAREM': 'HVA Remote Access Questionnaire v0.1.xlsx',
    '2023-A-HVAVULN': 'HVA Vulnerability Management Questionnaire v0.1.xlsx',
}
#files = [f for f in listdir(path) if re.search('^HVA.*1\.xlsx', f)] 

In [6]:
def normalize(s):
    """Reduce a label to an upper-case alphanumeric key used for matching.

    The value is converted to ``str``, upper-cased, and every character
    outside ``A-Z`` / ``0-9`` (punctuation and all whitespace) is removed.

    Args:
        s: Value to normalize. Non-string values are converted with ``str()``
            instead of raising ``AttributeError``.

    Returns:
        str: The normalized key, e.g. ``'Jump host (e.g.)'`` -> ``'JUMPHOSTEG'``.
    """
    # Convert first so numeric or missing cells can be normalized too.
    # A single pass is enough: once punctuation is stripped, the only other
    # characters the original second pass removed were whitespace.
    return re.sub(r'[^A-Z0-9]', '', str(s).upper())
# Ad-hoc call of normalize(); it is not the cell's last expression, so the result is not displayed.
normalize('Jump host access (e.g., the user accesses the HVA from an intermediate host) ')
 
def get_recommender(PK_FORM, normalizer=normalize):
    """Load the pick lists referenced by one form and build a recommender for them.

    Args:
        PK_FORM: Form key used to filter ``vwQuestions``. It is interpolated
            directly into the SQL text, so only pass trusted keys.
        normalizer: Callable applied to each ``Selections`` value to build the
            ``SelectionsNorm`` column. Defaults to ``normalize``.

    Returns:
        tuple: ``(pr, pl)`` where ``pr`` is the ``picklist_recommender``
        restricted to this form's pick-list types and ``pl`` is the result of
        the pick-list query with an added ``SelectionsNorm`` column.
    """
    # One definition of the filter so the query and the recommender cannot drift apart.
    form_filter = (
        "PK_PickListType IN ( SELECT DISTINCT FK_PickListType  "
        f"FROM vwQuestions WHERE PK_FORM='{PK_FORM}')"
    )
    pl = sql_todf(f"""
    SELECT LTRIM(RTRIM(DisplayValue))  [Selections], PK_PickList [ML_Value], PK_PickListType [PK_PickListType]
    FROM vwPickLists
    WHERE {form_filter}
    ORDER BY PK_PickListType DESC
    """, connstr)
    # Honour the caller-supplied normalizer (it was previously ignored in
    # favour of the module-level normalize).
    pl['SelectionsNorm'] = pl['Selections'].apply(normalizer)
    # NOTE(review): the query uses the imported `connstr` while the recommender
    # uses ctx.config['connstr'] - confirm both point at the same database.
    pr = picklist_recommender(
        ctx.config['connstr'],
        reset_cache=True,
        picklist_where=f" {form_filter} ",
    )
    return pr, pl

def xl_to_df(PK_FORM, base_dir=r'C:\Users\timko\Downloads\Questionnaire\\'):
    """Load one questionnaire workbook and return its scoring rows as strings.

    Reads sheet index 4 of the workbook registered for ``PK_FORM`` in
    ``files`` (header on row index 2, columns ``B:I`` and ``M:R``), cleans the
    columns, and keeps only rows whose ``ID`` and ``SML`` match the expected
    patterns.

    Args:
        PK_FORM: Key into ``files``; also stored in the ``PK_FORM`` column.
        base_dir: Directory that contains the workbook. Defaults to the
            previously hard-coded download folder.

    Returns:
        pandas.DataFrame: Filtered rows with every column converted to ``str``
        and missing values replaced by ``''``. ``SML`` is rebuilt as
        ``'SML' + ML_Score`` and ``SelectionsNorm`` holds the normalized
        ``Selections`` text.

    Raises:
        KeyError: If ``PK_FORM`` is not in ``files`` or an expected column is
            missing from the sheet.
    """
    path = os.path.join(base_dir, files[PK_FORM])
    print(PK_FORM)
    converts = {
        # Strip a leading "1." / "12." style prefix from the answer text.
        'Selections': lambda x: re.sub(r"^(\s?\d{,2}\.)", '', str(x)).strip(),
        'SML': lambda x: re.sub(r'[^A-Z0-9]', '', str(x)).strip(),
        'ML Scored': lambda x: str(x).strip(),
    }
    df = pd.read_excel(
        path, converters=converts, sheet_name=4, header=2, usecols='B:I, M:R'
    ).reset_index()

    # Cells holding 0 / '0' inherit the value from the row above.
    # NOTE(review): `method=` on replace() is deprecated in recent pandas; it is
    # left as-is because its exact fill semantics are version-dependent.
    for c in ['ID', 'Question', 'SML']:
        df[c] = df[c].replace(to_replace=[0, '0'], method='ffill')
    df = df.rename(columns=lambda c: re.sub(r'\s', '_', c))

    # Alternate layout: when a separate 'ML' column exists, it becomes
    # ML_Score and the previous ML_Score value moves to ML_Scored.
    if 'ML' in df.columns:
        df["ML_Scored"] = df["ML_Score"].apply(lambda x: str(x).strip())
        df['ML_Score'] = df["ML"].apply(lambda x: str(x).strip())
        df = df.drop(columns='ML')

    df['PK_FORM'] = PK_FORM
    # Assign the result instead of a chained inplace fillna, which does not
    # update the frame when pandas Copy-on-Write is active.
    df["ID"] = df["ID"].ffill()
    df = df.drop(columns=['Check_Answer', 'index'])

    # Keep only question rows (ID such as 'NA4a') that carry an SML<digit> tag.
    # Same patterns as before, without capture groups, so str.contains does
    # not warn about match groups.
    df = df.loc[df["ID"].str.contains(r'^\w\w\d\w{0,5}$', na=False, regex=True)]
    # .copy() so the column assignments below act on an independent frame.
    df = df.loc[df["SML"].str.contains(r'^SML\d$', na=False, regex=True)].copy()

    df['ML_Score'] = df['ML_Score'].apply(lambda s: re.sub(r'[^0-9\.]', '', str(s)))
    # Apostrophes become backticks (presumably to keep the generated SQL
    # string literals valid - confirm).
    df['Selections'] = df['Selections'].apply(lambda s: str(s).replace("'", '`'))
    df['SelectionsNorm'] = df['Selections'].apply(normalize)
    df['SML'] = df['ML_Score'].apply(lambda s: f'SML{s}')
    return df.fillna('').astype('str')

def apply_picklists(df):
    """Attach pick-list identifiers to the rows produced by ``xl_to_df``.

    For each question ``ID`` the normalized selections are passed to the
    recommender to choose a ``PK_PickListType``; each row is then matched to a
    pick-list entry on ``(SelectionsNorm, PK_PickListType)``.

    Args:
        df: Frame with at least ``PK_FORM``, ``ID``, ``Selections`` and
            ``SelectionsNorm`` columns. The form key is read from the first
            row, so the frame must not be empty.

    Returns:
        pandas.DataFrame: ``df`` plus ``PK_PickListType``, ``ML_Value`` (as
        ``str``, ``'0'`` when unmatched), ``PK_Picklist`` (``int32``) and a
        1-based ``sortorder``; columns whose name ends in ``_<char>`` or
        contains ``Question``, and ``ZT_Score``, are removed.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    PK_FORM = df.iloc[0]['PK_FORM']
    pr, pl = get_recommender(PK_FORM=PK_FORM, normalizer=normalize)

    # One recommendation per question, based on all of its selections together.
    dfp = df.groupby(['ID'], as_index=False).agg({'SelectionsNorm': list})
    dfp['PK_PickListType'] = dfp['SelectionsNorm'].apply(
        lambda s: pr.recommend(s, normalizer=normalize)['PK_PicklistType']
    )
    pr.to_cache()

    dff = pd.merge(df, dfp.loc[:, ['ID', 'PK_PickListType']], how='left', on='ID')
    dff = pd.merge(dff, pl, how='left', on=['SelectionsNorm', 'PK_PickListType'])
    # Both inputs carry 'Selections'; keep the workbook's text under the
    # original name (the '_y' copy is removed by the drop below).
    dff = dff.rename(columns={'Selections_x': 'Selections'})

    # Unmatched rows get 0. Convert once and reuse, and assign with [] so the
    # column is always set (attribute assignment only works for existing columns).
    ml_value = dff['ML_Value'].fillna(0).astype('int32')
    dff['ML_Value'] = ml_value.astype('str')
    dff['PK_Picklist'] = ml_value

    dff = dff.drop(columns=[c for c in dff.columns if re.search(r'_\w$|Question', c)])
    dff['sortorder'] = range(1, len(dff) + 1)
    if 'ZT_Score' in dff.columns:
        dff = dff.drop(columns='ZT_Score')
    return dff

#df = xl_to_df('2023-A-HVAAPPSEC')
#df = apply_picklists(df)
#df
#df.loc[df['ID'] == 'NA4a']
 

In [7]:
# Build one combined script: for every form, a "--<form key>" header
# followed by the INSERT statements generated for that form.
create = ''
sections = []
for k in files:
    df = xl_to_df(k)
    df = apply_picklists(df)
    # `create` is overwritten on every pass, so the value from the last form
    # is the one that ends up in the output.
    ins, create, temp = SQL_INSERT_FROM_DF(df, TABLE_NAME='HVASAScoring')
    sections.append('\n\n--' + k + '\n\n' + '\n'.join(ins))
script = ''.join(sections)

# The same text is written to the context's destination folder and to the
# hard-coded CyberScope database path.
sql_text = f"{create} \n\n {script}"
for target in (
    f'{ctx.get_dest()}script.sql',
    r'C:\dev\CyberScope\CyberScopeBranch\CSwebdev\database\_101_DB_Update_SAScoring_Excel_Import.sql',
):
    with open(target, 'w') as f:
        f.write(sql_text)

2023-A-HVAARC
2023-A-HVAASSET
2023-A-HVABC
2023-A-HVADPRO
2023-A-HVAENDPRO
2023-A-HVAIDENT
2023-A-HVAMD
2023-A-HVAREM
2023-A-HVAVULN


In [None]:
# Copy the generated script.sql into the CyberScope database folder:
# read it fully first, then write the text to the target file.
with open(f'{ctx.get_dest()}script.sql', 'r') as r:
    txt = r.read()

with open(r'C:\dev\CyberScope\CyberScopeBranch\CSwebdev\database\_101_DB_Update_SAScoring_Excel_Import.sql', 'w') as f:
    f.write(txt)

In [None]:

# Regenerate script.sql for a single frame.
# NOTE(review): `df` is not defined in this cell; it is whatever an earlier
# cell left in the kernel. After the loop cell above, `df` has already been
# through apply_picklists - confirm applying it again is intended.
dff=apply_picklists(df)

ins, create, temp=SQL_INSERT_FROM_DF(dff, TABLE_NAME='HVASAScoring')
with open(f'{ctx.get_dest()}script.sql', 'w') as f:
    # `ins` is rebound from the returned statements to one newline-joined string.
    ins='\n'.join(ins)
    f.write(f"{create} \n\n {ins}"  )
    
# Load an existing update script for inspection; this overwrites the `temp`
# returned by SQL_INSERT_FROM_DF above.
with open (r'C:\dev\CyberScope\CyberScopeBranch\CSwebdev\database\_100_DB_Update-752.sql', 'r' ) as f:
    temp=f.read()
temp  

 

In [None]:
# Scratch cell: derive a reduced frame from `dff` and overwrite script.sql with its INSERT script.
# NOTE(review): `df1` is an alias of `dff`, not a copy, so the in-place fill on the next line also targets `dff`.
df1=dff
df1.SML.fillna(method='ffill', inplace=True)
# Keep only rows whose ML_Score is not the string '0'.
df1=df1.loc[df1['ML_Score'] != '0']
# NOTE(review): FK_Picklist is not in the column list selected below, so it is discarded again.
df1['FK_Picklist']=df1['ML_Value'].astype(int)

# NOTE(review): xl_to_df drops the 'ML' column whenever it exists, so selecting 'ML' here may raise KeyError - confirm.
df1=df1.loc[:, ['ID', 'SML', 'ML','ML_Score', 'ML_Scored', 'ML_Value', 'sortorder']]
# NOTE(review): other cells call SQL_INSERT_FROM_DF with TABLE_NAME= and unpack three return values;
# confirm that TARGET= is accepted and that '\n'.join(sql) below matches what the helper returns.
sql=SQL_INSERT_FROM_DF(df1, TARGET='@SASCORING')

  
with open(f'{ctx.get_dest()}script.sql', 'w') as f:
    f.write('\n'.join(sql))
# Only the cell's final expression (`df1`) is displayed; the result of this lookup is discarded.
df1.loc[df1['ID']=='ID5b']
df1