In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pytesseract
from pdf2image import convert_from_path
from tqdm import tqdm_notebook as tqdm
import tabula
import json
import re
import ast
import copy

Assumptions:
 - A higher-order value can only be reached through at least one non-base value.
 - A higher-order value cannot exist if it can be produced from base values alone, i.e. the order combination (0,1),(1) cannot occur.

In [15]:
def order_parse(parse_values,preceding_values,sheetfields,dependency_add=False):
    """Split fields into those resolvable from ``preceding_values`` and the rest.

    Each value in ``parse_values`` carries an ``'exist'`` expression string.
    The string is split on ``'||'`` into alternatives; from every alternative
    the referenced field tags (text between ``fields.`` and `` ==``/`` !=``)
    and the raw dependency expressions (text between ``fields.`` and ``)``)
    are extracted with regular expressions.

    Parameters
    ----------
    parse_values : dict
        Mapping of field name -> field definition containing an ``'exist'``
        string. When ``dependency_add`` is true the definitions are mutated
        in place (a ``'dependency'`` key is written).
    preceding_values : container
        Names already resolved; only membership (``in``) is used.
    sheetfields : container
        Names considered valid. An alternative that references any tag
        outside this container is ignored entirely.
    dependency_add : bool, optional
        When exactly ``True``, store the list of dependency expressions
        (one inner list per retained alternative) under ``value['dependency']``.

    Returns
    -------
    tuple(dict, dict)
        ``(parse_order_values, higher_order_values)``. A field lands in the
        second dict when at least one retained alternative references a tag
        that is not in ``preceding_values``; otherwise it lands in the first.
    """
    # Raw strings: the previous non-raw literal contained "\)", an invalid
    # escape sequence that newer Python versions warn about. The patterns
    # themselves are unchanged.
    tag_pattern = r"fields.(.*?)(?: ==| !=)"
    dependency_pattern = r"fields.(.*?)\)"

    parse_order_values = {}
    higher_order_values = {}

    for key, value in parse_values.items():
        order_list = []
        dependency_list = []

        for conditions in value['exist'].split('||'):
            tag_extract = re.findall(tag_pattern, conditions)

            # An alternative mentioning a field outside the sheet is dropped.
            if any(tag not in sheetfields for tag in tag_extract):
                continue

            # 1 -> tag already available in preceding_values, 0 -> not yet.
            tag_order = {int(tag in preceding_values) for tag in tag_extract}
            order_list.append(tuple(tag_order))

            dependency_extract = re.findall(dependency_pattern, conditions)
            if dependency_extract:
                dependency_list.append(dependency_extract)

        if dependency_add is True:
            value['dependency'] = dependency_list

        # Any retained alternative containing a 0 defers the field.
        if any(0 in order for order in order_list):
            higher_order_values[key] = value
        else:
            parse_order_values[key] = value

    return parse_order_values, higher_order_values


class order_simulate_values:
    """Plain attribute container for one group of simulated field values.

    Attributes
    ----------
    var_name : the name supplied by the caller at construction time.
    values_exist : list that callers fill with field names; starts empty.
    """

    def __init__(self, var_name):
        self.values_exist = list()
        self.var_name = var_name

#potential classmethod for generating from default by type
def default_type_values_generator(order_dict,order_object):
    """Set a placeholder string on ``order_object`` for every field in ``order_dict``.

    Parameters
    ----------
    order_dict : dict
        Mapping of field name -> field definition; each definition must
        contain a ``'type'`` key naming one of the supported types below.
    order_object : object
        Any object that accepts attribute assignment. It is mutated in place:
        one attribute per key of ``order_dict``.

    Returns
    -------
    object
        The same ``order_object`` that was passed in.

    Raises
    ------
    KeyError
        If a definition has no ``'type'`` key, or its type has no default.
    """
    default_dict = {"selectone":"0","yesno":"yes","text":"example text","text_list":"[text 1 ,text 2]",
                   "money":"100000","amount":"100","date":"30-Jun-2021","currency":"USD","number":"50",
                    "image":"example.jpg","percent":"2"}

    for key, value in order_dict.items():
        field_type = value['type']
        if field_type not in default_dict:
            # Same exception type as a plain lookup, but says which field failed.
            raise KeyError(
                f"no default value defined for field type {field_type!r} (field {key!r})"
            )
        setattr(order_object, key, default_dict[field_type])

    return order_object


def exist_check(order_object,order_dict,check_object):
    """Record which fields of ``order_dict`` exist given the values on ``check_object``.

    For each field, ``value['dependency']`` is a list of alternatives, each a
    list of condition strings such as ``"some_field == 'yes'"``. Every
    condition is evaluated as an attribute expression on ``check_object``.
    An alternative passes unless one of its conditions evaluates to exactly
    ``False``. When an alternative passes, the field name is appended to
    ``order_object.values_exist`` and the field's value is copied from
    ``order_object`` onto ``check_object``.

    Parameters
    ----------
    order_object : object
        Holds a ``values_exist`` list and one attribute per field of
        ``order_dict``; mutated in place.
    order_dict : dict
        Mapping of field name -> definition containing a ``'dependency'`` list.
    check_object : object
        Source of the values the conditions are evaluated against; mutated
        in place with the values of fields found to exist.

    Returns
    -------
    tuple
        ``(order_object, check_object)`` - the same objects that were passed in.
    """
    for key, value in order_dict.items():
        for condition_list in value['dependency']:
            skip = False
            for condition in condition_list:
                # Evaluate against the object that was actually passed in.
                # The previous code built the expression from
                # check_object.var_name, which made eval look up a global of
                # that name instead of this argument.
                # NOTE(review): eval executes the condition text as Python;
                # only use this with trusted workflow definitions.
                evaluation = eval("check_object." + condition)
                if evaluation is False:
                    skip = True
            if skip is False:
                order_object.values_exist.append(key)
                setattr(check_object, key, getattr(order_object, key))

    # De-duplicate while keeping first-seen order so the result is deterministic.
    order_object.values_exist = list(dict.fromkeys(order_object.values_exist))
    return order_object, check_object

def list_reference_order(sample_list,master_list):
    """Return the items of ``sample_list`` arranged in ``master_list`` order.

    Each sample is located with ``master_list.index`` (so a sample missing
    from ``master_list`` raises ``ValueError``). Samples that map to the same
    position collapse into a single entry, the last one seen winning.
    """
    by_position = {master_list.index(item): item for item in sample_list}
    return [by_position[position] for position in sorted(by_position)]

In [16]:
# Load the workflow definition from the current working directory.
data_dir = os.getcwd()
# The context manager closes the file handle as soon as the JSON is parsed.
with open(os.path.join(data_dir,'CBAWorkflowIon.txt')) as input_file:
    input_json = json.load(input_file)
ts_field_values = input_json['groups']['termsheet']['fields']

# Keep only the field definitions listed in the termsheet group, dropping
# their 'editable' entry (tolerating definitions that do not have one).
ts_fields = {}
for key,value in input_json['fields'].items():
    if key in ts_field_values:
        value.pop('editable', None)
        ts_fields[key] = value

termsheetfields = list(ts_fields.keys())

# Group the termsheet field names by their declared 'type', keeping the
# order in which they appear in the termsheet group.
master_order_type = {}
index_fields = {}
for field in ts_field_values:
    data_type = ts_fields[field]['type']
    master_order_type.setdefault(data_type, []).append(field)

In [18]:
# Show the termsheet field names grouped by their 'type' value.
print(master_order_type)

{'image': ['logo_issuer', 'logo_dealer'], 'text': ['issuer', 'other_rating_agency', 'other_rating_agency_rating', 'security_type', 'status_of_notes', 'benchmark_bond', 'benchmark_curve', 'governing_law', 'expenses', 'isin', 'common_code', 'use_of_proceeds', 'termsheetdisclaimer'], 'currency': ['issuance_currency'], 'money': ['nominal_amount', 'net_proceeds', 'denomination_minimum', 'integral_multiple'], 'selectone': ['interest_basis', 'ratings', 'moodys_rating', 's_and_p_rating', 'fitch_rating', 'method_of_distribution', 'interest_frequency', 'fixed_day_count_fraction', 'day_count_fraction_floating', 'business_day_convention', 'reference_rate', 'mifid_target_market', 'bearer_or_registered'], 'percent': ['fixed_interest_rate', 'margin', 'reoffer_price', 'reoffer_yield', 'upfront_fees', 'all_in_price', 'all_in_yield', 'final_redemption_amount'], 'date': ['maturity_date', 'issue_date'], 'yesno': ['issuer_ratings', 'moodys_rating_required', 's_and_p_rating_required', 'fitch_rating_required

# Gather the 'type' of every termsheet field, then reduce to the distinct types.
type_list = [values['type'] for values in ts_fields.values()]
type_set = set(type_list)
print(f'Data Types Present: {type_set}')

In [4]:
# Split the termsheet fields: definitions whose 'exist' is literally True are
# base fields; every other definition goes to non_base_dict.
base_dict = {}
non_base_dict = {}
for key,value in ts_fields.items():
    if value['exist'] is True:
        base_dict.update({key:value})
    else:
        non_base_dict.update({key:value})


# NOTE(review): preceding_dict is the same object as base_dict (no copy is
# made), so the update() below also adds the first-order entries to base_dict.
# Later cells read base_dict after this mutation - confirm whether that is
# intended before changing it to a copy.
preceding_dict = base_dict
# dependency_add=True makes order_parse store the parsed conditions on each
# non-base definition under 'dependency'.
first_order_dict,high_order_dict = order_parse(non_base_dict,preceding_dict,termsheetfields,dependency_add=True)
preceding_dict.update(first_order_dict)
second_order_dict,higher_order_dict = order_parse(high_order_dict,preceding_dict,termsheetfields)
# Only two dependency levels are handled; anything left over stops the notebook.
assert len(higher_order_dict) == 0, 'Higher order Exists'

In [5]:
# One simulation container per dependency level.
base_simulate_values = order_simulate_values('base_simulate_values')
first_order_simulate_values = order_simulate_values('first_order_simulate_values')
second_order_simulate_values = order_simulate_values('second_order_simulate_values')

# Every key currently in base_dict is recorded as existing on the base container.
base_simulate_values.values_exist.extend(base_dict.keys())

# Fill each container with the type-based placeholder value of its fields.
base_simulate_values = default_type_values_generator(base_dict,base_simulate_values)
first_order_simulate_values = default_type_values_generator(first_order_dict,first_order_simulate_values)
second_order_simulate_values = default_type_values_generator(second_order_dict,second_order_simulate_values)

In [6]:
# Work on a deep copy so the checks can add values without touching
# base_simulate_values itself.
check_object = copy.deepcopy(base_simulate_values)
# Order matters: exist_check copies the values of first-order fields found to
# exist onto check_object, and the second call receives that updated object.
first_order_simulate_values, check_object = exist_check(first_order_simulate_values,first_order_dict,check_object)
second_order_simulate_values, check_object = exist_check(second_order_simulate_values,second_order_dict,check_object)

In [7]:
# First-order fields that exist_check recorded as existing.
print(first_order_simulate_values.values_exist)

['mifid_manufacturers', 'business_day_convention_required', 'interest_frequency', 'other_rating', 'use_of_proceeds', 'paying_agent_details', 'fixed_interest_payment_dates', 'moodys_rating_required', 'ratings', 'fitch_rating_required', 'integral_multiple', 'lead_managers', 'fixed_interest_rate', 'fixed_day_count_fraction', 's_and_p_rating_required', 'additional_business_centres']


In [8]:
# Second-order fields that exist_check recorded as existing.
print(second_order_simulate_values.values_exist)

['fitch_rating', 'moodys_rating', 'business_day_convention', 'other_rating_agency', 'other_rating_agency_rating', 's_and_p_rating']


In [9]:
# Names appended to the base container from base_dict's keys.
print(base_simulate_values.values_exist)

['additional_business_centres_required', 'all_in_price', 'all_in_yield', 'bearer_or_registered', 'benchmark_bond', 'benchmark_curve', 'common_code', 'denomination_minimum', 'expenses', 'final_redemption_amount', 'governing_law', 'integral_multiple_required', 'interest_basis', 'investor_put', 'isin', 'issuance_currency', 'issue_date', 'issuer', 'issuer_call', 'issuer_ratings', 'listing', 'logo_dealer', 'logo_issuer', 'maturity_date', 'method_of_distribution', 'mifid_manufacturers_required', 'mifid_target_market', 'net_proceeds', 'nominal_amount', 'paying_agent_details_required', 'reoffer_price', 'reoffer_yield', 'security_type', 'spread_over_benchmark_in_bps', 'status_of_notes', 'termsheetdisclaimer', 'upfront_fees', 'use_of_proceeds_required', 'additional_business_centres', 'business_day_convention_required', 'day_count_fraction_floating', 'fitch_rating_required', 'fixed_day_count_fraction', 'fixed_interest_payment_dates', 'fixed_interest_rate', 'floating_interest_payment_dates', 'inte

In [10]:
# All termsheet field names (the keys of ts_fields).
print(termsheetfields)


['additional_business_centres', 'additional_business_centres_required', 'all_in_price', 'all_in_yield', 'bearer_or_registered', 'benchmark_bond', 'benchmark_curve', 'business_day_convention', 'business_day_convention_required', 'common_code', 'day_count_fraction_floating', 'denomination_minimum', 'expenses', 'final_redemption_amount', 'fitch_rating', 'fitch_rating_required', 'fixed_day_count_fraction', 'fixed_interest_payment_dates', 'fixed_interest_rate', 'floating_interest_payment_dates', 'governing_law', 'integral_multiple', 'integral_multiple_required', 'interest_basis', 'interest_frequency', 'investor_put', 'isin', 'issuance_currency', 'issue_date', 'issuer', 'issuer_call', 'issuer_ratings', 'lead_managers', 'listing', 'logo_dealer', 'logo_issuer', 'margin', 'maturity_date', 'method_of_distribution', 'mifid_manufacturers', 'mifid_manufacturers_required', 'mifid_target_market', 'moodys_rating', 'moodys_rating_required', 'net_proceeds', 'nominal_amount', 'other_rating', 'other_ratin