# Term Table

Display all defined terms of a specified indenture in a single table, grouped by first letter.

In [1]:
import sqlite3
import pandas as pd
import numpy as np
import string
from collections import defaultdict

# Open the SQLite database that holds the DEFINITION table.
# `conn` and `c` are notebook-level names: definition_table() runs its query
# through `c`, and the last cell of the notebook closes `conn`.
sql_path = 'database.sqlite'
conn = sqlite3.connect(sql_path)
c = conn.cursor()

In [2]:
# Display settings: let long definition texts show up to 500 characters per
# cell, and never truncate the rows or columns of a rendered DataFrame.
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [3]:
def definition_table(indenture_name, columns, cursor=None):
    """Lay out every defined term of one indenture as a grid grouped by initial.

    Terms whose first character is an ASCII letter are grouped
    case-insensitively under the upper-case letter; every other term (leading
    digit or punctuation, empty string, non-string value) goes under '#'.
    Each group is cut into rows of ``columns`` cells and the last row of a
    group is padded with '-'. Groups appear in the order in which their first
    term is returned by the query.

    Parameters
    ----------
    indenture_name : str
        Value of ``DEFINITION.file_name`` identifying the indenture.
    columns : int
        Number of terms per row; must be at least 1.
    cursor : sqlite3.Cursor, optional
        Cursor used to run the query. Defaults to the notebook-level
        cursor ``c``.

    Returns
    -------
    pandas.DataFrame
        One row per chunk of terms, indexed by the group label (index name
        'Capital') with integer column labels ``0 .. columns - 1``. Empty
        when the indenture has no terms.

    Raises
    ------
    ValueError
        If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError('columns must be a positive integer, got %r' % (columns,))

    # Let SQLite do the filtering instead of loading the whole table into
    # pandas; the placeholder keeps file names with quotes safe.
    cur = c if cursor is None else cursor
    cur.execute('SELECT term FROM DEFINITION WHERE file_name = ?', (indenture_name,))
    terms = [row[0] for row in cur.fetchall()]

    # Bucket the terms by their (upper-cased) first letter.
    groups = defaultdict(list)
    for term in terms:
        initial = term[:1] if isinstance(term, str) else ''
        # '' is "in" every string, so the emptiness check must come first.
        is_letter = initial != '' and initial in string.ascii_letters
        groups[initial.upper() if is_letter else '#'].append(term)

    # Collect all rows first and build the frame once: DataFrame.append no
    # longer exists in pandas 2.x and growing a frame in a loop is quadratic.
    labels, rows = [], []
    for label, members in groups.items():
        for start in range(0, len(members), columns):
            chunk = members[start:start + columns]
            rows.append(chunk + ['-'] * (columns - len(chunk)))
            labels.append(label)

    return pd.DataFrame(rows, index=pd.Index(labels, name='Capital'))

In [4]:
# File name of the indenture whose terms are listed below; definition_table()
# matches it against the indenture column of the DEFINITION table.
indenture_name = 'TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf'

In [5]:
# Echo which indenture is being inspected, then render its terms nine per row
# (the table is the cell's last expression, so the notebook displays it).
print(indenture_name)
definition_table(indenture_name, columns=9)

TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
Capital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
#,25% Limitation,-,-,-,-,-,-,-,-
A,Accountants’ Report,Accounts,Act of Holders,Additional Issuance Threshold Test,Additional Subordinated Collateral Servicing Fee Condition,Adjusted Collateral Principal Amount,Adjusted Weighted Average Moody’s Rating Factor,Administrative Expense Cap,Administrative Expenses
A,Affected Class,Agent Members,Aggregate Excess Funded Spread,Aggregate Funded Spread,Aggregate Outstanding Amount,Aggregate Principal Balance,Aggregate Unfunded Spread,Applicable Issuers,Asset-Backed Commercial Paper
A,Assets,Assumed Reinvestment Rate,Authenticating Agent,Authorized Officer,-,-,-,-,-
B,Balance,Bank,Bankruptcy Exchange,Bankruptcy Exchange Test,Bankruptcy Subordination Agreement,Board of Directors,Board Resolution,Bond,Bridge Loan
B,Bridge Financing Period,Business Day,-,-,-,-,-,-,-
C,Caa Excess,Caa Excess Adjustment Amount,Calculation Agent,Cash,CCC Excess,CCC Excess Adjustment Amount,CEA,Certificated Notes,Certificated Secured Note
C,Certificated Security,Certificated Subordinated Note,Class,Class A Notes,Class A-R Notes,Class A/B Coverage Tests,Class B Notes,Class B-R Notes,Class C Coverage Tests
C,Class C Notes,Class C-R Notes,Class D Coverage Tests,Class D Notes,Class D-R Notes,Class Default Differential,Class E Coverage Tests,Class E Notes,Class E-R Notes
C,Class Scenario Default Rate,Class X Principal Amortization Amount,Clearing Corporation Security,Clearstream,Closing Date,Code,Co-Issuers,Collateral Administration Agreement,Collateral Administrator


# Search in SQL database

Look up the definition of a term in the pre-built SQLite database, both in the target indenture and across all indentures.

In [6]:
def sql_search(indenture, term, sqlite_file='database.sqlite'):
    """Fetch a term's definition in one indenture and in every indenture.

    Parameters
    ----------
    indenture : str
        ``DEFINITION.file_name`` of the target indenture.
    term : str
        Defined term to look up (matched exactly against ``DEFINITION.term``).
    sqlite_file : str, optional
        Path of the SQLite database; defaults to 'database.sqlite'.

    Returns
    -------
    tuple
        ``(target_text, text_list, all_info)`` where ``target_text`` is the
        first definition of ``term`` found in ``indenture``, ``all_info`` is
        the list of ``(file_name, definition)`` rows for ``term`` across the
        whole table (the target indenture included), and ``text_list`` is the
        definitions from ``all_info`` in the same order.

    Raises
    ------
    IndexError
        If ``term`` is not defined in ``indenture``. The exception type is the
        one the previous implementation raised implicitly; it now carries an
        explanatory message.
    """
    connection = sqlite3.connect(sqlite_file)
    try:
        cursor = connection.cursor()

        # Search the term in the target indenture. Bound parameters keep
        # terms containing quotes (e.g. "Moody's ...") from breaking or
        # altering the statement.
        cursor.execute(
            "SELECT definition FROM DEFINITION WHERE term = ? AND file_name = ?",
            (term, indenture),
        )
        target_row = cursor.fetchone()
        if target_row is None:
            raise IndexError(
                'term %r is not defined in indenture %r' % (term, indenture)
            )

        # Search the term in every indenture of the database.
        cursor.execute(
            "SELECT file_name, definition FROM DEFINITION WHERE term = ?",
            (term,),
        )
        all_info = cursor.fetchall()
    finally:
        # Each call opens its own connection, so always release it.
        connection.close()

    text_list = [definition for _, definition in all_info]
    return target_row[0], text_list, all_info

In [7]:
# term = input()
# target_text, text_list, all_info = sql_search(indenture_name, term)
# target_text, text_list

# Similarity Table

Calculate similarity scores between definitions using self-trained 100-dimensional word vectors and the Word Mover's Distance algorithm.

In [8]:
from Similarity import Similarity_score
from frequency_weight import word_freq_dict, sent_score, sql_freq, indenture_names, get_sent_score, color_df

In [9]:
# Input: the indenture (by file name) and the defined term to compare across
# indentures. search_indenture repeats the file name assigned to
# indenture_name earlier in the notebook.
search_indenture = 'TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf'
search_term = 'Authorized Officer'

In [10]:
# sql_search returns the target indenture's definition, the definitions of the
# term in every indenture (the target's own included), and the matching
# (file_name, definition) rows.
target_text, all_text, all_info = sql_search(search_indenture, search_term)
# Path of the word-vector model file handed to Similarity_score
model_path = './w2v_selftrained/w2v_100_sg.bin'

# Similarity Score, rounded to 4 decimals
# NOTE(review): Similarity_score lives in Similarity.py (not shown here);
# presumably result() yields one score per entry of all_text, in order — confirm.
score = Similarity_score(target_text, all_text, model_path)
final_score = [round(x,4) for x in score.result()]

# Get sentences and corresponding scores
# NOTE(review): res[0] and res[1] become the 'Text' and 'Sent_Score' columns in
# the next cell, where both are split on '##'; confirm the format in frequency_weight.
res = get_sent_score(search_term, all_text)

In [11]:
def _split_sentences(frame):
    """Expand the '##'-separated Text / Sent_Score cells into one row per piece.

    The result is indexed by (Indenture, Similarity_Score, piece number) and
    has the columns 'Text' and 'Sent_Score', the latter converted to float.
    """
    exploded = (
        frame.set_index(['Indenture', 'Similarity_Score'])
        .stack()
        .str.split('##', expand=True)
        .stack()
        .unstack(-2)
    )
    exploded['Sent_Score'] = exploded['Sent_Score'].astype(float)
    return exploded


# One row per indenture that defines the term
df = pd.DataFrame({
    'Indenture': [indenture_names(row[0]) for row in all_info],
    'Similarity_Score': final_score,
    'Text': res[0],
    'Sent_Score': res[1],
})

# Rows belonging to the searched indenture vs. all the others
is_target = df['Indenture'] == indenture_names(search_indenture)

# Table for the target term
df_target = _split_sentences(df[is_target])

# Table for the comparison terms, highest similarity first
df_comp = _split_sentences(df[~is_target]).sort_values(
    by=['Similarity_Score', 'Indenture'], ascending=False
)

In [12]:
# Color coding for the target text: color_df is applied to each row (axis=1).
# NOTE(review): color_df is imported from frequency_weight and not shown here;
# presumably it picks the row color from Sent_Score — confirm.
df_target.style.apply(color_df,axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Text,Sent_Score
Indenture,Similarity_Score,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TEL133,1.0,0,"With respect to the Issuer or the Co-Issuer, any Officer orany other Person who is authorized to act for the Issuer or the Co-Issuer, as applicable, in mattersrelating to, and binding upon, the Issuer or the Co-Issuer.",0.6
TEL133,1.0,1,"With respect to the Collateral Servicer,any Officer, employee, member or agent of the Collateral Servicer who is authorized to act forthe Collateral Servicer in matters relating to, and binding upon, the Collateral Servicer withrespect to the subject matter of the request, certificate or order in question.",0.89
TEL133,1.0,2,"With respect to theCollateral Administrator, any Officer, employee, partner or agent of the Collateral Administratorwho is authorized to act for the Collateral Administrator in matters relating to, and binding upon,the Collateral Administrator with respect to the subject matter of the request, certificate or orderin question.",1.0
TEL133,1.0,3,"With respect to the Trustee or any other bank or trust company acting as trustee ofan express trust or as custodian or the Loan Agent, a Trust Officer.",0.42
TEL133,1.0,4,"With respect to anyAuthenticating Agent, any Officer of such Authenticating Agent who is authorized toauthenticate the Offered Securities and/or the Mezzanine Bridge Notes.",0.29
TEL133,1.0,5,"Each party may receiveand accept a certification of the authority of any other party as conclusive evidence of theauthority of any person to act, and such certification may be considered as in full force and effectuntil receipt by such other party of written notice to the contrary",0.42


In [13]:
# Color coding for the comparison texts: color_df is applied to each row (axis=1).
# NOTE(review): color_df is imported from frequency_weight and not shown here;
# presumably it picks the row color from Sent_Score — confirm.
df_comp.style.apply(color_df,axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Text,Sent_Score
Indenture,Similarity_Score,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VIBR3,0.921,0,"With respect to the Issuer or the Co-Issuer, any Officer orany other Person who is authorized to act for the Issuer or the Co-Issuer, as applicable, in mattersrelating to, and binding upon, the Issuer or the Co-Issuer.",0.6
VIBR3,0.921,1,"With respect to the Portfolio Manager,any Officer, employee, member or agent of the Portfolio Manager who is authorized to act forthe Portfolio Manager in matters relating to, and binding upon, the Portfolio Manager withrespect to the subject matter of the request, certificate or order in question.",0.81
VIBR3,0.921,2,"With respect to theCollateral Administrator, any Officer, employee, partner or agent of the Collateral Administratorwho is authorized to act for the Collateral Administrator in matters relating to, and binding upon,the Collateral Administrator with respect to the subject matter of the request, certificate or orderin question.",1.0
VIBR3,0.921,3,"With respect to the Trustee or any other bank or trust company acting as trustee ofan express trust or as custodian or the Class A-1-R Loan Agent, a Trust Officer.",0.42
VIBR3,0.921,4,"With respect toany Authenticating Agent, any Officer of such Authenticating Agent who is authorized toauthenticate the Notes.",0.3
VIBR3,0.921,5,"Each party may receive and accept a certification of the authority of anyother party as conclusive evidence of the authority of any person to act, and such certificationmay be considered as in full force and effect until receipt by such other party of written notice tothe contrary",0.43
MIDOCC2,0.8839,0,"With respect to the Issuer or the Co-Issuer, any Officer or anyother Person who is authorized to act for the Issuer or the Co-Issuer, as applicable, in mattersrelating to, and binding upon, the Issuer or the Co-Issuer.",0.6
MIDOCC2,0.8839,1,"With respect to the Portfolio Manager,any Officer, employee, member or agent of the Portfolio Manager who is authorized to act forthe Portfolio Manager in matters relating to, and binding upon, the Portfolio Manager withrespect to the subject matter of the request, certificate or order in question.",0.81
MIDOCC2,0.8839,2,"With respect to theCollateral Administrator, any Officer, employee, partner or agent of the Collateral Administratorwho is authorized to act for the Collateral Administrator in matters relating to, and binding upon,the Collateral Administrator with respect to the subject matter of the request, certificate or orderin question.",1.0
MIDOCC2,0.8839,3,"With respect to the Trustee or any other bank or trust company acting as trustee ofan express trust or as custodian, a Trust Officer.",0.37


# Highlight Difference

In [14]:
from diff_highlight import find_text, inline_diff

In [15]:
# The two indentures whose definitions are diffed below.
# NOTE(review): these look like the short names shown in the 'Indenture'
# column of the similarity tables; confirm that find_text expects that form.
comp1_name = 'TEL133'
comp2_name = 'MIDOCC2'

In [16]:
# Look up each indenture's text for the searched term in all_info
comp1 = find_text(comp1_name, all_info)
comp2 = find_text(comp2_name, all_info)

from colored import fg, bg, attr, stylize
# fg(1) switches the foreground to color 1 and attr(0) resets it, so only the
# label itself is printed in color
print ('%s Target text: %s' % (fg(1), attr(0)))
print(comp1,'\n')

# comp1 is the benchmark
# comp2 is the comparison text
# inline_diff returns comp2 with highlight color marking where it differs from comp1
print ('%s Comparison text: %s' % (fg(1), attr(0)))
print(inline_diff(comp2, comp1))

[38;5;1m Target text: [0m
With respect to the Issuer or the Co-Issuer, any Officer orany other Person who is authorized to act for the Issuer or the Co-Issuer, as applicable, in mattersrelating to, and binding upon, the Issuer or the Co-Issuer. With respect to the Collateral Servicer,any Officer, employee, member or agent of the Collateral Servicer who is authorized to act forthe Collateral Servicer in matters relating to, and binding upon, the Collateral Servicer withrespect to the subject matter of the request, certificate or order in question. With respect to theCollateral Administrator, any Officer, employee, partner or agent of the Collateral Administratorwho is authorized to act for the Collateral Administrator in matters relating to, and binding upon,the Collateral Administrator with respect to the subject matter of the request, certificate or orderin question. With respect to the Trustee or any other bank or trust company acting as trustee ofan express trust or as custodian o

In [17]:
# Close the notebook-level database connection opened in the first cell
conn.close()