# Term Table

Display all defined terms of a specified indenture in a single table, grouped by their first letter.

In [110]:
import sqlite3
import pandas as pd
import numpy as np
import string
from collections import defaultdict

def definition_table(sqlite_file, indenture_name, columns):
    """Lay out every term defined in one indenture as a fixed-width table.

    Terms are grouped by their first character: ASCII letters are grouped
    case-insensitively under the upper-case letter, and anything else
    (digits, punctuation, empty strings) is grouped under '#'. Each group is
    split into rows of ``columns`` cells, and the last row of a group is
    padded with '-' so that every row has the same width.

    Parameters
    ----------
    sqlite_file : str
        Path of the SQLite database holding the DEFINITION table.
    indenture_name : str
        ``file_name`` value identifying the indenture whose terms are shown.
    columns : int
        Number of term cells per row; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        One row per chunk of terms, with integer column labels
        ``0 .. columns-1`` and an index named 'Capital' holding the group
        key. An empty frame (index still named 'Capital') is returned when
        the indenture has no terms.

    Raises
    ------
    ValueError
        If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError("columns must be a positive integer, got %r" % (columns,))

    # Let SQLite do the filtering (bound parameter, no string building) and
    # always release the connection, even if the query fails.
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.execute(
            "SELECT term FROM DEFINITION WHERE file_name = ?",
            (indenture_name,),
        )
        term_list = [row[0] for row in cursor.fetchall()]
    finally:
        conn.close()

    # Group the terms by initial; dict insertion order keeps the groups in
    # the order in which their first term was read from the database.
    ascii_letters = set(string.ascii_letters)
    groups = defaultdict(list)
    for term in term_list:
        if term is None:
            # A NULL term has nothing to display.
            continue
        initial = term[:1]  # '' for an empty term, which then falls under '#'
        key = initial.upper() if initial in ascii_letters else '#'
        groups[key].append(term)

    # Build all rows first and create the frame once; DataFrame.append was
    # removed in pandas 2.0 and was quadratic when called in a loop.
    labels = []
    rows = []
    for key, values in groups.items():
        for start in range(0, len(values), columns):
            chunk = values[start:start + columns]
            rows.append(chunk + ['-'] * (columns - len(chunk)))
            labels.append(key)

    if not rows:
        df = pd.DataFrame()
        df.index.name = 'Capital'
        return df

    return pd.DataFrame(rows, index=pd.Index(labels, name='Capital'))

In [111]:
# Show complete tables in the notebook: no column or row truncation, and
# cells up to 200 characters wide so long definitions stay readable.
# Full option names are used throughout; abbreviated names depend on pattern
# matching and can become ambiguous when pandas adds new options.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 200)

In [112]:
# SQLite database file that the cells below query.
sql_path = 'database.sqlite'
# File name of the indenture whose terms are displayed and searched.
indenture_name = 'TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf'

In [116]:
# Echo the indenture being shown, then render its terms nine per row
# (the trailing expression is what the notebook displays).
print(indenture_name)
definition_table(sqlite_file=sql_path, indenture_name=indenture_name, columns=9)

TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
Capital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
#,25% Limitation,-,-,-,-,-,-,-,-
A,Accountants’ Report,Accounts,Act of Holders,Additional Issuance Threshold Test,Additional Subordinated Collateral Servicing Fee Condition,Adjusted Collateral Principal Amount,Adjusted Weighted Average Moody’s Rating Factor,Administrative Expense Cap,Administrative Expenses
A,Affected Class,Agent Members,Aggregate Excess Funded Spread,Aggregate Funded Spread,Aggregate Outstanding Amount,Aggregate Principal Balance,Aggregate Unfunded Spread,Applicable Issuers,Asset-Backed Commercial Paper
A,Assets,Assumed Reinvestment Rate,Authenticating Agent,Authorized Officer,-,-,-,-,-
B,Balance,Bank,Bankruptcy Exchange,Bankruptcy Exchange Test,Bankruptcy Subordination Agreement,Board of Directors,Board Resolution,Bond,Bridge Loan
B,Bridge Financing Period,Business Day,-,-,-,-,-,-,-
C,Caa Excess,Caa Excess Adjustment Amount,Calculation Agent,Cash,CCC Excess,CCC Excess Adjustment Amount,CEA,Certificated Notes,Certificated Secured Note
C,Certificated Security,Certificated Subordinated Note,Class,Class A Notes,Class A-R Notes,Class A/B Coverage Tests,Class B Notes,Class B-R Notes,Class C Coverage Tests
C,Class C Notes,Class C-R Notes,Class D Coverage Tests,Class D Notes,Class D-R Notes,Class Default Differential,Class E Coverage Tests,Class E Notes,Class E-R Notes
C,Class Scenario Default Rate,Class X Principal Amortization Amount,Clearing Corporation Security,Clearstream,Closing Date,Code,Co-Issuers,Collateral Administration Agreement,Collateral Administrator


# Search in SQL database

Look up a term's definition in the pre-built SQLite database, both in the selected indenture and in every other indenture.

In [117]:
def sql_search(indenture, term, sqlite_file='database.sqlite'):
    """Fetch the definition of ``term`` in one indenture and in all others.

    Parameters
    ----------
    indenture : str
        ``file_name`` of the indenture being searched.
    term : str
        Term whose definition is looked up (matched exactly).
    sqlite_file : str, optional
        Path of the SQLite database holding the DEFINITION table.
        Defaults to 'database.sqlite'.

    Returns
    -------
    tuple
        ``(target_text, text_list, all_info)`` where ``target_text`` is the
        first matching definition in ``indenture``, ``all_info`` is a list of
        ``(file_name, definition)`` rows for the same term in every other
        indenture, and ``text_list`` holds just the definitions from
        ``all_info`` in the same order.

    Raises
    ------
    IndexError
        If ``term`` is not defined in ``indenture``.
    """
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.cursor()

        # Bound parameters keep terms containing quotes (e.g. apostrophes)
        # from breaking or altering the statement.
        cursor.execute(
            """
            SELECT definition
            FROM DEFINITION
            WHERE term = ? AND file_name = ?
            """,
            (term, indenture),
        )
        target_rows = cursor.fetchall()

        cursor.execute(
            """
            SELECT file_name, definition
            FROM DEFINITION
            WHERE term = ? AND file_name != ?
            """,
            (term, indenture),
        )
        all_info = cursor.fetchall()
    finally:
        # Release the database handle whether or not the queries succeeded.
        conn.close()

    if not target_rows:
        # Same exception type the unguarded lookup used to raise, but with a
        # message that says what was missing.
        raise IndexError(
            "term %r is not defined in indenture %r" % (term, indenture)
        )

    text_list = [definition for _, definition in all_info]
    return target_rows[0][0], text_list, all_info

In [118]:
# Read the term to look up from standard input, then fetch its definitions.
# NOTE(review): input() waits for interactive entry, so this cell stalls an
# unattended "Run All"; consider assigning the term directly.
term = input()
target_text, text_list, all_info = sql_search(indenture=indenture_name, term=term)

Qualified Purchaser


In [119]:
#text_list

# Similarity Table

Calculate similarity scores using self-trained 100-dimensional word vectors and the Word Mover's Distance algorithm.

In [120]:
from Similarity import Similarity_score
import matplotlib.pyplot as plt

In [121]:
# Inputs for the similarity comparison: the word-vector model file, the term
# to compare, and the indenture whose definition is the reference text.
model_path = './w2v_selftrained/w2v_100_sg.bin'
search_term = 'Volcker Rule'
search_indenture = indenture_name
target_text, text_list, all_info = sql_search(indenture=search_indenture, term=search_term)

In [122]:
# Score every other indenture's definition against the reference text.
# The model file comes from `model_path`, so the path is configured in one
# place instead of being repeated here as a literal.
score = Similarity_score(target_text, text_list, model_path)
final_score = score.result()

In [123]:
# Pair each other indenture's definition with its score, ranked from most
# to least similar.
other_definitions = pd.DataFrame(all_info, columns=['Indentures', 'Term Text'])
other_scores = pd.DataFrame(final_score, columns=['Similarity Score'])
ranked_matches = pd.concat([other_definitions, other_scores], axis=1).sort_values(
    by=['Similarity Score'], ascending=False
)

# Single header row for the searched indenture; it has no score of its own,
# so '-' is used as a placeholder.
temp_df = pd.DataFrame(
    [search_indenture, target_text, '-'],
    index=['Indentures', 'Term Text', 'Similarity Score'],
).T

# Searched indenture first, followed by the ranked matches.
similarity_table = pd.concat([temp_df, ranked_matches])

In [124]:
similarity_table 
# The first row (index label 0, similarity score '-') is the searched indenture;
# the rows below it are the other indentures, ordered by descending similarity.

Unnamed: 0,Indentures,Term Text,Similarity Score
0,TEL133.20170810.dce07916-3ca5-2704-db92-fcfd7bbd6f53.5d35d0d4f9a56c3750c8880591dcb580.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereunder",-
5,ALM6_1.20180611.b0eca308-20af-5a81-5bc5-a16f4bca5d7a.c6a25c15a03a42853fe1bc0eac9b00b3.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereunder",1
15,STECR182.20180828.5e27ce4a-a011-1ffe-7711-1dd46c7729f0.a17f7cba4ba1ed0ef3c08d0af5245660.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereunder",1
13,ALM5_1.20171018.ba8abc72-d777-9532-ad94-df43e9d30b21.6d9c77105fce25ddc7a940fdf928e08d.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereunder",1
2,ELEV1707.20171208.93f8c7fc-3209-45d2-e906-4bf7ed958eda.b0e4c31900aa8ce13e6e8ea3622e1b44.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereof",0.918943
3,TIAAC2.20170330.d6f30a84-ac69-df29-935a-2ab882bf07b8.ca2ecc44324e3f9025f10ab8745e8298.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereof",0.918943
22,ELEV1706.20170725.f9d21b79-f782-3bd6-a888-029c45f7e7f5.0db960542a5f8bd5821395180c9df4e9.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereof",0.918943
7,ARROP142.20171020.e2f54cae-545d-0923-7ff5-956b10192820.52ca4243b4c26dbdfe7a9498e739ab9d.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereof",0.918943
20,ARROP154.20150423.ed76b9d5-293a-1ed4-9397-86c3ebc71dba.d41fd910b1c45aa0dd40f2950dfc9fa6.pdf,"Section 13 of the U.S. Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations thereof",0.918943
19,BATTL7.20180718.7a551faa-3762-7df9-745b-f25a169a6689.e592db3fd9de59ff8da0e9ddb214dc23.pdf,"Section 13 of the Bank Holding Company Act of 1956, asamended, and the applicable rules and regulations promulgated thereunder",0.906027
