-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_app.py
50 lines (44 loc) · 1.67 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import html
import json
import pickle

import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
@st.cache_resource
def load_model():
    """Return the 'bert-base-nli-mean-tokens' SentenceTransformer encoder.

    The function is wrapped in ``st.cache_resource`` so the model object is
    built through Streamlit's resource cache instead of on every call.
    """
    return SentenceTransformer('bert-base-nli-mean-tokens')
# --- Page script -----------------------------------------------------------
# Reads a Terms & Conditions text from the user, embeds each sentence, compares
# it against the reference embeddings stored in "embeddings.pkl", and renders
# the text with every sentence that has no sufficiently similar reference
# sentence highlighted in red.

# A sentence is flagged when its best cosine similarity against all stored
# reference embeddings is below this value.
SIMILARITY_THRESHOLD = 0.85
# Fragments shorter than this many characters are not scored.
MIN_SENTENCE_LENGTH = 5
# Separator used both to split the input and to re-join it for display.
SENTENCE_SEPARATOR = ". "

st.markdown("# 📃 tc*")
st.markdown("### a tool that can read Terms & Conditions agreements and flag anything that is non-standard.")
tc = st.text_input("Enter the Terms & Conditions document")

if st.button("Analyze"):
    # Treat whitespace-only input the same as empty input.
    if not tc.strip():
        st.error("Please enter input T&C!")
    else:
        model = load_model()

        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # "embeddings.pkl" must be a trusted asset shipped with the app.
        with open("embeddings.pkl", "rb") as pickle_in:
            stored_data = pickle.load(pickle_in)
        stored_embeddings = stored_data['embeddings']

        # Build a new filtered list instead of removing items from the list
        # being iterated, which silently skipped the fragment after each
        # removed one.
        fragments = tc.split(SENTENCE_SEPARATOR)
        sentences = [
            fragment for fragment in fragments
            if len(fragment) >= MIN_SENTENCE_LENGTH
        ]

        flagged = set()
        if sentences:  # nothing to encode when every fragment was too short
            embeddings = model.encode(sentences)
            # One batched call: row i holds the similarities of sentence i
            # against every stored embedding.
            similarities = cosine_similarity(embeddings, stored_embeddings)
            best_scores = similarities.max(axis=1)
            flagged = {
                sentence
                for sentence, score in zip(sentences, best_scores)
                if score < SIMILARITY_THRESHOLD
            }

        # Rebuild the document fragment by fragment so that (a) user text is
        # HTML-escaped before being rendered with unsafe_allow_html, and
        # (b) highlighting never searches inside markup inserted earlier.
        rendered = []
        for fragment in fragments:
            safe_fragment = html.escape(fragment, quote=False)
            if fragment in flagged:
                safe_fragment = (
                    "<div class='flag'><b>" + safe_fragment + "</b></div>"
                )
            rendered.append(safe_fragment)
        highlighted = (
            "<style>.flag{color: red;}</style>"
            + SENTENCE_SEPARATOR.join(rendered)
        )

        if flagged:
            st.success("Found a few non-standard clauses. They're flagged below.")
        else:
            # Do not claim findings when nothing fell below the threshold.
            st.info("No non-standard clauses were found.")
        st.markdown(highlighted, unsafe_allow_html=True)