# Attorney Finder

### Imports

In [10]:
import ipywidgets as widgets
from scipy import stats
from fuzzywuzzy import process
from operator import itemgetter
from stanfordcorenlp import StanfordCoreNLP as sfnlp
import re

import pandas as pd
import numpy as np
import datetime
import json
from bs4 import BeautifulSoup
import requests
from googleapiclient.discovery import build


import itertools
from scipy.sparse import csr_matrix, coo_matrix

import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt

from elasticsearch import Elasticsearch
import warnings
warnings.filterwarnings('ignore')

# Build `full`: one row per (attorney e-mail, case) with the summed weight,
# enriched with the per-case columns from case_cite_tag.csv and cites_common.csv.
full = pd.read_csv('full.csv')
full.drop('Unnamed: 0', axis=1, inplace=True)
# `.str.lower()` leaves missing e-mails as NaN instead of raising AttributeError
# the way calling `.lower()` on every element does.
full['email'] = full['email'].str.lower()
full = full.groupby(['email', 'case_id']).agg({'weight':'sum'}).reset_index()
# `DataFrame.from_csv` has been removed from pandas; `read_csv` with
# `index_col=0, parse_dates=True` is its documented equivalent.
case_cite_tag = pd.read_csv('case_cite_tag.csv', index_col=0, parse_dates=True)
# Drop the tag file's own `weight` so it cannot clash with the summed one in `full`.
case_cite_tag.drop('weight', axis=1, inplace=True)
full = full.merge(case_cite_tag, how='left', on='case_id')
cites_common = pd.read_csv('cites_common.csv', index_col=0, parse_dates=True)
full = full.merge(cites_common, how='left', on='case_id')
# NOTE(review): the 250 cutoff is not explained anywhere in the notebook - confirm.
full = full[full['cite_common_count'] < 250]
# NOTE(review): a `cite_common_count` of 0 yields inf here - confirm that cannot occur.
full['pt_score'] = full['updated_NOS']/full['cite_common_count']

In [11]:
# Renders a stand-alone toggle button as this cell's output. The widget is not
# assigned to a name, so no other cell can read its state.
widgets.ToggleButton(
    value=False,
    description='Click me',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Description',
    icon='check'
)

In [12]:
def str_clean(string):
    """Flatten newlines to spaces and trim surrounding whitespace.

    Anything that lacks the string methods used here (``None``, floats, ...)
    produces ``np.nan`` instead of raising.
    """
    try:
        cleaned = string.replace('\n',' ').strip()
    except AttributeError:
        cleaned = np.nan
    return cleaned

In [13]:
def plaintiff_from_brief_id(plaint, brief_id):
    """Return ``plaint``, falling back to the name embedded in ``brief_id``.

    When ``plaint`` is a float NaN, the fallback takes the text before the
    first ``_`` in ``brief_id``, keeps what precedes ``-v-`` and turns hyphens
    into spaces. Any other ``plaint`` (including ``None``) is returned as is.

    Parameters
    ----------
    plaint : object
        Existing plaintiff value, possibly NaN.
    brief_id : str
        Identifier shaped like ``<plaintiff>-v-<defendant>_<document>``.

    Returns
    -------
    object
        ``plaint`` itself, or the derived name; ``plaint`` is also returned
        when ``brief_id`` is not a string.
    """
    # Compare by value: `plaint is np.nan` only recognises the np.nan singleton
    # and misses float('nan') / np.float64 NaNs.
    is_missing = isinstance(plaint, (float, np.floating)) and plaint != plaint
    if not is_missing:
        return plaint
    try:
        return brief_id.split('_')[0].split('-v-')[0].replace('-', ' ')
    except AttributeError:
        # brief_id is not a string (e.g. it is missing too): nothing to derive.
        return plaint
    
    
def defendant_from_brief_id(defend, brief_id):
    """Return ``defend``, falling back to the name embedded in ``brief_id``.

    When ``defend`` is a float NaN, the fallback takes the text before the
    first ``_`` in ``brief_id``, keeps what follows the first ``-v-`` and turns
    hyphens into spaces. Any other ``defend`` (including ``None``) is returned
    as is.

    Parameters
    ----------
    defend : object
        Existing defendant value, possibly NaN.
    brief_id : str
        Identifier shaped like ``<plaintiff>-v-<defendant>_<document>``.

    Returns
    -------
    object
        ``defend`` itself, or the derived name; ``defend`` is also returned
        when ``brief_id`` has no ``-v-`` part or is not a string.
    """
    # Compare by value: `defend is np.nan` only recognises the np.nan singleton
    # and misses float('nan') / np.float64 NaNs.
    is_missing = isinstance(defend, (float, np.floating)) and defend != defend
    if not is_missing:
        return defend
    try:
        return brief_id.split('_')[0].split('-v-')[1].replace('-', ' ')
    except (IndexError, AttributeError):
        # IndexError: no '-v-' separator; AttributeError: brief_id is not a string.
        return defend


In [14]:
def drop_empty(string):
    """Return ``string`` unchanged, or ``None`` when it is ``""`` or a single space."""
    is_blank = (" " == string) or ("" == string)
    return None if is_blank else string

In [15]:
def remove_fluff(string, reg, insert=''):
    """Replace every match of the compiled pattern ``reg`` in ``string`` with ``insert``.

    With the default ``insert`` the matches are simply deleted.
    """
    substituted = reg.sub(insert, string)
    return substituted

In [16]:
# Compiled once at definition time instead of on every call.
# Truncation marker for placeholder defendants ("... and DOES 1-10").
_DOES_RE = re.compile(r'\band DOES [0-9]', re.IGNORECASE)
# A conjunction at the start of a fragment; must be a whole word so that names
# such as "anderson" are left alone.
_LEADING_AND_RE = re.compile(r'^and(?:\s+|$)', re.IGNORECASE)
# Corporate / caption boilerplate to delete. Every pattern is anchored on word
# boundaries so it cannot eat letters inside a name ("Alpine", "Costco",
# "Market Alliance").
_FLUFF_RES = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'(?<=\s)INC\b\.?',           # Inc / Inc.
        r'\bL\.?L\.?C\b\.?',          # LLC / L.L.C.
        r'\bL\.?T\.?D\b\.?',          # Ltd / L.T.D.
        r'\bGMBH\b',                  # GmbH
        r'\bCO\.?(?=[\s,]|$)',        # Co / Co. as a whole word
        r'\bet al\b\.?',              # et al / et al.
        r'\bin re\b\.?',              # In re / In re.
        r'\b(?:L\.?)?L\.?P\b\.?',     # LP / LLP / L.P. / L.L.P.
        r'(?<=\s)P\.?C\b\.?',         # PC / P.C.
    )
]


def cleaning(blah):
    """Split a raw party string into a list of cleaned party names.

    Steps: semicolons are treated as commas; corporate and caption boilerplate
    (Inc, LLC, Ltd, GmbH, Co, et al, In re, LP/LLP, PC) is removed; everything
    from "and DOES <digit>" onwards is dropped; the rest is split on commas;
    each fragment has its whitespace collapsed and a leading "and" removed;
    empty fragments are discarded.

    Parameters
    ----------
    blah : str
        Raw party text. Any value without ``.replace`` (``None``, NaN, ...) is
        returned unchanged.

    Returns
    -------
    list of str
        Cleaned names in their original order, or ``blah`` itself when it is
        not string-like.
    """
    try:
        text = blah.replace(';', ',')
    except AttributeError:
        return blah

    for pattern in _FLUFF_RES:
        text = pattern.sub('', text)

    does_match = _DOES_RE.search(text)
    if does_match is not None:
        text = text[:does_match.start()]

    names = []
    for fragment in text.split(','):
        # Collapse the gaps left behind by removed tokens, then trim.
        name = ' '.join(fragment.split())
        name = _LEADING_AND_RE.sub('', name)
        # Filter after stripping so whitespace-only fragments do not survive as ''.
        if name:
            names.append(name)
    return names

In [17]:
# `DataFrame.from_csv` has been removed from pandas; `read_csv` with
# `index_col=0, parse_dates=True` is its documented equivalent.
party_mast = pd.read_csv('parties.csv', index_col=0, parse_dates=True)

In [18]:
# Compiled once at definition time instead of on every call. Each pattern is
# tried with `search`, case-insensitively.
_CITY_STATE_US_RES = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        '^USA$',
        '^state of',
        'state of$',
        '^United States of America$',
        '^the city',
        'city of$',
        '^city of',
    )
]


def state_city_us(bid, desc):
    """Label a party as ``'Individual'``, ``'City/State/US'`` or NaN.

    Parameters
    ----------
    bid : str
        Party name to test against the city / state / US patterns.
    desc : object
        Existing description. ``'individual'`` in any letter case wins over
        the pattern check, so feeding this function's own ``'Individual'``
        output back in (re-running the cell) keeps the label.

    Returns
    -------
    str or float
        ``'Individual'``, ``'City/State/US'``, or ``np.nan`` when nothing
        matches or ``bid`` is not a string.
    """
    if isinstance(desc, str) and desc.lower() == 'individual':
        return 'Individual'
    try:
        matched = any(pattern.search(bid) for pattern in _CITY_STATE_US_RES)
    except TypeError:
        # `search` rejects non-string input such as a NaN party name.
        return np.nan
    return 'City/State/US' if matched else np.nan
        
# Pair the two columns positionally. Looking values up with `[i]` over
# `range(len(...))` is label-based and only works with a default 0..n-1 index.
party_mast['desc'] = [state_city_us(parties, desc) for parties, desc in zip(party_mast['parties'], party_mast['desc'])]

In [19]:
# Preview up to the first 40 rows after the `desc` relabelling.
party_mast.head(40)

Unnamed: 0,brief_id,parties,desc
0,gabriel-et-al-v-fischer-et-al_memorandum-of-la...,gabriel,Individual
1,williamson-v-verizon-services-corp-et-al_reply...,williamson,Individual
2,price-v-chalmette-refining-llc-et-al_motion-fo...,price,Individual
3,veasey-et-al-v-wilkins-jr_reply-to-response-to...,veasey,Individual
4,clearlake-shipping-pte-ltd-v-ow-bunker-switzer...,clearlake shipping pte,
5,baltazar-et-al-v-apple-inc_motion-to-strike-22...,baltazar,Individual
6,sierra-club-v-mccarthy_motion-for-summary-judg...,sierra club,
7,log-cabin-republicans-v-united-states-of-ameri...,log cabin republicans,
8,hysitron-incorporated-v-mts-systems-corporatio...,hysitron incorporated,
9,citizens-for-responsibility-and-ethics-in-wash...,citizens for responsibility and ethics in wash...,


# Creating Attorney Matrix

In [21]:
# Build the attorney-by-attorney co-occurrence matrix `M`.
#
# attorney_map / case_map assign each e-mail / case id a dense index in order
# of first appearance in `full`. Only the three needed columns are read, so
# extra or reordered columns in `full` no longer break a tuple unpacking.
attorney_map = {}
case_map = {}
I, J, data = [], [], []
for email, case_id, weight in zip(full['email'], full['case_id'], full['weight']):
    # setdefault evaluates len(...) before inserting, so a new key receives the
    # next free index and an existing key keeps its own.
    I.append(attorney_map.setdefault(email, len(attorney_map)))
    J.append(case_map.setdefault(case_id, len(case_map)))
    data.append(weight)
# attorney_idx / case_idx are kept as aliases of the final counts for any cell
# that still reads them.
n = attorney_idx = len(attorney_map)
m = case_idx = len(case_map)
# Rows are attorneys, columns are cases, values are the summed weights.
M = coo_matrix((data, (I, J)), (n, m)).tocsr()
# (M . M^T)[a, b] sums weight_a * weight_b over the cases both attorneys share.
M = M.dot(M.transpose())
# Zero the self-links in one call instead of assigning M[i, i] element by element.
M.setdiag(0)
M.eliminate_zeros()
del I, J, data
# Row index -> e-mail, taken from the mapping that built the matrix so the two
# cannot drift apart (np.unique's sorted order only matched the first-appearance
# order as long as `full` happened to be sorted by e-mail).
emails = sorted(attorney_map, key=attorney_map.get)

In [22]:
def top_results(attorneys, matrix1=M, num_results=5, end1=emails, begin1=attorney_map, printing1 = True):
    """Score every attorney against the query attorneys and return the top rows.

    Each query attorney contributes a 1 in an indicator column vector;
    multiplying ``matrix1`` by that vector sums the query attorneys' columns.

    Parameters
    ----------
    attorneys : iterable of str
        Query e-mails; each must be a key of ``begin1``.
    matrix1 : sparse matrix
        Square attorney-by-attorney matrix.
    num_results : int
        Number of highest-scoring rows to return; 0 or less returns none.
    end1, printing1
        Unused; kept so existing callers keep working.
    begin1 : dict
        Maps an e-mail to its row index in ``matrix1``.

    Returns
    -------
    (list of (int, number), numpy.matrix)
        The ``num_results`` best ``(row_index, score)`` pairs in ascending
        score order, and the dense score column for every row.

    Notes
    -----
    The defaults are bound when this cell runs, so re-run it after rebuilding
    ``M``, ``emails`` or ``attorney_map``.
    """
    size = matrix1.shape[0]
    # Use the mapping that was passed in rather than the global attorney_map.
    rows = [begin1[attorney] for attorney in attorneys]
    q_vec = coo_matrix(([1] * len(rows), (rows, [0] * len(rows))), shape=(size, 1))
    resultant_vec = matrix1.dot(q_vec).todense()
    scores = [item for sublist in resultant_vec.tolist() for item in sublist]
    sorted_results = sorted(enumerate(scores), key=itemgetter(1))
    # Slice from an explicit start: `[-num_results:]` returns the whole list
    # when num_results is 0.
    start = max(len(sorted_results) - max(num_results, 0), 0)
    final_index = sorted_results[start:]
    return final_index, resultant_vec

In [23]:
def two_pass(attorneys, matrix=M, first_pass =20, second_pass = 5, end=emails, begin=attorney_map, printing = False, print_out = True):
    """Recommend attorneys via the neighbours of the query's nearest neighbours.

    Pass 1 takes the ``first_pass`` best rows for ``attorneys`` from
    ``top_results``. Pass 2 sums the score column of each of those rows taken
    on its own, zeroes the query attorneys, and keeps the ``second_pass`` best.

    Parameters
    ----------
    attorneys : iterable of str
        Query e-mails; each must be a key of ``begin``.
    matrix : sparse matrix
        Square attorney-by-attorney matrix.
    first_pass, second_pass : int
        Number of rows kept after each pass; 0 or less keeps none.
    end : sequence of str
        Row index -> e-mail.
    begin : dict
        E-mail -> row index.
    printing : bool
        Forwarded to ``top_results`` as ``printing1``.
    print_out : bool
        Print the ranked result when true.

    Returns
    -------
    list of str
        Recommended e-mails in ascending score order (best last).
    """
    size = matrix.shape[0]
    # Only the ranked rows of the first pass are used; its score vector is discarded.
    first_hits, _ = top_results(attorneys, matrix1=matrix, num_results=first_pass, end1=end, begin1=begin, printing1=printing)
    vector = np.matrix(np.zeros((size, 1), dtype=matrix.dtype))
    for row, _score in first_hits:
        # Use this call's own matrix and mappings instead of the globals and
        # the defaults of top_results.
        vector = vector + top_results([end[row]], matrix1=matrix, end1=end, begin1=begin, printing1=printing)[1]
    for attorney in attorneys:
        # Never recommend the query attorneys back.
        vector[begin[attorney]] = 0
    scores = [item for sublist in vector.tolist() for item in sublist]
    ranked = sorted(enumerate(scores), key=itemgetter(1))
    # Slice from an explicit start: `[-second_pass:]` returns the whole list
    # when second_pass is 0.
    start = max(len(ranked) - max(second_pass, 0), 0)
    attorneys_res = [end[row] for row, _score in ranked[start:]]
    if print_out:
        # Count down from the number actually returned so the best match is always number 1.
        count = len(attorneys_res)
        for i in attorneys_res:
            print('Attorney number %d: ' %count, str(i))
            count -=1
    return attorneys_res
        

In [28]:
# NOTE(review): `a2` is assigned in a cell further down the notebook, so on a
# fresh kernel run top-to-bottom this call raises NameError.
two_pass([a2])

Attorney number 5:  pstphillip@lowey.com
Attorney number 4:  vbriganti@lowey.com
Attorney number 3:  lnussbaum@nussbaumpc.com
Attorney number 2:  ghorn@lowey.com
Attorney number 1:  clovell@lshllp.com


['pstphillip@lowey.com',
 'vbriganti@lowey.com',
 'lnussbaum@nussbaumpc.com',
 'ghorn@lowey.com',
 'clovell@lshllp.com']

In [26]:
# Sample attorney e-mails used as query inputs by the ad-hoc cells
# (`a2` by the `two_pass` call, `a1` by the shared-case lookup).
a1 = 'gjobson@fenwick.com'
a2 = 'aargiropoulos@ebglaw.com'
a3 = 'lnussbaum@nussbaumpc.com'

In [21]:
# Case ids on which attorney `a1` and a second, hard-coded attorney both appear.
blah = full[(full['email'] == a1)]['case_id']
wah = full[(full['email'] == 'dorian.daley@oracle.com')]['case_id']
# Bare last expression: the set of shared case ids is shown as the cell output.
set(blah).intersection(set(wah))

{'firoozye-v-earthlink-network', 'summit-mach-tool-mfg-v-victor-cnc-systems'}

In [None]:
# Integer slider (0-20, default 5) labelled as the first-pass size.
# NOTE(review): no visible cell reads `first_pass.value`, and an identical
# slider is created again near the end of the notebook.
first_pass = widgets.IntSlider(min=0,max=20,step=1,value=5, description='First Pass Value:')
display(first_pass)

In [123]:
# NOTE(review): the cluster address is hard-coded; consider reading it from configuration.
URL="https://3a7721e6.qb0x.com:30022"

## Initializing the Elasticsearch client

es = Elasticsearch(URL)#, use_ssl= True, verify_certs=False)

# Fetch the briefs whose `plaintiff_emails` field matches one hard-coded address,
# returning only that field for each hit.
# NOTE(review): a `size` of 100000 exceeds Elasticsearch's usual default result
# window of 10000 - presumably raised on this index; confirm.
lawyers = es.search(index="briefs", body={
    "query": {
        "match": { "plaintiff_emails": 'dorian.daley@oracle.com'}
    },
    "_source": ["plaintiff_emails"],
    "size":100000
})['hits']['hits']
# Bare last expression: the raw hit list is shown as the cell output.
lawyers

[{'_id': 'oracle-america-inc-et-al-v-hewlett-packard-enterprise-company_motion-to-strike-87-amended-answer-to-complaint-hpes-affirmative-defense-nos',
  '_index': 'briefs',
  '_score': 3.7747705,
  '_source': {'plaintiff_emails': ['brittany.lovejoy@lw.com',
    'chris.yates@lw.com',
    'christopher.campbell@lw.com',
    'deborah.miller@oracle.com',
    'dorian.daley@oracle.com',
    'jeff.ross@oracle.com',
    'meaghan.thomas-kennedy@lw.com']},
  '_type': 'doc'},
 {'_id': 'oracle-america-inc-et-al-v-hewlett-packard-enterprise-company_response-to',
  '_index': 'briefs',
  '_score': 3.3380613,
  '_source': {'plaintiff_emails': ['brittany.lovejoy@lw.com',
    'chris.yates@lw.com',
    'christopher.campbell@lw.com',
    'deborah.miller@oracle.com',
    'dorian.daley@oracle.com',
    'jeff.ross@oracle.com',
    'meaghan.thomas-kennedy@lw.com']},
  '_type': 'doc'},
 {'_id': 'oracle-usa-inc-et-al-v-rimini-street-inc-et-al_motion-to-dismiss',
  '_index': 'briefs',
  '_score': 2.8272455,
  '_s

### USPTO Querying

In [None]:
# IPython shell capture: runs the tess-search CLI with a hard-coded query and
# stores the command's output lines in `what`.
what = !node ../tess-search-master/cli.js 'Mizuho'[bi,mp,ti] and 'bank'[gs]

In [None]:
# Ad-hoc look at a single captured output line (index 12 is hard-coded).
what[12]

In [None]:
# Keep only the lines that follow the 'Logging out of TESS...' marker.
# `.index` raises ValueError when the marker is absent; because `what` is
# overwritten, that presumably includes a second run of this cell.
what = what[what.index('Logging out of TESS...')+1:]

In [None]:
# Show the remaining captured lines as the cell output.
what

In [62]:
import io
from IPython.display import display
import fileupload

def _upload():
    """Display a file-upload widget that reports the name and size of each upload.

    The size is taken from the raw bytes, so binary and non-UTF-8 files are
    accepted and the figure really is in kB (decoding the payload as UTF-8 and
    counting characters raised on binary data and under-reported multi-byte
    text).
    """

    _upload_widget = fileupload.FileUploadWidget()

    def _cb(change):
        # `change['owner']` is the widget whose `data` trait just changed.
        owner = change['owner']
        print('Uploaded `{}` ({:.2f} kB)'.format(
            owner.filename, len(owner.data) / 2 **10))

    _upload_widget.observe(_cb, names='data')
    display(_upload_widget)

_upload()

In [51]:
uploader = fileupload.FileUploadWidget()

def _handle_upload(change):
    """Save the uploaded bytes into the working directory and report their size.

    Only the final path component of the client-supplied filename is used, so
    a name containing directories cannot write outside the working directory.
    """
    import os

    w = change['owner']
    # Normalise Windows separators first so basename strips them on any platform.
    target = os.path.basename(w.filename.replace('\\', '/'))
    with open(target, 'wb') as f:
        f.write(w.data)
    print('Uploaded `{}` ({:.2f} kB)'.format(
        target, len(w.data) / 2**10))

# Run the handler every time the widget's `data` trait changes.
uploader.observe(_handle_upload, names='data')

display(uploader)

<bound method Widget.on_displayed of <fileupload.widget.FileUploadWidget object at 0x000001F9B20044E0>>

In [36]:
import ipywidgets as widgets

In [59]:
# Integer slider (0-20, default 5) labelled as the first-pass size.
# NOTE(review): this repeats an earlier cell that creates the same slider, and
# no visible cell reads `first_pass.value`.
first_pass = widgets.IntSlider(min=0,max=20,step=1,value=5, description='First Pass Value:')
display(first_pass)

In [61]:
# Integer slider (0-20, default 5) labelled as the second-pass size.
# NOTE(review): no visible cell reads `second_pass.value`.
second_pass = widgets.IntSlider(min=0,max=20,step=1,value=5, description='Second Pass Value:')
display(second_pass)