In [1]:
# Module imports
import os
from bs4 import BeautifulSoup
import pickle
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [26]:
# Let's try setting up a class Case with properties "corpus", "wordcount", "casenum""
import os
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

class Case:
    def __init__(self, textfile):
        assert os.path.isfile(textfile) == True
        # We start out with just the raw text string from the pdf
        # We should also initialize the casenumber attribute?
        with open(textfile, "rb") as file: 
            self.text = pickle.load(file)
            self.casenumber = os.path.splitext(os.path.split(textpath)[1])[0]
    # We should have a method for doing text cleanup and a method for 
    # count vectorization
    def make_corpus(self):
        '''Adds corpus attribute to the Case object by cleaning
        the raw text string'''
        # Make text lowercase: 
        text = self.text
        text = text.lower()
        # Strip all newlines: 
        text = re.sub('\n', '', text)
        self.corpus = text
    def make_unigram_matrix(self):
        '''Adds frequency-matrix-of-unigrams attribute to Case object. Requires
        that corpus attribute already exists.'''
        # currently redundant with ngram matrix
        cv = CountVectorizer(input='content',stop_words = 'english')
        freq_cv = cv.fit_transform([self.corpus])
        freq_matrix = pd.DataFrame(freq_cv.toarray(), columns = cv.get_feature_names())
        # Sort the words by frequency, most frequent first
        # freq_matrix = freq_matrix.sort_values(by=0,axis=1, ascending=False)
        # freq_matrix = freq_matrix.transpose()
        self.unigram_matrix = freq_matrix
    def make_ngram_matrix(self, n):
        '''Adds frequency-matrix-of-ngrams attribute to Case object. Requires
        that corpus attribute already exists.'''
        cv = CountVectorizer(input='content',stop_words = 'english',strip_accents='unicode',ngram_range=(1,n))
        freq_cv = cv.fit_transform([self.corpus])
        freq_matrix = pd.DataFrame(freq_cv.toarray(), columns = cv.get_feature_names())
        # Sort the words by frequency, most frequent first
        # freq_matrix = freq_matrix.sort_values(by=0,axis=1, ascending=False)
        # freq_matrix = freq_matrix.transpose()
        # note: transposing matrix makes the to_string method produce
        # a nice output, but makes indexing by ngram slightly harder?
        self.ngram_matrix = freq_matrix
    
    def make_refined_corpus(self):
        regex_spaces = '(\s+)'
        self.refined_corpus = re.sub(r'(\s+)', ' ', self.corpus)
            
    def save_to_file(self,filepath):
        with open(filepath, 'w') as f:
            f.write(self.refined_corpus)
  
        

One thing that's caused problems using the corpus is that the whitespace is very irregular, meaning matching expressions was impeded by the presence of tabs, returns, and long strings of space between words.

In [28]:
with open('short.bin', 'rb') as file: 
    shortlist = pickle.load(file)

In [35]:
def make_refined_corpus(string):
    regex_spaces = '(\s+)'
    refined_corpus = re.sub(r'(\s+)', ' ', string)
    refined_corpus = re.sub(r'[^\x00-\x7F]+', '', refined_corpus)
    return refined_corpus



In [27]:
test_case = shortlist[0]
test_case.corpus


'filed 4/2/12  p. v. khrayan ca2/2 not to be published in the official reports  california rules of court, rule 8.1115(a), prohibits courts and parties from citing or relying on opinions not certified for publication or ordered published, except as specified by rule 8.1115(b).  this opinion has not been certified for publication or ordered published for purposes of rule 8.1115.  in the court of appeal of the state of california  second appellate district  division two   the people,   plaintiff and respondent,   v.  arutyun khrayan,   defendant and appellant.        b213582        (los angeles county       super. ct. nos. ba255474, ba255302)      appeal from a judgment of the superior court of los angeles county.  kathleen kennedy, judge.  affirmed.   geragos & geragos and mark j. geragos for defendant and appellant.   kamala d. harris, attorney general, dane r. gillette, chief assistant attorney general, lance e. winters, assistant attorney general, linda c. johnson, robert david breto

In [41]:
clean_corpus_list = []
folder = os.path.join(os.path.abspath(''),'refined_corpuses')
for case in shortlist:
    clean_corpus = make_refined_corpus(case.corpus)
    writefile = os.path.join(folder,'%s.txt' % case.casenumber)
    with open(writefile, 'w') as f: 
        f.write(clean_corpus)
    clean_corpus_list.append(clean_corpus)

In [42]:
len(clean_corpus_list)

8474

In [46]:
casenumbers_la_felonies = []
for case in shortlist:
    casenumbers_la_felonies.append(case.casenumber)
with open('list_of_casenumbers.txt', 'wb') as f: 
    pickle.dump(casenumbers_la_felonies, f)

In [40]:
clean_corpus_list[6]

'filed 4/11/13 p. v. lozano ca2/3 not to be published in the official reports california rules of court, rule 8.1115(a), prohibits courts and parties from citing or relying on opinions not certified for publication or ordered published, except as specified by rule 8.1115(b). this opinion has not been certified for publication or ordered published for purposes of rule 8.1115. in the court of appeal of the state of california second appellate district division three the people, plaintiff and respondent, v. sammy lozano, defendant and appellant. b233393 (los angeles county super. ct. nos. ta077104 & ta080053) appeal from a judgment of the superior court of los angeles county, allen joseph webster, judge. affirmed with directions. george l. schraer for defendant and appellant. kamala d. harris, attorney general, dane r. gillette, chief assistant attorney general, lance e. winters, assistant attorney general, paul m. roadarmel, jr., stephanie a. miyoshi and william n. frank, deputy attorney

In [54]:
test_corpus_clean = clean_corpus_list[10]

In [69]:
# Get filing date
# Search for the string "filed " followed by a date
# return the date
def get_filing_date(corpus):
    strings = re.search(r'filed \d+/\d+/\d+', corpus)
    if len(strings) == 1:
        string = strings[0]
        date = string.replace('filed ','')
        l = date.split('/')
        date_info = {'date':date, 'month':int(l[0]), 'day':int(l[1]), 'year':int(l[2]) + 2000}
        return date_info
    elif len(strings) == 0: 
        print('No dates found.')
    else: 
        print('Multiple dates found.')
    return strings

In [73]:
for corps in clean_corpus_list[0:20]:
    print(get_filing_date(corps)['year'])


2012
2012
2012
2012
2012
2012
2013
2012
2012
2013
2012
2012
2012
2012
2012
2012
2012
2013
2012
2013


In [150]:
# Get defendant name
# Search for "respondent, v. " or similar followed by
# "name, defendant"
# return name
def get_defendant(corpus):
    defendant_name = re.findall(r'respondent, vs?\. .{3,100}?, defendants?\b', corpus)
    if len(defendant_name) == 0:
        defendant_name = ['oop', 'oh no']
    return defendant_name

In [151]:
defendant_search_term = r'respondent, vs?\. .+, defendants?'
p = get_defendant(test_corpus_clean[0])
p

['oop', 'oh no']

In [152]:
for corpus in clean_corpus_list:
    try:
        print(get_defendant(corpus)[0])
    except IndexError:
        print(len(get_defendant(corpus)))
        continue

respondent, v. arutyun khrayan, defendant
respondent, v. arsellers c. scott, defendant
respondent, v. terrell frazier et al., defendants
respondent, v. corey mcclelland et al., defendants
respondent, v. peter thomas rodriguez, defendant
respondent, v. nicole lenea parson, defendant
respondent, v. sammy lozano, defendant
oop
respondent, v. william edson kingsland, defendant
respondent, v. paki john bronson, defendant
respondent, v. margarito villasenor, defendant
respondent, v. johnny d. horton, defendant
respondent, v. tony thomas, defendant
respondent, v. rudy ruiz, defendant
respondent, v. david wesley cassis, defendant
respondent, v. michael barrios, defendant
respondent, v. oliver wendell james, defendant
respondent, v. arvind sinha, defendant
respondent, v. scott bradley millwee, defendant
respondent, v. jesus duran aguayo, et al., defendants
respondent, v. kehende m. lang, defendant
respondent, v. patrick edward panzarello, defendant
respondent, v. robert andrew repka, defendant


respondent, v. roberto avila, defendant
respondent, v. chris p. velasquez, defendant
respondent, v. richard kyle johnson, defendant
respondent, v. roland windsor vincent, defendant
respondent, v. michael carl helm, defendant
respondent, v. richard lavel hill, defendant
respondent, v. prince ayree, defendant
respondent, v. miguel g. hernandez et al., defendants
respondent, v. gabriel orozco, defendant
respondent, v. arthur lee smart, defendant
respondent, v. christopher stratis et al., defendants
respondent, v. kristopher thomas, defendant
respondent, v. adan mejia, defendant
respondent, v. ricky white, defendant
respondent, v. guillermo miranda, defendant
respondent, v. domingo manriques, defendant
respondent, v. joadanus jerome olivas, defendant
respondent, v. israel soto, defendant
respondent, v. david wayne spivey, defendant
respondent, v. jose l. molina, defendant
respondent, v. lamar david williams, defendant
respondent, v. bartolome nava, defendant
respondent, v. devonte ross, de

respondent, v. deante duckett, defendant
respondent, v. jamal rehman, defendant
respondent, v. ronald batiste, defendant
respondent, v. paul anthony wiley, defendant
respondent, v. valentin polyvko, defendant
respondent, v. husie outing, defendant
respondent, v. daryl lynch, defendant
respondent, v. angel juarez, defendant
respondent, v. keith maxwell, defendant
respondent, v. steve chavez, defendant
respondent, v. charles allen jackson, defendant
respondent, v. lorenzo micquell latimer, defendant
respondent, v. oscar vargas, defendant
respondent, v. william wall et al., defendants
respondent, v. rodney cyril carr, defendant
respondent, v. james landrum, sr., defendant
respondent, v. luis villalvazo et al., defendants
respondent, v. santos mejia, defendant
respondent, v. necole richards, defendant
respondent, v. tommy glenn crane, defendant
respondent, v. daniel enrique galvez, defendant
respondent, v. michael humberto munguia, defendant
respondent, v. ryan amirant, defendant
responden

respondent, v. juan francisco polasek, defendant
respondent, v. nanine nanez, defendant
respondent, v. jason st. pierre, defendant
respondent, v. martin oliveras, defendant
respondent, v. henry anthony ayala, defendant
respondent, v. myford david jenkins, defendant
respondent, v. martin sotelo, defendant
respondent, v. leon sims, defendant
respondent, v. gregory vernell farmer, defendant
respondent, v. gary wright, defendant
respondent, v. larry arthur list, defendant
respondent, v. randy doby et al., defendants
respondent, v. norma lilian cortez et al., defendants
respondent, v. enrique silva santoyo, defendant
respondent, v. michelle traniece davis, defendant
respondent, v. juan carlos huezo, defendant
respondent, v. william ruiz, defendant
respondent, v. salvador merced, defendant
respondent, v. quintin maddox, jr., defendant
respondent, v. stacey marie barker, defendant
respondent, v. levitius daniel wright, defendant
respondent, v. herbert henderson, defendant
respondent, v. jason

respondent, v. daniel belmonte, defendant
respondent, v. jaron deandre lucien, defendant
respondent, v. jimmy encinas, defendant
respondent, v. luis enrique fernandez, defendant
respondent, v. edgar a. carrillo, defendant
respondent, v. joseph sam leon, defendant
oop
respondent, v. davione mcdowell, defendant
respondent, v. rudolph edwards, defendant
respondent, v. steve chavez, defendant
respondent, v. adrian rodriguez, defendant
respondent, v. carlos a. rodriguez, defendant
respondent, v. wesley stanley cotton, defendant
respondent, v. artist dwayne hardy, iii, defendant
respondent, v. tanya christine byer, defendant
respondent, v. charles williams, defendant
respondent, v. herber morales, defendant
respondent, v. luis miguel noriega, defendant
respondent, v. valentino rodriguez, defendant
respondent, v. alberto h. fonseca, defendant
respondent, v. joshua pablo rosales, defendant
respondent, v. donyell ladale butler, defendant
oop
respondent, v. christopher laurence barnum, defendant

respondent, v. jonathan reyes, defendant
respondent, v. alejandro aviles, defendant
respondent, v. sean williams, defendant
respondent, v. darrell rucker, defendant
respondent, v. pablo garcia flores, defendant
respondent, v. garnik sahakian, defendant
oop
respondent, v. fred dipaolo, defendant
respondent, v. michael angelo serrato, defendant
respondent, v. steven paul moore, defendant
respondent, v. daniel chong and charlie wi wang, defendants
respondent, v. jamal mosley, defendant
respondent, v. frank dimarco, defendant
respondent, v. ricardo dejesus esquivel, defendant
respondent, v. arthur franklin knox, defendant
respondent, v. tedroy davis, defendant
respondent, v. kevin w. king, defendant
respondent, v. rohan anderson, defendant
respondent, v. michael allen garner, defendant
respondent, v. jimmy mcnabb, iii, defendant
respondent, v. kimberly grace munson, defendant
respondent, v. juan gabriel mosqueda, defendant
respondent, v. ernest cardenas cardona, defendant
respondent, v. ar

respondent, v. vincent robert casio, defendant
respondent, v. richard devonn webb, defendant
respondent, v. agustine martinez, defendant
respondent, v. hugo eliseo cabrera, defendant
respondent, v. curtis pulley, defendant
respondent, v. ronnie price, defendant
respondent, v. roberto camacho, defendant
respondent, v. jerome canady, defendant
respondent, v. james e. stewart, defendant
respondent, v. willie pogues, defendant
respondent, v. gabriel delgado, defendant
respondent, v. urban titus todd, defendant
respondent, v. marc christian steigleder, defendant
respondent, v. miguel galicia, defendant
respondent, v. jen chi liu, defendant
respondent, v. david clifton solomon, defendant
respondent, v. johnny ventura, defendant
respondent, v. gary devaughn lebon, defendant
respondent, v. david wilkes, defendant
respondent, v. luis lazaro carranza et al., defendants
respondent, v. alfredo ascencio, defendant
respondent, v. angel samos, defendant
respondent, v. charles holmes, jr., defendant
r

respondent, v. otilia castrellon-zamora, defendant
respondent, v. mohamad bedier et al., defendants
respondent, v. angel luis rivera, defendant
respondent, v. brian zachary kintz, defendant
respondent, v. whytinnie lee gilbert et al., defendants
respondent, v. ronald davenport, defendant
respondent, v. steve gonzalez, defendant
respondent, v. stefone f. kirk, defendant
respondent, v. german jimenez, defendant
respondent, v. harry lee glaser, defendant
respondent, v. pedro cantu, defendant
respondent, v. michael calimon, defendant
respondent, v. steven candler, defendant
respondent, v. ricardo hernandez, defendant
respondent, v. gary anthony sanchez, jr., defendant
respondent, v. thai yang, defendant
respondent, v. jimmy santana, defendant
respondent, v. donald eugene phillips, defendant
respondent, v. rico r. gutierrez, defendant
oop
respondent, v. semaj johnson, defendant
respondent, v. william scott roberts, defendant
respondent, v. alan day, defendant
respondent, v. eduardo herrera 

respondent, v. thomas scott, defendant
respondent, v. eric sean brown, jr., et al., defendants
respondent, v. crystal mejia et al., defendants
respondent, v. aaron victor stathum, defendant
respondent, v. timothy daniel hall, defendant
respondent, v. jesse gonzalez, defendant
respondent, v. kevin lee goyette, defendant
respondent, v. dennis wallace et al., defendants
respondent, v. eric jose ortiz and ricardo velasquez, defendants
respondent, v. randal a. ruiz, defendant
respondent, v. manuel robledo, defendant
respondent, v. steven carvajal, defendant
respondent, v. larry d. hewitt, defendant
respondent, v. jesus cueva garcia, defendant
respondent, v. quinten white, defendant
respondent, v. omar sanchez et al., defendants
respondent, v. gregory jimenez salcido, defendant
oop
respondent, v. jaime aguayo, defendant
respondent, v. domanick campbell, defendant
respondent, v. albert martin thierry, jr., defendant
respondent, v. richard edmond, defendant
respondent, v. hateem abdul shareef,

respondent, v. edward benavidez, defendant
respondent, v. jerome lamont gardner, defendant
respondent, v. sam edward anderson, defendant
respondent, v. carlos ortiz, defendant
respondent, v. noe ramirez, defendant
respondent, v. james rogers smith, defendant
respondent, v. aaron esqueda rojas, defendant
respondent, v. william henry baldwin, jr., et al., defendants
respondent, v. brian virden, defendant
respondent, v. ogo gueye, defendant
respondent, v. roberto a. isida, defendant
respondent, v. eli dominic wright, defendant
respondent, v. natasha jean pereira, defendant
respondent, v. carlos guzman, defendant
respondent, v. rosean damont taylor, defendant
respondent, v. noe baeza et al., defendants
respondent, v. jeffery l. johnson, defendant
respondent, v. oscar delgadoplatero, defendant
respondent, v. jean lambey, defendant
respondent, v. joseph babin, defendant
respondent, v. frank lee smith, jr., defendant
respondent, v. gerardo haro, defendant
respondent, v. rickey mcpherson, defe

respondent, v. jonathan navarette, defendant
respondent, v. julio recio, defendant
respondent, v. donald hudson, defendant
respondent, v. carlos alexander lopez, defendant
respondent, v. fred nowden, defendant
respondent, v. justin thomas miller, defendant
respondent, v. melvin parker, defendant
respondent, v. bruce westin, defendant
respondent, v. carlos chavira, defendant
respondent, v. tyrone kennedy, defendant
respondent, v. jamurl a. scott, defendant
respondent, v. anthony james carillo, defendant
respondent, v. ricky ray vinson, defendant
respondent, v. juan gilberto medrano, defendant
respondent, v. timothy s. jones, defendant
respondent, v. michael perez, defendant
respondent, v. ivan alquicira, defendant
respondent, v. richard delvone ray, defendant
respondent, v. rudolph aguilar, defendant
respondent, v. raymond hurdle osborne, defendant
respondent, v. martin macias landeros et al., defendants
respondent, v. kenneth wayne mills, defendant
respondent, v. edgar mazyck, defendan

respondent, v. lee white, defendant
respondent, v. barbara grady, defendant
respondent, v. raul torres, jr., defendant
respondent, v. willie theophus lewis, defendant
respondent, v. domunique ruff, defendant
respondent, v. ferone lawrence tweedy, defendant
respondent, v. dennis kevin coronado, defendant
respondent, v. ramiro lazaro, defendant
respondent, v. dwayne basil west, defendant
respondent, v. otis clements, defendant
respondent, v. rene velasquez, defendant
respondent, v. harum patterson, defendant
respondent, v. ulises orona, defendant
respondent, v. hector francisco molina, defendant
respondent, v. james leonard nyquist, defendant
respondent, v. lawrence gomes, defendant
respondent, v. aaron manuel mora, defendant
respondent, v. derek sida, defendant
respondent, v. christopher surico, defendant
respondent, v. robert larry walker, jr., defendant
respondent, v. roger anthony jimenez, defendant
oop
respondent, v. miguel salazar, defendant
respondent, v. johnny quino, defendant
r

respondent, v. sione tupe, defendant
respondent, v. juan villegas et al., defendants
respondent, v. juan tranquilino, defendant
respondent, v. carlos estrada, defendant
respondent, v. norice patterson, defendant
respondent, v. devin deshon jones, defendant
respondent, v. lexington national insurance corporation, defendant
respondent, v. michelle ford, defendant
respondent, v. kenneth dewyane hill, et al., defendant
respondent, v. ricardo rafael ramirez, defendant
respondent, v. keith carriere, defendant
respondent, v. nicky hernandez, defendant
respondent, v. michael eric mills, defendant
respondent, v. izac mccloud, defendant
respondent, v. ehab aly mohamed, defendant
respondent, v. jose gonzalez ruiz, defendant
respondent, v. dwayne michael haley, defendant
respondent, v. isam smith, defendant
respondent, v. rebecca coleman, defendant
respondent, v. robert jackson, defendant
respondent, v. james mcclelland, defendant
respondent, v. eric russell, defendant
respondent, v. stanley s. pa

respondent, v. katherine walters, defendant
respondent, v. steve montoya, defendant
respondent, v. trevor james lawson, defendant
respondent, v. marco valdizan, defendant
respondent, v. eric hidalgo et al., defendants
respondent, v. franklin devon harris, defendant
respondent, v. hector javier torres, defendant
respondent, v. richard lee perryman, defendant
respondent, v. towana latrice byers, defendant
respondent, v. muslim fadiboard, defendant
respondent, v. ruhani bustamante, defendant
respondent, v. chhann chhaim, defendant
respondent, v. jose j. mota et al., defendants
respondent, v. arturo burciaga, defendant
respondent, v. carlos vargas et al., defendant
respondent, v. clide mays, defendant
respondent, v. silvio hernandez, defendant
respondent, v. lande patrice butler, defendant
respondent, v. jeramy gerardo, defendant
respondent, v. benny anthony sanchez, defendant
respondent, v. anthony moses zamora, defendant
respondent, v. curtis watkins, defendant
respondent, v. david israe

respondent, v. salvador rios romo, defendant
respondent, v. kiana barker, defendant
respondent, v. undrey pierre jordan, defendant
respondent, v. ernesto hernandez, defendant
respondent, v. david daniel nevarez, defendant
respondent, v. christopher edwards, defendant
respondent, v. michael daniels, defendant
respondent, v. ricardo m. aguirre, defendant
respondent, v. dharmendra prasad, defendant
respondent, v. dorian rosalio guerrero, defendant
respondent, v. rhett eric edwards, defendant
respondent, v. marquis wayne james, defendant
respondent, v. bronco corzo and brian figueroa, defendants
respondent, v. arthur ramos, defendant
respondent, v. si h. liu, defendant
respondent, v. kyle jennings, defendant
respondent, v. angel robert zuniga, defendant
respondent, v. jimmy hill, defendant
respondent, v. osbaldo luna rubio, defendant
respondent, v. brendan j. lee et al., defendants
respondent, v. william chumley, defendant
respondent, v. charles wayne netherly, defendant
respondent, v. jor

respondent, v. gavino cirilo ramos, defendant
respondent, v. victor garcia, defendant
respondent, v. antonio hernandez, defendant
respondent, v. joshua lockett et al., defendants
respondent, v. ali o. egal, defendant
respondent, v. fanny stephanie gallegos, defendant
respondent, v. fidel andrew aguirre, defendant
respondent, v. claude h. thomas, defendant
respondent, v. oscar alvarado, defendant
respondent, v. destyn anthony rickman, defendant
respondent, v. julio garcia, defendant
respondent, v. erica artavia campbell, defendant
respondent, v. kenneth ray johnson et al., defendants
respondent, v. fernando marron albarran, defendant
respondent, v. peteru sepetaio, defendant
respondent, v. l bell, defendant
respondent, v. darrell rucker, defendant
respondent, v. gregory jackson, defendant
respondent, v. kevin latham, defendant
respondent, v. daniel arce, defendant
respondent, v. kenjuan d. adams, defendant
respondent, v. lawrence christopher wills, defendant
respondent, v. michael okeef

respondent, v. edwin lugo, defendant
respondent, v. sergio villalobos, defendant
respondent, v. michael lashon martinez, defendant
respondent, v. pierre tony redd, defendant
respondent, v. mel tyrone edward, defendant
respondent, v. mike donis, defendant
respondent, v. afif r. chammas, defendant
respondent, v. juan hernandez, defendant
respondent, v. david lee dove, defendant
respondent, v. konstanty makowski, defendant
respondent, v. jairo martinez, defendant
respondent, v. miguel angel espinoza, defendant
respondent, v. richard paul garcia, defendant
respondent, v. maximiliano quinteros hernandes, defendant
respondent, v. brandon ellis thomas, defendant
respondent, v. scott nolden, defendant
respondent, v. neil benjamin mcneeley, defendant
oop
respondent, v. jason stout, defendant
respondent, v. david brian abbott, defendant
respondent, v. scott shipley, defendant
respondent, v. william henry rouss, defendant
respondent, v. alfonso loaiza, defendant
respondent, v. elias moreno, defen

respondent, v. indiana lumbermens mutual insurance company, defendant
respondent, v. patricia bagley, defendant
respondent, v. david kim, defendant
respondent, v. travis robinson, defendant
respondent, v. john m. angol, defendant
respondent, v. dupree jackson and ewayne berry, defendants
respondent, v. qays mahjoob, defendant
respondent, v. jonathan aj deran powell, defendant
respondent, v. timothy jerode johnson, defendant
respondent, v. jabaz hebert, defendant
respondent, v. frankie perez, defendant
respondent, v. kelvin williams et al., defendants
respondent, v. howard lee, defendant
respondent, v. saul campos, et al., defendants
respondent, v. james williams, defendant
respondent, v. julian carter, defendant
respondent, v. alex donald jackson, defendant
respondent, v. albert alexander chavez, and elizabeth chavez, defendants
respondent, v. walter pereira, defendant
respondent, v. jonathon christopher vivo, defendant
respondent, v. travon eddie summers, defendant
respondent, v. amos

respondent, v. santana kelly, defendant
respondent, v. roger paul davis, defendant
respondent, v. shaun gregory williams, defendant
respondent, v. mario medina, defendant
oop
respondent, v. jesse ignacio gonzales, defendant
respondent, v. guillermo rodriguez, defendant
respondent, v. luon quon tran, defendant
respondent, v. juan gabriel trinidad, defendant
respondent, v. roman tansiel, defendant
respondent, v. toreano josephus browning, defendant
respondent, v. amanda vargas, defendant
respondent, v. steven garcia, defendant
respondent, v. curtis perry, defendant
respondent, v. gregory dean jolley, defendant
respondent, v. chris lamar smith, defendant
respondent, v. phillip richard powers et al., defendants
respondent, v. john a. magana, defendant
respondent, v. victor hugo sanchez, defendant
respondent, v. anthony rory cummings, defendant
respondent, v. ravon good, defendant
respondent, v. timothy lamar thompson, defendant
respondent, v. juan carlos amaya, defendant
respondent, v. joh

respondent, v. ronald art ward, defendant
oop
respondent, v. jonathan rodriguez, defendant
respondent, v. davshawn laray hennes, defendant
respondent, v. albert walker, defendant
respondent, v. dannon r. bryant, defendant
respondent, v. darrin san martin, defendant
respondent, v. antoine lamont downs, defendant
respondent, v. ryan taylor bright, defendant
respondent, v. julio cesar guevara, defendant
respondent, v. rene velasquez, defendant
respondent, v. samuel r. santiago, defendant
respondent, v. dylan frausto, defendant
respondent, v. joel campos, defendant
respondent, v. jesus armando hernandez, defendant
respondent, v. jonathan scott chacon, defendant
respondent, v. hoa duc le, defendant
respondent, v. sasha martinez, defendant
respondent, v. joshua rogers, et al., defendants
respondent, v. silvestre carvajal sanudo, defendant
respondent, v. david pangborn, defendant
respondent, v. dematray huggins, defendant
respondent, v. stefan mats linden, defendant
respondent, v. mika lavata

respondent, v. terry evans, defendant
respondent, v. leonard hall, defendant
respondent, v. joseph edward carrington, defendant
respondent, v. elvis aaron robles, defendant
respondent, v. jose luis munoz, defendant
respondent, v. marquise caliz, defendant
oop
respondent, v. orlando hinkston, defendant
respondent, v. macallaghan, defendant
respondent, v. charles newman, defendant
respondent, v. ivan goodlow, defendant
respondent, v. marquiette brown, defendant
respondent, v. carlos g. sanchez, defendant
respondent, v. roger henry neufeld, defendant
respondent, v. michael hurtado, defendant
respondent, v. jesus alberto peralta et al., defendants
respondent, v. jimmie laster, defendant
respondent, v. said shahmohamadian, defendant
respondent, v. raymond mikhi, defendant
respondent, v. david paz vasquez, defendant
respondent, v. darryl demetrius west, defendant
respondent, v. roger gutierrez, et al., defendant
respondent, v. joe lopez, defendant
respondent, v. alfredo islas-contreras, defe

respondent, v. shonte mosley, defendant
respondent, v. antonio roman, et al., defendants
respondent, v. edwin olivares, defendant
respondent, v. jose luna, defendant
respondent, v. ricky kamerica fontenot, defendant
respondent, v. cameron brown, defendant
respondent, v. ieon crawford, defendant
respondent, v. daniel pacheco, defendant
respondent, v. victor wayne webb, defendant
respondent, v. william arthur clark, defendant
respondent, v. benedicto asuncion, defendant
respondent, v. albert franco, defendant
respondent, v. nathan j. soto, defendant
respondent, v. tylan lionel gregory, defendant
respondent, v. ralph lopez, defendant
respondent, v. ernestine garcia, defendant
respondent, v. maurice sowells, defendant
respondent, v. james w. jacobs, defendant
respondent, v. cuong minh le, defendant
respondent, v. nicholas cade gudeon, defendant
respondent, v. john silva ramirez, defendant
oop
respondent, v. tracy allen montano, defendant
respondent, v. nicholas isaac adegbulugbe, defendant

respondent, v. financial casualty & surety, inc., defendant
respondent, v. alex dejesus linares, defendant
respondent, v. bradley eugene shells, defendant
respondent, v. alberto franco, defendant
respondent, v. robert genel, defendant
respondent, v. nancy jennifer arellano, defendant
respondent, v. dedrick brown, defendant
respondent, v. jay curtis brooks, defendant
respondent, v. freddy dawoud, defendant
respondent, v. jamel walker, defendant
respondent, v. dewey west, defendant
respondent, v. michael anthony dowden, defendant
respondent, v. dwight evans, defendant
respondent, v. jeffrey l. johnson, defendant
respondent, v. alonzo mckinney, defendant
respondent, v. david m. vasquez, defendant
respondent, v. sandy clinton lockheart, defendant
respondent, v. terell floyd, defendant
respondent, v. george gary leopard, defendant
respondent, v. oscar rodriguez, defendant
respondent, v. reggie cervantes, defendant
oop
respondent, v. louis edwards, defendant
respondent, v. james e. moore, de

respondent, v. abraham ruiz magdeleno, defendant
respondent, v. vanessa rebecca celaya, defendant
respondent, v. loren james hoelscher, defendant
respondent, v. christopher conrad johnson, defendant
respondent, v. frank haverly, et al., defendants
respondent, v. gilbert tapia, defendant
oop
respondent, v. juanita simpson, defendant
respondent, v. marvin bonifacio, defendant
respondent, v. alfred garcia, jr., defendant
respondent, v. jeffery deandre davis, defendant
respondent, v. scott alan lyles, defendant
respondent, v. christopher condee, defendant
respondent, v. lemont e. townsend et al., defendants
respondent, v. dizha ominique sanders, defendant
respondent, v. majestic zulu, defendant
respondent, v. raul aguilar, defendant
respondent, v. jonathan lamar askew, defendant
respondent, v. miguel carnero, defendant
respondent, v. allis troy coleman, defendant
respondent, v. anthony scott grantham, defendant
respondent, v. jamarae lamonze keyes, defendant
respondent, v. robert nico rami

respondent, v. cameron alex cole, defendant
respondent, v. antonio lenis smith, defendant
respondent, v. eddie earl haskin, defendant
respondent, v. kindrick d. thomas, defendant
oop
respondent, v. jose benitez, defendant
respondent, v. joseph salazar, defendant
respondent, v. eliel benitez, defendant
respondent, v. demetrous roland stillman, defendant
respondent, v. roy dycrus clay, defendant
respondent, v. jabaar v. thomas, defendant
respondent, v. harold howard, defendant
respondent, v. wilfredo cruz, defendant
respondent, v. george edward avila, defendant
respondent, v. donnell paige, defendant
respondent, v. charles edward allen, defendant
respondent, v. joel coronado, defendant
respondent, v. james duff, defendant
respondent, v. kenneth ray jackson, defendant
respondent, v. silvio hernandez, defendant
respondent, v. matthew lucifer mouton, defendant
respondent, v. carlos villegas, defendant
respondent, v. daniel guerrero, defendant
respondent, v. shumonte white, defendant
respond

respondent, v. daniel rios, defendant
respondent, v. john jaramillo, defendant
respondent, v. edward ojeda, defendant
respondent, v. simon alejandro quijas, defendant
respondent, v. todd tracy chism, defendant
respondent, v. liderato carlos beltran, defendant
respondent, v. nathaniel d. smith, defendant
respondent, v. ernestine chaney, defendant
respondent, v. quinn deshawn sterling, defendant
respondent, v. ernest padilla, defendant
respondent, v. darnell godfrey, defendant
respondent, v. esther solis, defendant
respondent, v. michael harden, defendant
respondent, v. dean edward manes, defendant
respondent, v. clifton hayes, defendant
respondent, v. julio alberto martinez, defendant
respondent, v. kim hubbard, defendant
respondent, v. john wayne willis, defendant
respondent, v. doris hartfield, defendant
respondent, v. david nieuwendaal, defendant
respondent, v. anthony joel williams, defendant
respondent, v. marquise jackson, defendant
respondent, v. anthony dwayne thompson, defendan

respondent, v. luis a. vela, defendant
respondent, v. jason zarr haber, defendant
respondent, v. robert enciso, defendant
respondent, v. andrew cachu, defendant
respondent, v. heriberto cruz, defendant
respondent, v. jerry ray espinosa, defendant
respondent, v. sandy jazmin davalos, defendant
respondent, v. christine anne rodin, defendant
respondent, v. francisco valdez, defendant
respondent, v. gerald theodore smith, defendant
respondent, v. david wayne hoyle, defendant
respondent, v. rosalio ordorica, defendant
respondent, v. markese dewon clark, defendant
respondent, v. darryl glenn brownlee, defendant
respondent, v. ezra jay kirk, defendant
respondent, v. marcos cruz, defendant
respondent, v. javier sanchez, defendant
respondent, v. kevin deon johnson et al., defendants
respondent, v. tommy cole, defendant
respondent, v. max carrillo, defendant
respondent, v. wayne e. woods, defendant
respondent, v. armando martin, defendant
respondent, v. edward l. martinez, defendant
respondent, 

respondent, v. carlos daniels, defendant
respondent, v. george parker, defendant
respondent, v. joaquin linares, defendant
respondent, v. cornell aaron brown, defendant
respondent, v. r.j., defendant
respondent, v. margie valeriano dikit, defendant
respondent, v. alvin ray shaw, jr., defendant
respondent, v. joe peter montoya, defendant
respondent, v. misael agid martinez, defendant
respondent, v. john p. firestone-kelly, defendant
respondent, v. harry lee ollie, defendant
respondent, v. wesley garcia, defendant
respondent, v. jose pepe mitchell, defendant
respondent, v. cuauhtemoc martinez, defendant
respondent, v. victor weathers, defendant
respondent, v. peter l. burchett, defendant
respondent, v. daveon travon toutar, defendant
respondent, v. hall lycurgus johnson, defendant
respondent, v. artis wilson, defendant
respondent, v. edgar arellano, defendant
respondent, v. christopher propps, defendant
respondent, v. agustin perez, defendant
respondent, v. jemar welch, defendant
respond

respondent, v. frankie calanche lopez, defendant
respondent, v. pierre lakell morris, defendant
respondent, v. brittany ann ingrassi, defendant
respondent, v. lloyd carr, defendant
respondent, v. dejuan lamont robertson, defendant
respondent, v. leslie croaker, defendant
respondent, v. nathaniel cyprian, defendant
respondent, v. jae jeong lyu, defendant
respondent, v. willie fred stephens, sr., defendant
respondent, v. angelo camacho, defendant
respondent, v. chaka grossman, defendant
respondent, v. arthur manuel villegas, defendant
respondent, v. matthew louis johnson, defendant
oop
respondent, v. anthony lamar mitchell, defendant
respondent, v. cody william leffler, defendant
respondent, v. anthony jonathan nila, defendant
respondent, v. omar delgado, defendant
respondent, v. ricardo avila, defendant
respondent, v. gabriel joseph casados, defendant
respondent, v. jack g. dadanian, defendant
respondent, v. sandra ritchie, defendant
respondent, v. robert mcarthur barnes, jr., defendant

respondent, v. leonardo salmeron, defendant
respondent, v. norman elder grundy, defendant
respondent, v. joshua hurtado, defendant
respondent, v. manuel jesus serrano, defendant
respondent, v. arturo acevedo, defendant
respondent, v. ernest j. casique, defendant
respondent, v. raul martinez, defendant
respondent, v. david worley, defendant
respondent, v. ronnie lee roman, defendant
respondent, v. adekunle olobayo-aisony, defendant
respondent, v. ezekiel simon, defendant
respondent, v. scott brian garcia, defendant
respondent, v. shvonte abdual bogard, defendant
respondent, v. jesse delgado, defendant
respondent, v. aaron arredondo, defendant
respondent, v. jordan koziol et al., defendants
respondent, v. alon oneil foster, defendant
respondent, v. hector bravo, defendant
respondent, v. gerald w. woods, defendant
respondent, v. michael clayton perkins, defendant
respondent, v. rhodanker mcguffie, defendant
respondent, v. anucha suwannangkul, defendant
respondent, v. michael parks, defend

respondent, v. franky robert mendez, defendant
respondent, v. john burns, defendant
respondent, v. christopher harrison goodine, defendant
respondent, v. jerome major carter, defendant
respondent, v. david gordon mountford, defendant
respondent, v. clyde richards, defendant
respondent, v. carlos cardoso, defendant
respondent, v. deante reginald smith, defendant
respondent, v. albert florencio chagollan, defendant
respondent, v. diane christine vigil, defendant
respondent, v. ramon armijo, defendant
respondent, v. jerome dean, defendant
respondent, v. trayvion rene thomas, defendant
respondent, v. tavares londell mcintosh, defendant
respondent, v. jason jimenez, defendant
respondent, v. chester alan duncan, defendant
respondent, v. romelio corelio espinoza, defendant
respondent, v. george alfaro, defendant
respondent, v. edwin phillips, defendant
respondent, v. tony smith et al, defendants
respondent, v. robert veiga, defendant
respondent, v. eduardo samayoa salguero, defendant
responde

respondent, v. phillip gallegos, defendant
respondent, v. juan carlos aquino, defendant
respondent, v. manuel anthony garibay, defendant
respondent, v. theodore justin bowers, defendant
respondent, v. lester mcdoughtery, defendant
respondent, v. demetris cornelious, defendant
respondent, v. michael anthony blankenship, defendant
respondent, v. troy m. hill, defendant
respondent, v. mark anthony gallegos, defendant
respondent, v. gregory anderson, defendant
respondent, v. rafiki smith, defendant
respondent, v. serafin torres urbina, defendant
respondent, v. robert james spells, defendant
respondent, v. robert clay woods, defendant
respondent, v. garveia brandon freeny, et al., defendants
respondent, v. bulmario g. torres, defendant
respondent, v. salvador hernandez, defendant
respondent, v. andrea almanza alfaro et al., defendants
respondent, v. james kelly norton, defendant
respondent, v. gerry mclean bailey, defendant
respondent, v. ismael alejandro sepulveda, defendant
respondent, v.

In [None]:
# Get defendant attorney
# search for ". names " in 

In [59]:
s = get_filing_date(test_corpus_clean[0:2000])
s

[]

In [62]:
re.findall(r'filed \d+/\d+/\d+', test_corpus_clean)

['filed 8/22/12']

In [61]:
test_corpus_clean[0:2000]

' filed 8/22/12 p. v. villasenor ca2/7 not to be published in the official reports california rules of court, rule 8.1115(a), prohibits courts and parties from citing or relying on opinions not certified for publication or ordered published, except as specified by rule 8.1115(b). this opinion has not been certified for publication or ordered published for purposes of rule 8.1115. in the court of appeal of the state of california second appellate district division seven the people, plaintiff and respondent, v. margarito villasenor, defendant and appellant. b234343 (los angeles county super. ct. nos. ka093275, ka090956) appeal from a judgment and an order of the superior court of los angeles county, mike camacho, judge. judgment affirmed; order affirmed as modified. david l. polsky, under appointment by the court of appeal, for defendant and appellant. kamala d. harris, attorney general, dane r. gillette, chief assistant attorney general, lance e. winters, assistant attorney general, jam

In [163]:
attorney = r'(\. .{3,100}? for defendants?)'
appointed = r'(under appointment)'
outcome = r'(\. judgm?en?t? .{3,100}?\.)'


In [164]:
def search_corpus(corpus, string): 
    found = re.findall(string, corpus[0:2000])
    if len(found) == 0:
        print('Not found.')
    return found
     

In [165]:
for corpus in clean_corpus_list[0:20]:
    print((search_corpus(corpus, attorney), search_corpus(corpus, appointed), search_corpus(corpus, outcome)))

Not found.
Not found.
(['. kathleen kennedy, judge. affirmed. geragos & geragos and mark j. geragos for defendant'], [], [])
Not found.
(['. affirmed. richard c. neuhoff and barbara a. zuras, under appointment by the court of appeal, for defendant'], ['under appointment'], [])
Not found.
(['. affirmed as modified. joseph shipp, under appointment by the court of appeal, for defendant', '. edward j. haggerty, under appointment by the court of appeal, for defendant'], ['under appointment', 'under appointment'], [])
Not found.
(['. affirmed as modified. sarah a. stockwell, under appointment by the court of appeal, for defendant', '. thomas owen, under appointment by the court of appeal, for defendant'], ['under appointment', 'under appointment'], [])
Not found.
(['. appeal dismissed. gary v. crooks, under appointment by the court of appeal, for defendant'], ['under appointment'], [])
Not found.
(['. charles a. chung, judge. affirmed. karyn h. bucur, under appointment by the court of appeal