In [83]:
# libraries

import re

In [84]:
# config

def get_vancouver():
    """Return the elements of a Vancouver-style conference-paper reference.

    Returns:
        list[str]: nine descriptive labels, one per element of the
        reference, in the order they appear in a citation.
    """
    elements =  ["Author(s) of paper – family name and initials.", "Title of paper.", 
            "In: Editor(s) Family name and initials editor(s).","Title of conference.", 
            "Date of conference;", "Place of conference.", "Place of publication: Publisher’s name", 
            "Publication year", "p. Page numbers."]
    # The list used to be built and then discarded, so the getter returned None.
    return elements
#           Bengtsson S, Solheim BG. Enforcement of data protection in medical informatics. 
#           In: Lun KC, Degoulet P, Piemme TE, Reinhoff O, editors. MEDINFO 92. Proceedings of the 7th World Congress on Medical Informatics. 
#           1992 Sep 6‐10;          Geneva, Switzerland.    Amsterdam (NL): North Holland; 
#           1992. p. 1561‐5.

def get_verbose():
    """Return the verbosity switch as a string.

    Returns:
        str: "no" for quiet operation; edit the literal to "yes" to turn
        on the debug prints guarded by this switch.
    """
    return "no"  # alternative value: "yes"

In [85]:
# in

def read_raw(meeting):
    """Read the raw text file of one meeting and split it into lines.

    Args:
        meeting: meeting number (e.g. 7). It is converted with str() and
            inserted into the file name "<meeting>th_eviprg_raw.txt",
            which is opened relative to the current working directory.

    Returns:
        list[str]: the file content split on newline characters. A file
        ending with a newline therefore yields a final empty string.

    Raises:
        OSError: if the file cannot be opened.
    """
    path = "{}th_eviprg_raw.txt".format(str(meeting))
    # Context manager closes the handle even if reading fails; the previous
    # bare open(...).read() left the file open until garbage collection.
    with open(path) as handle:
        eviprg_raw = handle.read().split("\n")
    return eviprg_raw

In [86]:
# clean

def concat_lower_case(corrected_qtop):
    """Re-join lines that were wrapped in the source text.

    A line whose first character is lower-case is treated as the
    continuation of the entry before it and is appended to that entry,
    separated by one space. Every other non-empty line starts a new entry.

    Compared with the earlier pairwise implementation this version
      * keeps the final line (it used to be dropped unconditionally),
      * joins any number of consecutive continuation lines,
      * keeps lines that start with a digit, quote or other non-letter,
      * skips empty strings instead of raising IndexError.

    Args:
        corrected_qtop: iterable of str, one item per input line.

    Returns:
        list[str]: the joined entries, in input order.
    """
    concatenated = []
    for line in corrected_qtop:
        if not line:
            # Nothing to inspect or join; indexing line[0] would fail.
            continue
        if line[0].islower() and concatenated:
            # Continuation: glue onto the entry currently being built.
            concatenated[-1] = concatenated[-1] + " " + line
        else:
            concatenated.append(line)
    return concatenated

def correct_Qnnn_to_pnnn(has_p):
    """Turn a trailing "Q<digits>" marker into " p. <digits>".

    Only a "Q" that is directly followed by digits at the very end of the
    text is rewritten. Earlier code replaced every "Q" in the text (so a
    title containing "Quality" was mangled), handled three-digit numbers
    only, and raised IndexError on three-character texts.

    Args:
        has_p: iterable of (number, text) pairs; the number is ignored.

    Returns:
        list[str]: the texts (numbers dropped) with the marker rewritten.
        Texts of two characters or fewer are left out, as before (the
        original marks that filter with the comment "page numbers").
    """
    corrected = []
    for snippet in has_p:
        sn_text = snippet[1]
        if len(sn_text) > 2:    # page numbers
            corrected.append(re.sub(r"Q(\d+)$", r" p. \1", sn_text))
    return corrected

def sniff_chapter(snippet):
    """Tell whether a numbered snippet is a chapter heading.

    Args:
        snippet: (number, text) pair; only the text is inspected.

    Returns:
        bool: True when the text begins with "Chapter".
    """
    return snippet[1].startswith("Chapter")

def sniff_preface(snippet):
    """Tell whether a numbered snippet is a preface heading.

    Args:
        snippet: (number, text) pair; only the text is inspected.

    Returns:
        bool: True when the text begins with "Preface".
    """
    return snippet[1].startswith("Preface")

def find_extra(numbered_raw):
    """Collect the numbers of snippets that are chapter or preface headings.

    Args:
        numbered_raw: sequence of (number, text) pairs. It is traversed
            twice, once per heading kind.

    Returns:
        list: numbers of all chapter headings (in input order) followed by
        the numbers of all preface headings (in input order).
    """
    # First pass: chapter headings.
    are_extra = [entry[0] for entry in numbered_raw if sniff_chapter(entry)]
    if get_verbose() == "yes":
        print("are chapters:\n", are_extra)
    # Second pass: preface headings, appended after the chapters.
    are_extra.extend(entry[0] for entry in numbered_raw if sniff_preface(entry))
    if get_verbose() == "yes":
        print("are chapters then prefaces:\n", are_extra)        
    return are_extra

def spacedots_to_pages(no_extra):
    """Replace dot leaders in front of a page number with " p. ".

    A dot leader is a run of one or more " ." pairs. The run, together
    with any spaces after it, is replaced by " p. " when a digit follows,
    e.g. "Title . . . . 27" becomes "Title p. 27".

    The earlier implementation went through a temporary "Q" marker, which
    corrupted texts that contain a real "Q" ("IQ and" became "I p. and",
    a "Q" right before the leaders was swallowed) and also rewrote any
    single " ." followed by a space. Matching the leaders directly avoids
    both problems. Leaders that touch the number (" . .455") now also
    yield " p. 455" directly rather than the intermediate "Q455".

    Args:
        no_extra: iterable of (number, text) pairs; the number is ignored.

    Returns:
        list[tuple[int, str]]: the converted texts, renumbered from 0 with
        enumerate (the incoming numbers are not carried over).
    """
    # (?: \.)+  the leader run, " *" optional padding, (?=\d) a page number follows.
    leader = re.compile(r"(?: \.)+ *(?=\d)")
    has_p = [leader.sub(" p. ", snippet[1]) for snippet in no_extra]
    has_pages = list(enumerate(has_p))
    return has_pages

In [93]:
# main

if __name__=="__main__":
    # End-to-end run for one meeting. Every intermediate result is kept in
    # its own name so each stage can be inspected after the cell has run.
    meeting                 = 7
    # Raw lines of the file "<meeting>th_eviprg_raw.txt".
    eviprg_raw              = read_raw(meeting)
    # Pair every line with its position so lines can be excluded by number.
    numbered_raw            = list(enumerate(eviprg_raw))
    # Numbers of the lines that start with "Chapter" or "Preface".
    are_extra               = find_extra(numbered_raw)
    # Keep only the lines whose number was not flagged above.
    no_extra                = [sn for sn in numbered_raw if sn[0] not in are_extra]  
    # Dot leaders -> " p. "; the result is renumbered from 0.
    has_pages               = spacedots_to_pages(no_extra)
    # Repair leftover "Qnnn" page markers; returns plain texts (no numbers).
    corrected_qtop          = correct_Qnnn_to_pnnn(has_pages)
    # Join lines that start lower-case onto the line before them.
    concatenated_lower_case = concat_lower_case(corrected_qtop)
    # Print each result as an (index, text) tuple.
    for i in enumerate(concatenated_lower_case):
        print(i)
    
        

(0, 'The Comprehensive Assessment of Psychopathic Personality (CAPP) p. 27')
(1, 'Stephen Hart (Canada)')
(2, 'The Short-Term Assessment of Risk and Treatability (START): measuring and managing risk in the short-term p. 29')
(3, 'Caroline Logan Michael Doylee (UK)')
(4, 'European Association in Mental Health and Intellectual Disability (EAMHID)')
(5, 'Guidelines for Assessment, Diagnostic and Treatment of Severe Problem')
(6, 'Behaviour in Persons with Intellectual Disability p. 30')
(7, 'Anton Došen (Netherlands)')
(8, 'The bulldozer and the ballet dancer: Aspects of nurses’ caring approaches in acute psychiatric intensive care p. 31')
(9, 'Anna Björkdahl, Tom Palmstierna, Hansebo Görel')
(10, 'SLSO, Health Care Provision, Stockholm County Council, Stockholm, Sweden')
(11, 'The possibilities and pitfalls in violence risk assessment and management p. 33')
(12, 'Patrick Callaghan')
(13, 'University of Nottingham, UK')
(14, 'Violence and Mandated Community Treatment: The MacArthur Studie

In [92]:
# tests

def test_concat_lower_case():
    """Check that lower-case continuation lines are merged into their entry.

    The earlier version of this test called the function but asserted
    nothing (its only check was commented out and tested the whole string
    with isupper() instead of the first character).
    """
    corrected_qtop = ['Potential severity of aggressive behaviour after acquired brain injury:', 
                      'implications for recording p. 162',
     'Geoff Dickens, Nick Alderman, Len Bowers', 'St Andrew’s Academic Centre, Northampton, UK', 
     'Violence and mental disorder: A central paradigm for psychiatry p. 166', 'Virgil Hancock', 
     'Carondelet Hospitals, Tucson, Arizona, USA', 
     'Safety in numbers: is the violence research literature too heterogeneous to be useful? p. 172', 
     'Juliet Hockenhull, Richard Whittington, Maria Leitner, M Gemma Cherry, Wally Barr, James', 
     'McGuire, Rumona Dickson. University of Liverpool, Liverpool, UK', 
     'Partnering with an inpatient psychiatric hospital to evaluate measures of', 'aggressive behavior p. 173', 
     'Joanne Iennaco, Lawrence Scahill, Jane Dixon, Robin Whittemore, Susan Busch, Len Bowers', 
     'Yale University, New Haven, USA', 
     'I’m just doing fine: Preliminary results of an art and training project relating to', 
     'self-harm in psychiatry p. 174', 'Nienke Kool, Berno van Meijel, Bauke Koekkoek, Ad Kerkhof', 
     'Palier, Intensive Treatment Centre, The Hague, The Netherlands', 
     'Audit of suicides in a South Staffordshire (UK) psychiatry unit to determine any',
     'preventable deaths p. 176', 'Vasudevan Krishnan, Jayaraj Padmanabhan, Abid Khan', 
     'South Staffordshire and Shropshire NHS Trust, Stafford, UK', 
     'Introducing screening for domestic violence in family care settings in Slovenia p. 177',
     'Nena Kopcavar Gucek, Igor Svab, Polona Selic', 
     'Department of Family Medicine, Medical School, University of Ljubljana, Ljubljana, Slovenia', 
     'A comparative study of attitudes towards aggression in 16 psychiatric wards p. 178', 
     'Tero Laiho, Hanna Putkonen, Nina Lindberg', 
     'Helsinki University Central Hospital, Clinic of Acute Psychiatry, Peijas Hospital, Vantaa, Finland',
     'Available instruments to register aggression incidents on psychiatric wards: a', 
     'literature review p. 179', 
     'Joris Leys, Karen Lauwaert, Nataly Fillion, Micheline Gobert, Katrien Vanderwee, Sofie Verhaeghe',
     'Ghent University, Ghent, Belgium', 'Demographics on not formerly known inpatients in an medium security forensic', 
     'department in Denmark p. 180', 'Steen Madsen, Christian Delcomyn Steffensen, Thomas Schütze',
     'Psykiatrisk Center Sankt Hans, Roskilde, Denmark', 
     'Aggression in psychiatric settings: incident registration using SOAS-R in nine', 
     'Belgian hospitals p. 181', # annoying: starts upper-case, so it cannot be recognised as a continuation
     'Joris Leys, Karen Lauwaert, Nataly Fillion, Jasper Feyaerts, Micheline Gobert, Katrien', 
     'Vanderwee, Sofie Verhaeghe. Ghent University, Ghent, Belgium']
    concatenated = concat_lower_case(corrected_qtop)
    # Every surviving entry must begin with an upper-case letter ...
    for entry in concatenated:
        assert entry[0].isupper(), "should be upper"
    # ... and a wrapped title must have been glued back together.
    assert concatenated[0] == ('Potential severity of aggressive behaviour after acquired brain injury: '
                               'implications for recording p. 162'), "should join continuation line"
    assert 'aggressive behavior p. 173' not in concatenated, "continuation should not stand alone"
    print("passed concat lower case")

def test_correct_Qnnn_to_pnnn():
    """Check that trailing "Qnnn" markers are rewritten to " p. nnn".

    The previous assertion looked at position [-3], but in "...Q455" the
    marker sits at [-4], so the test passed even when nothing had been
    corrected. The checks below look at the right position and pin the
    expected texts.
    """
    has_p = [(750, 'An exchange program for psychiatric nurses of acute wards to reduce the use of'),
    (751, 'seclusion p. 454'),
    (752, 'Remy Welleman, Elleke Landeweer, Cecile Gijsbers van Wijk, Guy Widdershoven'),
    (753, 'GGZ inGeest, Amsterdam, The Netherlands'),
    (754, 'Association between physical environmental factors and levels of conflict and'),
    (755, 'containment on psychiatric wards in the NetherlandsQ455'),
    (756, 'Petra van der Schaaf, Femmy Keuning,Elise Dusseldorp, Eric Noorthoorn, Wim Janssen'),
    (757, 'TNO, Dutch Centre for Health Assets/DuCHA, Utrecht, The Netherlands'),
    (758, 'Variation in coercive measures over a large Dutch sample: understanding'),
    (759, 'differences p. 457'),
    (760, 'Eric Noorthoorn, Wim Janssen, Adriaan Hoogendoorn, Petra van der Schaaf, Femmy Keunig,'),
    (761, 'Guy Widdershoven, Henk Nijman, The Netherlands'),
    (762, 'Staffing and patients characteristics as determinants of seclusionQ460'),
    (763, 'Wim Janssen, Petra van der Schaaf Eric Noorthoorn, Len Bowers, CL Mulder, Annet Smit,'),
    (764, 'Henk Nijman, Guy Widdershoven'),
    (765, 'GGNet Warnsveld, The Netherlands')]
    corrected = correct_Qnnn_to_pnnn(has_p)
    for i in corrected:
        # Slice instead of index so short strings cannot raise IndexError.
        assert i[-4:-3] != "Q", "should not be Q"
    assert 'containment on psychiatric wards in the Netherlands p. 455' in corrected, "should be p. 455"
    assert 'Staffing and patients characteristics as determinants of seclusion p. 460' in corrected, "should be p. 460"
    assert 'seclusion p. 454' in corrected, "should keep existing page markers"
    print("passed Q to p")

def test_read_raw():
    """Check that read_raw(7) yields a list whose first item is a string.

    Needs the file "7th_eviprg_raw.txt" in the working directory.
    """
    raw = read_raw(7)
    assert type(raw) is list, "should be a list"
    assert type(raw[0]) is str, "should be a string"
    print("passed read raw")

def test_spacedots_to_pages():
    """Check that dot leaders become " p. " and that entries are renumbered.

    The earlier version only verified the return type, so a function that
    converted nothing would still have passed.
    """

    no_extra = [(2, 'The Comprehensive Assessment of Psychopathic Personality (CAPP) . . . . . . . . . . . . . . . . . . . . . 27'),
    (3, 'Stephen Hart (Canada)'), 
    (4, 'The Short-Term Assessment of Risk and Treatability (START): measuring and'), 
    (5, 'managing risk in the short-term . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29'), 
    (6, 'Caroline Logan Michael Doylee (UK)'), 
    (7, 'European Association in Mental Health and Intellectual Disability (EAMHID)'), 
    (8, 'Guidelines for Assessment, Diagnostic and Treatment of Severe Problem'), 
    (9, 'Behaviour in Persons with Intellectual Disability . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30'), 
    (10, 'Anton Došen (Netherlands)'), 
    (12, 'The bulldozer and the ballet dancer: Aspects of nurses’ caring approaches in'), 
    (13, 'acute psychiatric intensive care . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 31'), 
    (14, 'Anna Björkdahl, Tom Palmstierna, Hansebo Görel'), 
    (15, 'SLSO, Health Care Provision, Stockholm County Council, Stockholm, Sweden'), 
    (16, 'The possibilities and pitfalls in violence risk assessment and management . . . . . . . . . . . . . . . 33'), 
    (17, 'Patrick Callaghan'), 
    (18, 'University of Nottingham, UK'), 
    (19, 'Violence and Mandated Community Treatment: The MacArthur Studies . . . . . . . . . . . . . . . . . 34'), 
    (20, 'John Monahan'), 
    (21, 'School of Law, University of Virginia, Charlottesville, USA'), 
    (22, 'Be the agent of the change you want to see in the world: Reducing restrictive'), 
    (23, 'interventions in human services . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39'),
    (24, 'Sharon Paley'), 
    (25, 'British Institute of Learning Disabilities, Kidderminster, UK')]
    has_pages = spacedots_to_pages(no_extra)
    #print("has pages:\t", has_pages)
    assert type(has_pages)==list, "wrong type"
    assert len(has_pages) == len(no_extra), "should keep one entry per input line"
    # Output is renumbered from 0 regardless of the incoming numbers.
    assert has_pages[0] == (0, 'The Comprehensive Assessment of Psychopathic Personality (CAPP) p. 27'), "should turn leaders into p."
    assert has_pages[1] == (1, 'Stephen Hart (Canada)'), "should leave lines without leaders alone"
    assert has_pages[3] == (3, 'managing risk in the short-term p. 29'), "should turn leaders into p."
    for _, text in has_pages:
        assert " ." not in text, "should not leave dot leaders behind"
    print("passed spacedots to pages")
    
def tests():
    """Run every test of this notebook in a fixed order, then report success.

    The first failing assertion propagates and stops the run.
    """
    suite = (
        test_correct_Qnnn_to_pnnn,
        test_read_raw,
        test_spacedots_to_pages,
        test_concat_lower_case,
    )
    for check in suite:
        check()
    print("passed all tests")
tests()

passed Q to p
passed read raw
passed spacedots to pages
passed all tests
