In [1]:
import pandas as pd
import numpy as np
import json

### Pull out cases.

In [15]:
# Set up the cases dataframe from the cases json file.
# (based on glowsplint's test_output.py code)
json_file = input("enter path to json file of cases.\n")
output_df = pd.read_json(json_file)

# Map full court names onto short court codes.
# Any court name missing from this mapping becomes NaN after .map().
court_map = {
    'High Court': 'SGHC',
    'Court of Appeal': 'SGCA',
    'District Court': 'SGDC',
    'Court of Criminal Appeal': 'SGCA'
}
output_df['Court'] = output_df.Court.map(court_map)

# Position of each entry in the `reference` value -> name of the column it goes to.
ref_columns = {
    0: 'refSLR',
    1: 'ref'
}

# Split reference into SLR reference and normal reference
output_df[list(ref_columns.values())] = (
    output_df.reference.apply(pd.Series).rename(columns=ref_columns))

# Where the second reference is missing, move the first one into 'ref'
# and blank out 'refSLR', so that 'ref' is always populated.
ref_na = output_df['ref'].isna()
output_df.loc[ref_na, 'ref'] = output_df.loc[ref_na, 'refSLR']
output_df.loc[ref_na, 'refSLR'] = np.nan
output_df = output_df.drop(columns='reference')

# retain the ordered list of parties in one column, before exploding output_df['parties'] next.
output_df = output_df.assign(listed_parties=output_df.parties)

# One row per party; rows for the Public Prosecutor are dropped so that every
# remaining row describes a single non-prosecutor party.
exploded_output = output_df.explode('parties')
output_df = exploded_output.loc[
    exploded_output.parties.str.lower() != "public prosecutor"].copy()

# Construct the unique reference: "<ref> <PARTY NAME>", upper-cased, with
# "AND ANOTHER" / "AND OTHERS" removed and whitespace normalised.
# `regex` is passed explicitly: pandas >= 2.0 treats the pattern as a literal
# by default, which would silently stop ' +' from collapsing repeated spaces.
output_df['unique_ref'] = (
    (output_df['ref'] + ' ' + output_df['parties'])
    .str.upper()
    .str.replace("AND ANOTHER", "", regex=False)
    .str.replace("AND OTHERS", "", regex=False)
    .str.replace(r' +', ' ', regex=True)
    .str.strip()
)

# Keep only the columns used downstream; the exploded 'parties' column is
# renamed to 'accused'. A non-inplace rename avoids mutating a column slice.
output_df = output_df[['unique_ref', 'case_id', 'date', 'Court',
                       'coram', 'counsel', 'listed_parties', 'parties', 'paragraphs']]
output_df = output_df.rename(columns={'parties': 'accused'})

enter path to json file of cases.
 data/murder_cases.json


In [29]:
# Show the prepared dataframe and its row count.
# Note: output_df was exploded to one row per non-prosecutor party, so
# len(output_df) counts rows (accused persons), not distinct judgments.
display(output_df)
print(f"no. of cases: {len(output_df)}")

Unnamed: 0,unique_ref,case_id,date,Court,coram,counsel,listed_parties,accused,paragraphs
0,[1983] SGHC 24 ADRIAN LIM,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Adrian Lim,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
0,[1983] SGHC 24 TAN MUI CHAO,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Tan Mui Chao,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
0,[1983] SGHC 24 HOC KAH HONG,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Hoc Kah Hong,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
1,[2007] SGHC 86 KWONG KOK HING,CC 13/2007,2007-05-30,SGHC,[Choo Han Teck J],"{'prosecution': ['Francis Ng', 'Luke Tang', 'J...","[Public Prosecutor, Kwong Kok Hing]",Kwong Kok Hing,{'1': 'On 14 September 2006 the accused was qu...
2,[2008] SGHC 120 DANIEL VIJAY S/O KATHERASAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Daniel Vijay s/o Katherasan,{'1': 'The first accused is Daniel Vijay s/o K...
2,[2008] SGHC 120 CHRISTOPHER SAMSON S/O ANPALAGAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Christopher Samson s/o Anpalagan,{'1': 'The first accused is Daniel Vijay s/o K...
2,[2008] SGHC 120 NAKAMUTHU BALAKRISHNAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Nakamuthu Balakrishnan,{'1': 'The first accused is Daniel Vijay s/o K...
3,[2008] SGHC 22 BAROKAH,CC 23/2007,2008-02-11,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Stella Tan'...","[Public Prosecutor, Barokah]",Barokah,{'1': 'The accused is an Indonesian woman born...
4,[2009] SGHC 46 BAROKAH,CC 23/2007,2009-02-26,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Stella Tan'...","[Public Prosecutor, Barokah]",Barokah,{'1': 'The precursor to this judgment is PP v ...
5,[2010] SGCA 37 ONG PANG SIEW,Criminal Appeal No 4 of 2009,2010-11-08,SGCA,"[Chan Sek Keong CJ, Andrew Phang Boon Leong JA...","{'prosecution': ['Prem Raj', 'Bala Reddy'], 'd...","[Ong Pang Siew, Public Prosecutor]",Ong Pang Siew,{'1': 'This is the appeal by Ong Pang Siew (“t...


no. of cases: 32


In [17]:
# Sanity check: True if any cell of output_df is null.
output_df.isnull().values.any()

False

### Pull out appeals.

In [5]:
# Select the rows that are definitely appeal cases (Court code contains 'SGCA')
# into a new dataframe.
# astype(str) turns a missing Court (NaN) into the text 'nan', so rows whose
# court is unknown are simply not selected instead of raising a TypeError.
is_appeal = output_df['Court'].astype(str).str.contains('SGCA', regex=False).to_numpy()

# Positional row numbers of the selected rows within output_df.
appeals_indices = np.flatnonzero(is_appeal).tolist()

# A plain boolean array is used for selection because output_df's index
# contains repeated labels after the explode step.
appeals_df = output_df.loc[is_appeal].reset_index(drop=True)
display(appeals_df.head())
print(f"no. of cases: {len(appeals_df)}")

Unnamed: 0,unique_ref,date,Court,coram,counsel,listed_parties,accused,paragraphs
0,[2010] SGCA 37 ONG PANG SIEW,2010-11-08,SGCA,"[Chan Sek Keong CJ, Andrew Phang Boon Leong JA...","{'prosecution': ['Prem Raj', 'Bala Reddy'], 'd...","[Ong Pang Siew, Public Prosecutor]",Ong Pang Siew,{'1': 'This is the appeal by Ong Pang Siew (“t...
1,[2012] SGCA 44 PATHIP SELVAN S/O SUGUMARAN,2012-08-15,SGCA,"[Chan Sek Keong CJ, Andrew Phang Boon Leong JA...","{'prosecution': ['David Khoo', 'Dennis Tan'], ...","[Pathip Selvan s/o Sugumaran, Public Prosecutor]",Pathip Selvan s/o Sugumaran,{'1': 'This is a tragic case of a young couple...
2,[2014] SGCA 58 WANG ZHIJIAN,2014-11-28,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Hay Hung Chun', 'Timotheus K...","[Public Prosecutor, Wang Zhijian]",Wang Zhijian,{'1': 'This was a tragic case involving the de...
3,[2015] SGCA 1 KHO JABING,2015-01-14,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Hay Hung Chun', 'Teo Lu Jia'...","[Public Prosecutor, Kho Jabing]",Kho Jabing,{'1': 'In PP v Galing Anak Kujat [2010] SGHC 2...
4,[2016] SGCA 21 JABING KHO,2016-04-05,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Francis Ng', 'Marshall Lim',...","[Jabing Kho, Public Prosecutor]",Jabing Kho,{'1': 'In our recent decision in The Royal Ban...


no. of cases: 12


In [6]:
# Number of rows in appeals_df.
len(appeals_df)

12

In [7]:
# Find the appellant of each appeal row and store it in a new 'appellant' column.
# If the first listed party is the Public Prosecutor, the appellant is
# "public prosecutor"; otherwise it is the row's accused.

# Strip a trailing " and others" / " and another" / " and <co-party>" suffix.
# Whitespace is required on both sides of "and" so that names merely containing
# those letters (e.g. "Anand ...") are left intact, and the match ignores case
# so "AND OTHERS" is handled as well.
co_party_suffix = r'\s+and\s+.*$'
accused_names = appeals_df['accused'].str.replace(
    co_party_suffix, '', case=False, regex=True)

# Lower-cased name of the first party in each row's ordered party list.
first_party = appeals_df['listed_parties'].str[0].str.lower()

appellants = accused_names.where(
    first_party != "public prosecutor", "public prosecutor").tolist()
print(appellants)

appeals_df = appeals_df.assign(appellant=appellants).reset_index(drop=True)
appeals_df.head()

['Ong Pang Siew', 'Pathip Selvan s/o Sugumaran', 'public prosecutor', 'public prosecutor', 'Jabing Kho', 'ROSMAN BIN ABDULLAH', 'public prosecutor', 'Micheal Anak Garing', 'Iskandar Bin Rahmat', 'public prosecutor', 'public prosecutor', 'Chan Lie Sian']


Unnamed: 0,unique_ref,date,Court,coram,counsel,listed_parties,accused,paragraphs,appellant
0,[2010] SGCA 37 ONG PANG SIEW,2010-11-08,SGCA,"[Chan Sek Keong CJ, Andrew Phang Boon Leong JA...","{'prosecution': ['Prem Raj', 'Bala Reddy'], 'd...","[Ong Pang Siew, Public Prosecutor]",Ong Pang Siew,{'1': 'This is the appeal by Ong Pang Siew (“t...,Ong Pang Siew
1,[2012] SGCA 44 PATHIP SELVAN S/O SUGUMARAN,2012-08-15,SGCA,"[Chan Sek Keong CJ, Andrew Phang Boon Leong JA...","{'prosecution': ['David Khoo', 'Dennis Tan'], ...","[Pathip Selvan s/o Sugumaran, Public Prosecutor]",Pathip Selvan s/o Sugumaran,{'1': 'This is a tragic case of a young couple...,Pathip Selvan s/o Sugumaran
2,[2014] SGCA 58 WANG ZHIJIAN,2014-11-28,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Hay Hung Chun', 'Timotheus K...","[Public Prosecutor, Wang Zhijian]",Wang Zhijian,{'1': 'This was a tragic case involving the de...,public prosecutor
3,[2015] SGCA 1 KHO JABING,2015-01-14,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Hay Hung Chun', 'Teo Lu Jia'...","[Public Prosecutor, Kho Jabing]",Kho Jabing,{'1': 'In PP v Galing Anak Kujat [2010] SGHC 2...,public prosecutor
4,[2016] SGCA 21 JABING KHO,2016-04-05,SGCA,"[Chao Hick Tin JA, Andrew Phang Boon Leong JA,...","{'prosecution': ['Francis Ng', 'Marshall Lim',...","[Jabing Kho, Public Prosecutor]",Jabing Kho,{'1': 'In our recent decision in The Royal Ban...,Jabing Kho


In [11]:
# Ask where to write the appeals metadata, then export only the selected
# columns (plus the index) as CSV.
path = input('enter path to save csv file:\n')
appeals_df.to_csv(
    path,
    columns=['unique_ref', 'Court', 'appellant', 'accused'],
)

enter path to save csv file:
 data/murder_cases_some_metadata.csv


### Pull out trials.

In [35]:
# filter out cases that are definitely trial cases
# Collects the positional row numbers of output_df that count as trials.
# A row is kept when its lower-cased case_id contains "criminal case", or
# contains "cc " while Court is not 'SGCA'; any other row is kept only when
# its Court is 'SGDC'.
# NOTE(review): `and` binds tighter than `or`, so the SGCA exclusion applies
# to the "cc " test only, never to the "criminal case" test - confirm whether
# it was meant to cover both.
trials_indices = []
for i in range(len(output_df)):
    if "criminal case" in output_df.iloc[i]["case_id"].lower() or \
    "cc " in output_df.iloc[i]["case_id"].lower() and \
    output_df.iloc[i]["Court"] != 'SGCA':
        trials_indices.append(i)
    elif output_df.iloc[i]["Court"] == 'SGDC':
        trials_indices.append(i)

# Build the trials dataframe from the collected row positions, keeping
# output_df's own index labels, then show it with its row count.
trials_df = output_df.take(trials_indices)
display(trials_df)
print(f"no. of cases: {trials_df.shape[0]}")

Unnamed: 0,unique_ref,case_id,date,Court,coram,counsel,listed_parties,accused,paragraphs
0,[1983] SGHC 24 ADRIAN LIM,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Adrian Lim,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
0,[1983] SGHC 24 TAN MUI CHAO,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Tan Mui Chao,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
0,[1983] SGHC 24 HOC KAH HONG,Criminal Case No 23 of 1982,1983-05-25,SGHC,"[Chua, J, Sinnathuray, J]",{'prosecution': ['Glenn Knight assisted by Roy...,"[Public Prosecutor, Adrian Lim, Tan Mui Chao, ...",Hoc Kah Hong,"{'1': '25 May 1983 ', '2': 'Before we give ou..."
1,[2007] SGHC 86 KWONG KOK HING,CC 13/2007,2007-05-30,SGHC,[Choo Han Teck J],"{'prosecution': ['Francis Ng', 'Luke Tang', 'J...","[Public Prosecutor, Kwong Kok Hing]",Kwong Kok Hing,{'1': 'On 14 September 2006 the accused was qu...
2,[2008] SGHC 120 DANIEL VIJAY S/O KATHERASAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Daniel Vijay s/o Katherasan,{'1': 'The first accused is Daniel Vijay s/o K...
2,[2008] SGHC 120 CHRISTOPHER SAMSON S/O ANPALAGAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Christopher Samson s/o Anpalagan,{'1': 'The first accused is Daniel Vijay s/o K...
2,[2008] SGHC 120 NAKAMUTHU BALAKRISHNAN,CC 16/2007,2008-07-28,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Lee Cheow H...","[Public Prosecutor, Daniel Vijay s/o Katherasa...",Nakamuthu Balakrishnan,{'1': 'The first accused is Daniel Vijay s/o K...
3,[2008] SGHC 22 BAROKAH,CC 23/2007,2008-02-11,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Stella Tan'...","[Public Prosecutor, Barokah]",Barokah,{'1': 'The accused is an Indonesian woman born...
4,[2009] SGHC 46 BAROKAH,CC 23/2007,2009-02-26,SGHC,[Tay Yong Kwang J],"{'prosecution': ['Amarjit Singh', 'Stella Tan'...","[Public Prosecutor, Barokah]",Barokah,{'1': 'The precursor to this judgment is PP v ...
6,[2010] SGHC 131 ASTRO BIN JAKARIA,Criminal Case No 6 of 2010,2010-04-29,SGHC,[Chan Seng Onn J],"{'prosecution': ['Cassandra Cheong', 'Ng Cheng...","[Public Prosecutor, Astro bin Jakaria]",Astro bin Jakaria,"{'1': 'The accused, Astro bin Jakaria, is char..."


no. of cases: 20


In [39]:
# Check that every unique reference was classified as a trial or an appeal.
trial_cases = set(trials_df['unique_ref'])
appeal_cases = set(appeals_df['unique_ref'])
all_cases = set(output_df['unique_ref'])

# References that ended up in neither group.
leftover = all_cases - (trial_cases | appeal_cases)
if len(leftover) == 0:
    print('all cases found have been identified as either trial or appeal.')
else:
    # Report unclassified references rather than staying silent about them.
    print(f"{len(leftover)} case(s) not identified as either trial or appeal:")
    for unclassified_ref in sorted(leftover, key=str):
        print(f"  {unclassified_ref}")

all cases found have been identified as either trial or appeal.


In [37]:
# Display the set of unique references taken from trials_df.
trial_cases

{'[1983] SGHC 24 ADRIAN LIM',
 '[1983] SGHC 24 HOC KAH HONG',
 '[1983] SGHC 24 TAN MUI CHAO',
 '[2007] SGHC 86 KWONG KOK HING',
 '[2008] SGHC 120 CHRISTOPHER SAMSON S/O ANPALAGAN',
 '[2008] SGHC 120 DANIEL VIJAY S/O KATHERASAN',
 '[2008] SGHC 120 NAKAMUTHU BALAKRISHNAN',
 '[2008] SGHC 22 BAROKAH',
 '[2009] SGHC 46 BAROKAH',
 '[2010] SGHC 131 ASTRO BIN JAKARIA',
 '[2010] SGHC 212 GALING ANAK KUJAT',
 '[2010] SGHC 82 AFR',
 '[2012] SGHC 238 WANG ZHIJIAN',
 '[2013] SGHC 251 KHO JABING',
 '[2014] SGHC 23 WANG WENFENG',
 '[2015] SGHC 292 SUTHERSON, SUJAY SOLOMON',
 '[2017] SGHC 29 RAMZAN RIZWAN',
 '[2017] SGHC 29 RASHEED MUHAMMAD',
 '[2017] SGHC 307 P MAGESWARAN',
 '[2018] SGHC 135 BPK'}

In [38]:
# Display the set of unique references taken from appeals_df.
appeal_cases

{'[2010] SGCA 37 ONG PANG SIEW',
 '[2012] SGCA 44 PATHIP SELVAN S/O SUGUMARAN',
 '[2014] SGCA 58 WANG ZHIJIAN',
 '[2015] SGCA 1 KHO JABING',
 '[2016] SGCA 21 JABING KHO',
 '[2016] SGCA 62 ROSMAN BIN ABDULLAH',
 '[2017] SGCA 69 BDB',
 '[2017] SGCA 7 MICHEAL ANAK GARING',
 '[2017] SGCA 9 ISKANDAR BIN RAHMAT',
 '[2018] SGCA 30 CHIA KEE CHEN',
 '[2019] SGCA 22 P MAGESWARAN',
 '[2019] SGCA 44 CHAN LIE SIAN'}

In [40]:
# Ask where to write the trials metadata, then export only the selected
# columns (plus the index) as CSV.
path = input('enter csv path to save trials metadata:\n')
trials_df.to_csv(
    path,
    columns=['unique_ref', 'Court', 'accused'],
)

enter csv path to save trials metadata:
 data/murder_trials_some_metadata.csv
