In [1]:
import pandas as pd

## Step 1: Creating the IAT judges list

### Creating the judge list with its IAT score for AMAG I

In [2]:
iat_i = pd.read_csv(r"C:\Users\PC\Daniel Chen Dropbox\Alexis Malca\Peru_Justice\02_Data\01_AMAG\03_Intermediate\iat_data_clean.csv")

iat_i = iat_i.dropna(subset=['iat_score'])

# Optionally, you can reset the index after dropping rows
iat_i = iat_i.reset_index(drop=True)

In [3]:
df_i = pd.read_csv(r"C:\Users\PC\Daniel Chen Dropbox\Alexis Malca\Peru_Justice\02_Data\01_AMAG\03_Intermediate\PCA_2020_all_participants.csv")

In [17]:
amag_i = pd.merge(iat_i, df_i, left_on='nrodocumento', right_on='dni', how='inner')

# Count the number of identical values
num_identical = len(amag_i)

print(f"Number of identical values between 'DNI' and 'nrodocumento': {num_identical}")

Number of identical values between 'DNI' and 'nrodocumento': 313


In [18]:
amag_i["participant_apellido_nombre"] = amag_i["apellido_paterno"] + " " + amag_i["apellido_materno"] + " " + amag_i["nombres"]
amag_i["participant_apellido_nombre"] = amag_i["participant_apellido_nombre"].str.strip()

amag_i["participant_nombre_apellido"] = amag_i["nombres"] + " " + amag_i["apellido_paterno"] + " " + amag_i["apellido_materno"]
amag_i["participant_nombre_apellido"] = amag_i["participant_nombre_apellido"].str.strip()

In [19]:
# Select specific columns: ApellidoPaterno, ApellidoMaterno, Nombres
amag_i = amag_i[['participant_apellido_nombre', 'participant_nombre_apellido', 'nombres' ,'iat_score']]

In [20]:
amag_i.rename(columns={'nombres': 'names'}, inplace=True)

# Take into account I'm not filtering by charge, since in this dataset we only have information about affiliated institution, and not the charge

### Creating the judge list with its IAT score for AMAG II

In [11]:
amag_ii = pd.read_stata(r"C:\Users\PC\Daniel Chen Dropbox\Alexis Malca\Peru_Justice\02_Data\08_CEJ_Web\data_cleaned\lab_data\Clean_Full_Data12.dta")

In [12]:
# Filter rows where the 'Cargo' column contains 'JUEZ', remember for amag i I cannot do this filter. 
amag_ii = amag_ii[amag_ii['Cargo'].str.contains('JUEZ', na=False)]

# Select specific columns: ApellidoPaterno, ApellidoMaterno, Nombres
amag_ii = amag_ii[['ApellidoPaterno', 'ApellidoMaterno', 'Nombres' ,'bs_iat_score']]

amag_ii = amag_ii.dropna(subset=['bs_iat_score'])


In [13]:
amag_ii["participant_apellido_nombre"] = amag_ii["ApellidoPaterno"] + " " + amag_ii["ApellidoMaterno"] + " " + amag_ii["Nombres"]
amag_ii["participant_apellido_nombre"] = amag_ii["participant_apellido_nombre"].str.strip()

amag_ii["participant_nombre_apellido"] = amag_ii["Nombres"] + " " +  amag_ii["ApellidoPaterno"] + " " + amag_ii["ApellidoMaterno"]
amag_ii["participant_nombre_apellido"] = amag_ii["participant_nombre_apellido"].str.strip()

In [14]:
amag_ii.rename(columns={'Nombres': 'names', 'bs_iat_score': 'iat_score'}, inplace=True)
amag_ii = amag_ii[['participant_apellido_nombre', 'participant_nombre_apellido', 'names' ,'iat_score']]


Unnamed: 0,participant_apellido_nombre,participant_nombre_apellido,names,iat_score
44,SALAZAR DIAZ VLADIMIR OMAR,VLADIMIR OMAR SALAZAR DIAZ,VLADIMIR OMAR,0.087537
53,SUPANTA CONDOR MARINA INES,MARINA INES SUPANTA CONDOR,MARINA INES,-0.663009
59,LINARES REBAZA DYRAN JORGE,DYRAN JORGE LINARES REBAZA,DYRAN JORGE,-0.236486
84,CLEMENTE SALOME DILMA ZORAIDA,DILMA ZORAIDA CLEMENTE SALOME,DILMA ZORAIDA,-0.250922
88,VIVIANO VALDEZ ROMAIN IGORT,ROMAIN IGORT VIVIANO VALDEZ,ROMAIN IGORT,-0.483749
...,...,...,...,...
5476,ASMAD CORCUERA MARCO ANTONIO,MARCO ANTONIO ASMAD CORCUERA,MARCO ANTONIO,-0.265571
5528,MIO LOPEZ FLOR GRACIELA,FLOR GRACIELA MIO LOPEZ,FLOR GRACIELA,-0.160823
5571,AGUILAR BAZAN MARLY YOHANA,MARLY YOHANA AGUILAR BAZAN,MARLY YOHANA,-0.243329
5724,SANTILLAN TUESTA JUAN CARLOS,JUAN CARLOS SANTILLAN TUESTA,JUAN CARLOS,0.088814


### Consolidating in one list amag I and amag II

In [21]:
amag_i.head()

Unnamed: 0,participant_apellido_nombre,participant_nombre_apellido,names,iat_score
0,PEREZ CALDERON PATRICIA DEL CARMEN,PATRICIA DEL CARMEN PEREZ CALDERON,PATRICIA DEL CARMEN,-0.176195
1,FRISANCHO ENRIQUEZ CARLOS,CARLOS FRISANCHO ENRIQUEZ,CARLOS,-0.646047
2,VALDIVIA CALDERON LUIS ENRIQUE,LUIS ENRIQUE VALDIVIA CALDERON,LUIS ENRIQUE,-0.70861
3,VITERI VALIENTE YESSICA PAOLA,YESSICA PAOLA VITERI VALIENTE,YESSICA PAOLA,-0.255366
4,TICONA MIRANDA MARCO ANTONIO,MARCO ANTONIO TICONA MIRANDA,MARCO ANTONIO,-0.019535


In [16]:
amag_ii.head()

Unnamed: 0,participant_apellido_nombre,participant_nombre_apellido,names,iat_score
44,SALAZAR DIAZ VLADIMIR OMAR,VLADIMIR OMAR SALAZAR DIAZ,VLADIMIR OMAR,0.087537
53,SUPANTA CONDOR MARINA INES,MARINA INES SUPANTA CONDOR,MARINA INES,-0.663009
59,LINARES REBAZA DYRAN JORGE,DYRAN JORGE LINARES REBAZA,DYRAN JORGE,-0.236486
84,CLEMENTE SALOME DILMA ZORAIDA,DILMA ZORAIDA CLEMENTE SALOME,DILMA ZORAIDA,-0.250922
88,VIVIANO VALDEZ ROMAIN IGORT,ROMAIN IGORT VIVIANO VALDEZ,ROMAIN IGORT,-0.483749


In [22]:
amag = pd.concat([amag_i, amag_ii], ignore_index=True)
amag

Unnamed: 0,participant_apellido_nombre,participant_nombre_apellido,names,iat_score
0,PEREZ CALDERON PATRICIA DEL CARMEN,PATRICIA DEL CARMEN PEREZ CALDERON,PATRICIA DEL CARMEN,-0.176195
1,FRISANCHO ENRIQUEZ CARLOS,CARLOS FRISANCHO ENRIQUEZ,CARLOS,-0.646047
2,VALDIVIA CALDERON LUIS ENRIQUE,LUIS ENRIQUE VALDIVIA CALDERON,LUIS ENRIQUE,-0.708610
3,VITERI VALIENTE YESSICA PAOLA,YESSICA PAOLA VITERI VALIENTE,YESSICA PAOLA,-0.255366
4,TICONA MIRANDA MARCO ANTONIO,MARCO ANTONIO TICONA MIRANDA,MARCO ANTONIO,-0.019535
...,...,...,...,...
398,ASMAD CORCUERA MARCO ANTONIO,MARCO ANTONIO ASMAD CORCUERA,MARCO ANTONIO,-0.265571
399,MIO LOPEZ FLOR GRACIELA,FLOR GRACIELA MIO LOPEZ,FLOR GRACIELA,-0.160823
400,AGUILAR BAZAN MARLY YOHANA,MARLY YOHANA AGUILAR BAZAN,MARLY YOHANA,-0.243329
401,SANTILLAN TUESTA JUAN CARLOS,JUAN CARLOS SANTILLAN TUESTA,JUAN CARLOS,0.088814


In [23]:
amag.to_csv(r"D:\Proyectos\amag\iat_bernardo\iat_list.csv", index= False)

## Step 2: Creating the judges outcomes 

### Reading our final decisions

Since this a test code, we will only work with the case files with final decisions and  no second instance decisions, from 2018. 

However, this code could be modify it to work with the other case files.

In [24]:
judgements = pd.read_csv(r"D:\Proyectos\amag\classification\intermediate\judgements_case_file_2018.csv")
orders = pd.read_csv(r"D:\Proyectos\amag\classification\intermediate\orders_case_file_2018.csv")
final_decisions = pd.concat([judgements, orders], ignore_index=True)

In [27]:
final_decisions.drop(columns=['score'], inplace=True)

In [29]:
first_instance = final_decisions[final_decisions['second_instance'] == False]
first_instance

Unnamed: 0,case_file,id,issued_date,resolution_number,resolution_type,notified_date,summary,court,judicial_district,judge,start_date,procedure_type,law_field,sub_law_field,opinion_text,judge_from_opinion,second_instance,days_difference
0,00006-2018-0-0210-JP-FC-01,documentoD.html?nid=GoktBpGEdOROVEm,2021-02-08,TRECE,SENTENCIA,24/08/2021,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tDECISIÓN:\nP...,JUZGADO DE PAZ LETRADO - Sede Pomabamba,ANCASH,ALVAREZ ACERO ROCIO DELSY,2018-01-11,ESPECIAL,FAMILIA CIVIL,DECLARACION JUDICIAL DE PATERNIDAD EXTRAMATRIM...,JUZGADO DE PAZ LETRADO - Sede Pomabamba \nEXPE...,ALVAREZ ACERO ROCIO DELSY,False,1124.0
1,00006-2018-0-0215-JP-FC-01,documentoD.html?nid=JduMwGNHzBYHcRxVouD,2021-12-08,VEINTIOCHO,SENTENCIA,25/08/2021,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tFALLA: DECLA...,JUZGADO DE PAZ LETRADO ITINERANTE-Sede Asunción,ANCASH,HINOSTROZA HUERTA HILDA MARIA,2018-04-12,UNICO,FAMILIA CIVIL,AUMENTO DE ALIMENTOS,JUZGADO DE PAZ LETRADO ITINERANTE-Sede Asunció...,HINOSTROZA HUERTA HILDA MARIA,False,1336.0
2,00007-2018-0-0201-JP-FC-02,documentoD.html?nid=yyHFwZbLpJQIWNYQJ,2020-10-30,DIECISIETE,SENTENCIA,30/10/2020,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tFALLA:\n\n1....,2° JUZGADO FAMILIA - Sede Central,ANCASH,VILLAFAN CANO HOMMER FREY,2018-01-05,UNICO,FAMILIA CIVIL,EXONERACION DE ALIMENTOS,CORTE SUPERIOR DE JUSTICIA DE ANCASH \n\nTERCE...,VILLAFAN CANO HOMMER FREY,False,1029.0
3,00008-2018-0-0201-JP-FC-01,documentoD.html?nid=IEsgCKwvAcFQGWpZh,2019-10-29,ONCE,SENTENCIA,29/10/2019,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tSENTENCIAS\n...,3°JUZGADO DE PAZ LETRADO - Sede Central,ANCASH,"HUERTA BOJORQUEZ, CARMEN EUGENIA",2018-01-05,UNICO,FAMILIA CIVIL,ALIMENTOS,CORTE...,"HUERTA BOJORQUEZ, CARMEN EUGENIA",False,662.0
4,00022-2018-0-0201-JP-FC-01,documentoD.html?nid=ajijiYXWiaSBOMMNrYdB,2019-12-04,DOCE,SENTENCIA,13/04/2019,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1.\tDECLARAR...,1° JUZGADO PAZ LETRADO - Sede Central,ANCASH,RETUERTO TUEROS BESSI YOHANA,2018-01-10,UNICO,FAMILIA CIVIL,ALIMENTOS,\n1° JUZGADO PAZ LETRADO - Sede Central- Loca...,RETUERTO TUEROS BESSI YOHANA,False,693.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101685,00670-2018-0-3301-JR-FT-01,documentoD.html?nid=ecUbLzbNBWucOMqMk,2018-08-08,UNO,AUTO FINAL,08/08/2018,"\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t, SE RESUELV...",JUZGADO CIVIL - SEDE ANCON,PUENTE PIEDRA - VENTANILLA,<NO DEFINIDO>,2018-08-06,UNICO,FAMILIA TUTELAR,VIOLENCIA FAMILIAR,\n CORTE SUPERIOR DE JUSTICIA...,ROY ESTEBAN ALVA NAVARRO,False,2.0
101686,00671-2018-0-3301-JR-FT-01,documentoD.html?nid=yAeyZStouvtVKisRi,2018-09-08,UNO,AUTO FINAL,09/08/2018,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tSE RESUELVE:...,JUZGADO CIVIL - SEDE ANCON,PUENTE PIEDRA - VENTANILLA,<NO DEFINIDO>,2018-08-06,UNICO,FAMILIA TUTELAR,VIOLENCIA FAMILIAR,\n CORTE SUPERIOR DE JUSTICIA...,ROY ESTEBAN ALVA NAVARRO,False,33.0
101687,00672-2018-0-3301-JR-FT-01,documentoD.html?nid=tVdaUiHInwcDcIVKBkJ,2018-04-23,UNO,AUTO FINAL,23/04/2018,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tEL CESE INME...,1° JUZGADO DE FAMILIA DE VENTANILLA Y MI PERÚ,PUENTE PIEDRA - VENTANILLA,KATHERINE LA ROSA CASTILLO,2018-04-20,UNICO,FAMILIA TUTELAR,VIOLENCIA FAMILIAR,\n\n1° JUZGADO DE FAMILIA - SEDE ANEXO 1\nEXPE...,KATHERINE LA ROSA CASTILLO,False,3.0
101688,00673-2018-0-3301-JR-FT-01,documentoD.html?nid=aomgGMFOLByXAeHNc,2018-09-08,UNO,AUTO FINAL,09/08/2018,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tSE RESUELVE:...,JUZGADO CIVIL - SEDE ANCON,PUENTE PIEDRA - VENTANILLA,<NO DEFINIDO>,2018-08-06,UNICO,FAMILIA TUTELAR,VIOLENCIA FAMILIAR,\n CORTE SUPERIOR DE JUSTICIA...,ROY ESTEBAN ALVA NAVARRO,False,33.0


In [39]:
cell_value = first_instance.iat[83870, 6]
cell_value

'\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tSE RESUELVE: DICTAR MEDIDA DE PROTECCIÓN A FAVOR DE LA PARTE AGRAVIADA\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t'

In [40]:
# The keyword approach is not the best, since it has so much limitations. 
# Remember we are working with case files from different law fields, so the judge writing style is not homogeneous accross our final decisions
# When I'm back from Germany I will work on another solution 

def contains_keywords(text, keywords):
    if text is None:
        return False
    text_lower = text.lower()
    return any(keyword.lower() in text_lower for keyword in keywords)

# First set of keywords
keywords_confirm = ['fundada', 'fundado', 'fundadas', 'fundados', 'a favor']

# Second set of keywords
keywords_reverse = ['infundada', 'improcedente', 'infundado', 'infundadas', 'infundados', 'no procede']

# Apply the function to create the 'Confirm' column
first_instance['confirm'] = first_instance['summary'].apply(contains_keywords, args=(keywords_confirm,))

# Apply the function to create the 'Reverse' column conditionally
first_instance['reverse'] = first_instance.apply(
    lambda row: contains_keywords(row['summary'], keywords_reverse) if not row['confirm'] else False, axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_instance['confirm'] = first_instance['summary'].apply(contains_keywords, args=(keywords_confirm,))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_instance['reverse'] = first_instance.apply(


## 3. Getting the gender from names

I'm working with the following csvs that contains male and female names: https://github.com/marcboquet/spanish-names 

Here is a need for improvement, the female name detector is not working properly. 

In [64]:
# Reading datasets

parts_2018 = pd.read_csv(r"C:\Users\PC\Daniel Chen Dropbox\Alexis Malca\Peru_Justice\02_Data\08_CEJ_Web\data_cleaned\DF_procedural_parts_2018.csv")
female_names = pd.read_csv(r"D:\Proyectos\amag\pruebas\mujeres.csv")
female_names = female_names.dropna()
male_names = pd.read_csv(r"D:\Proyectos\amag\pruebas\hombres.csv")
male_names = male_names.dropna()


In [53]:
parts_2018

Unnamed: 0,Expediente N°:,Parte,Tipo dePersona,Apellido Paterno /Razón Social,ApellidoMaterno,Nombres
0,00001-2018-0-0201-JP-CI-02,DEMANDADO,NATURAL,ZAVALA,CERNA,NEMESIO
1,00001-2018-0-0201-JP-CI-02,DEMANDANTE,NATURAL,ZAVALA,CERNA,CLEMENCIA
2,00001-2018-0-0202-JP-CI-01,DEMANDADO,NATURAL,CAMONES,MAGUIÑA,ORLANDO AMADOR
3,00001-2018-0-0202-JP-CI-01,DEMANDANTE,NATURAL,FERNANDEZ,FERNANDEZ,LUCIO ENRIQUE
4,00002-2018-0-0201-JP-CI-01,DEMANDANTE,NATURAL,GRANADOS,RODRIGUEZ,GUILLERMO
...,...,...,...,...,...,...
365795,00006-2018-0-3301-SP-FC-01,DEMANDANTE,NATURAL,MORALES,OLIVARES,FIORELA LISBETH
365796,00007-2018-0-3301-SP-FC-01,DEMANDADO,NATURAL,ALVA,DEL AGUILA,ELVIS
365797,00007-2018-0-3301-SP-FC-01,DEMANDANTE,NATURAL,PEREZ,CAMPOS,LEONOR
365798,00001-2018-1-3301-SP-LA-01,QUEJOSO,JURIDICA,,\t\t\t\t\t\t\t\t\t\t\t\t\t\tCONSORCIO EDUCATIV...,\t\t\t\t\t\t\t\t\t\t\t\t\t


In [65]:
male = male_names['nombre'].tolist()
female = female_names['nombre'].tolist()

In [66]:
def contains_any_name(text, names):
    if pd.isna(text):  # Check if text is NaN
        return False
    elif any(name in text for name in names):  # Check if any name is in the text
        return True
    else:
        return False

# Apply the function to the 'Nombres' column
parts_2018['female_part'] = parts_2018['Nombres'].apply(lambda x: contains_any_name(x, female))

In [67]:
parts_2018

Unnamed: 0,Expediente N°:,Parte,Tipo dePersona,Apellido Paterno /Razón Social,ApellidoMaterno,Nombres,female_part
0,00001-2018-0-0201-JP-CI-02,DEMANDADO,NATURAL,ZAVALA,CERNA,NEMESIO,False
1,00001-2018-0-0201-JP-CI-02,DEMANDANTE,NATURAL,ZAVALA,CERNA,CLEMENCIA,True
2,00001-2018-0-0202-JP-CI-01,DEMANDADO,NATURAL,CAMONES,MAGUIÑA,ORLANDO AMADOR,True
3,00001-2018-0-0202-JP-CI-01,DEMANDANTE,NATURAL,FERNANDEZ,FERNANDEZ,LUCIO ENRIQUE,True
4,00002-2018-0-0201-JP-CI-01,DEMANDANTE,NATURAL,GRANADOS,RODRIGUEZ,GUILLERMO,True
...,...,...,...,...,...,...,...
365795,00006-2018-0-3301-SP-FC-01,DEMANDANTE,NATURAL,MORALES,OLIVARES,FIORELA LISBETH,True
365796,00007-2018-0-3301-SP-FC-01,DEMANDADO,NATURAL,ALVA,DEL AGUILA,ELVIS,False
365797,00007-2018-0-3301-SP-FC-01,DEMANDANTE,NATURAL,PEREZ,CAMPOS,LEONOR,True
365798,00001-2018-1-3301-SP-LA-01,QUEJOSO,JURIDICA,,\t\t\t\t\t\t\t\t\t\t\t\t\t\tCONSORCIO EDUCATIV...,\t\t\t\t\t\t\t\t\t\t\t\t\t,False


### 4. Getting the judges with IAT score

In [58]:
from fuzzywuzzy import fuzz

def check_similarity(value, comparison_list, threshold=90):
    value = str(value)  # Convert value to string
    for comparison_value in comparison_list:
        if fuzz.ratio(value, comparison_value) >= threshold:
            return True
    return False

# Ensure that the columns are of type string
first_instance['judge_from_opinion'] = first_instance['judge_from_opinion'].astype(str)
comparison_list_apellido_nombre = [str(name) for name in amag['participant_apellido_nombre'].tolist()]
comparison_list_nombre_apellido = [str(name) for name in amag['participant_nombre_apellido'].tolist()]

# Check for similarities
similarities = []
for value in first_instance['judge_from_opinion']:
    if (check_similarity(value, comparison_list_apellido_nombre) or 
        check_similarity(value, comparison_list_nombre_apellido)):
        similarities.append(value)

# Filter the DataFrame based on similarities
iat_judges = first_instance[first_instance['judge_from_opinion'].isin(similarities)]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_instance['judge_from_opinion'] = first_instance['judge_from_opinion'].astype(str)


In [60]:
iat_judges.to_csv(r"D:\Proyectos\amag\iat_bernardo\iat_judges_2018.csv", index= False)