**Imports**

In [93]:
import pandas as pd
import numpy as np
import seaborn as sns
from collections import Counter
import matplotlib.pyplot as plt
import json

import sys  
sys.path.append('../')

import modules.exploratory_data_analysis.base_rates as base_rates
import modules.exploratory_data_analysis.statistical_analysis as statistical_analysis

import warnings
warnings.filterwarnings('ignore')

from importlib import reload
statistical_analysis = reload(statistical_analysis)

# Load extended dataset

In [94]:
# Forward slashes keep this relative path valid on every OS and avoid the
# invalid escape sequences ("\.", "\o", "\e") that the backslash form contained.
df = pd.read_csv("../../output/extended_dataset.csv", sep=";")
df.head()

Unnamed: 0,DATE,YEAR,Train,ID,URL,HQ,AUT_COMM,PLAIN_ML,PLAIN_WO,DEFEN_ML,...,FT_AG,LN_JNPREF_b,LN_JNPREF,FT_SL_IN_b,FT_JN_IN_b,FT_SL_IN,FT_JN_IN,JUDGE_ML,JUDGE_ID,Unnamed: 113
0,43362,2018,1,APA_2018_3010,http://labje.unizar.es/sentencias/APA_2018_301...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,3,2,5,4,1,0,
1,43621,2019,0,APA_2019_1490,http://labje.unizar.es/sentencias/APA_2019_149...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,1,1,1,3,1,1,
2,43607,2019,0,APA_2019_1497,http://labje.unizar.es/sentencias/APA_2019_149...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,4,3,8,6,1,1,
3,43600,2019,0,APA_2019_1500,http://labje.unizar.es/sentencias/APA_2019_150...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,2,1,3,2,0,2,
4,43818,2019,0,APA_2019_15542,http://labje.unizar.es/sentencias/APA_2019_155...,Barcelona,Cataluña,1,0,0,...,0,0,0,2,0,3,0,1,3,


## Economic features analysis (plaintiff)

### By type of request

In [95]:
base_rates.percentages_by_group(df, "RQ_MP_AT", "RQ_JOINT", "Request sole asks for maintenance payments attribution", "Request joint asks for maintenance payments attribution")

RQ_JOINT = 0 -->  RQ_MP_AT: Counter({0: 520, 1: 333})

Request sole asks for maintenance payments attribution: 39.04%

RQ_JOINT = 1 -->  RQ_MP_AT: Counter({0: 1000, 1: 31})

Request joint asks for maintenance payments attribution: 3.01%


In [96]:
base_rates.percentages_by_group(df, "RQ_MP_SP", "RQ_JOINT", "Request sole asks for maintenance payments suppression", "Request joint asks for maintenance payments suppression")

RQ_JOINT = 0 -->  RQ_MP_SP: Counter({0: 733, 1: 120})

Request sole asks for maintenance payments suppression: 14.07%

RQ_JOINT = 1 -->  RQ_MP_SP: Counter({1: 583, 0: 448})

Request joint asks for maintenance payments suppression: 56.55%


In [97]:
base_rates.percentages_by_group(df, "RQ_FH_AT", "RQ_JOINT", "Request sole asks for family home attribution", "Request joint asks for family home attribution")

RQ_JOINT = 0 -->  RQ_FH_AT: Counter({0: 758, 1: 95})

Request sole asks for family home attribution: 11.14%

RQ_JOINT = 1 -->  RQ_FH_AT: Counter({0: 947, 1: 84})

Request joint asks for family home attribution: 8.15%


In [98]:
base_rates.percentages_by_group(df, "RQ_FH_SP", "RQ_JOINT", "Request sole asks for family home suppression", "Request joint asks for family home suppression")

RQ_JOINT = 0 -->  RQ_FH_SP: Counter({0: 845, 1: 8})

Request sole asks for family home suppression: 0.94%

RQ_JOINT = 1 -->  RQ_FH_SP: Counter({0: 965, 1: 66})

Request joint asks for family home suppression: 6.4%


### Maintenance payments request (attribution and suppression)

In [99]:
base_rates.percentages_by_group(df, "RQ_MP_AT", "PLAIN_ML", "Female plaintiff asks for maintenance payments attribution", "Male plaintiff asks for maintenance payments attribution")

PLAIN_ML = 0 -->  RQ_MP_AT: Counter({0: 473, 1: 280})

Female plaintiff asks for maintenance payments attribution: 37.18%

PLAIN_ML = 1 -->  RQ_MP_AT: Counter({0: 1047, 1: 84})

Male plaintiff asks for maintenance payments attribution: 7.43%


In [100]:
base_rates.percentages_by_group(df, "RQ_MP_SP", "PLAIN_ML", "Female plaintiff asks for maintenance payments suppression", "Male plaintiff asks for maintenance payments suppression")

PLAIN_ML = 0 -->  RQ_MP_SP: Counter({0: 654, 1: 99})

Female plaintiff asks for maintenance payments suppression: 13.15%

PLAIN_ML = 1 -->  RQ_MP_SP: Counter({1: 604, 0: 527})

Male plaintiff asks for maintenance payments suppression: 53.4%


<div class="alert alert-block alert-warning col-md-12">

<p>
The request rates of female and male plaintiffs are highly imbalanced. Female plaintiffs ask for the attribution of maintenance payments far more often than male plaintiffs (37.18% vs. 7.43%), whereas male plaintiffs ask for their suppression far more often than female plaintiffs (53.4% vs. 13.15%).</p>

</div>

#### Maintenance payments request attribution and suppression - sole custody request

In [101]:
df_request_sole, df_request_joint = base_rates.df_by_group(df, "RQ_JOINT")

In [102]:
base_rates.percentages_by_group(df_request_sole, "RQ_MP_AT", "PLAIN_ML", "Female plaintiff asks for maintenance payments attribution", "Male plaintiff asks for maintenance payments attribution")

PLAIN_ML = 0 -->  RQ_MP_AT: Counter({0: 378, 1: 273})

Female plaintiff asks for maintenance payments attribution: 41.94%

PLAIN_ML = 1 -->  RQ_MP_AT: Counter({0: 142, 1: 60})

Male plaintiff asks for maintenance payments attribution: 29.7%


In [103]:
base_rates.percentages_by_group(df_request_sole, "RQ_MP_SP", "PLAIN_ML", "Female plaintiff asks for maintenance payments suppression", "Male plaintiff asks for maintenance payments suppression")

PLAIN_ML = 0 -->  RQ_MP_SP: Counter({0: 594, 1: 57})

Female plaintiff asks for maintenance payments suppression: 8.76%

PLAIN_ML = 1 -->  RQ_MP_SP: Counter({0: 139, 1: 63})

Male plaintiff asks for maintenance payments suppression: 31.19%


#### Maintenance payments request attribution and suppression - joint custody request

In [104]:
base_rates.percentages_by_group(df_request_joint, "RQ_MP_AT", "PLAIN_ML", "Female plaintiff asks for maintenance payments attribution", "Male plaintiff asks for maintenance payments attribution")

PLAIN_ML = 0 -->  RQ_MP_AT: Counter({0: 95, 1: 7})

Female plaintiff asks for maintenance payments attribution: 6.86%

PLAIN_ML = 1 -->  RQ_MP_AT: Counter({0: 905, 1: 24})

Male plaintiff asks for maintenance payments attribution: 2.58%


In [105]:
base_rates.percentages_by_group(df_request_joint, "RQ_MP_SP", "PLAIN_ML", "Female plaintiff asks for maintenance payments suppression", "Male plaintiff asks for maintenance payments suppression")

PLAIN_ML = 0 -->  RQ_MP_SP: Counter({0: 60, 1: 42})

Female plaintiff asks for maintenance payments suppression: 41.18%

PLAIN_ML = 1 -->  RQ_MP_SP: Counter({1: 541, 0: 388})

Male plaintiff asks for maintenance payments suppression: 58.23%


### Family home request (attribution and suppression)

In [106]:
base_rates.percentages_by_group(df, "RQ_FH_AT", "PLAIN_ML", "Female plaintiff asks for family home attribution", "Male plaintiff asks for family home attribution")

PLAIN_ML = 0 -->  RQ_FH_AT: Counter({0: 666, 1: 87})

Female plaintiff asks for family home attribution: 11.55%

PLAIN_ML = 1 -->  RQ_FH_AT: Counter({0: 1039, 1: 92})

Male plaintiff asks for family home attribution: 8.13%


In [107]:
base_rates.percentages_by_group(df, "RQ_FH_SP", "PLAIN_ML", "Female plaintiff asks for family home suppression", "Male plaintiff asks for family home suppression")

PLAIN_ML = 0 -->  RQ_FH_SP: Counter({0: 744, 1: 9})

Female plaintiff asks for family home suppression: 1.2%

PLAIN_ML = 1 -->  RQ_FH_SP: Counter({0: 1066, 1: 65})

Male plaintiff asks for family home suppression: 5.75%


<div class="alert alert-block alert-warning col-md-12">

<p>
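The family home requests show the same pattern as the maintenance payments requests, although the differences are much less extreme: female plaintiffs ask for attribution somewhat more often than male plaintiffs (11.55% vs. 8.13%), and male plaintiffs ask for suppression more often than female plaintiffs (5.75% vs. 1.2%).</p>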

</div>

#### Family home request (attribution and suppression) - sole custody

In [108]:
base_rates.percentages_by_group(df_request_sole, "RQ_FH_AT", "PLAIN_ML", "Female plaintiff asks for family home attribution", "Male plaintiff asks for family home attribution")

PLAIN_ML = 0 -->  RQ_FH_AT: Counter({0: 571, 1: 80})

Female plaintiff asks for family home attribution: 12.29%

PLAIN_ML = 1 -->  RQ_FH_AT: Counter({0: 187, 1: 15})

Male plaintiff asks for family home attribution: 7.43%


In [109]:
base_rates.percentages_by_group(df_request_sole, "RQ_FH_SP", "PLAIN_ML", "Female plaintiff asks for family home suppression", "Male plaintiff asks for family home suppression")

PLAIN_ML = 0 -->  RQ_FH_SP: Counter({0: 647, 1: 4})

Female plaintiff asks for family home suppression: 0.61%

PLAIN_ML = 1 -->  RQ_FH_SP: Counter({0: 198, 1: 4})

Male plaintiff asks for family home suppression: 1.98%


#### Family home request (attribution and suppression) - joint custody

In [110]:
base_rates.percentages_by_group(df_request_joint, "RQ_FH_AT", "PLAIN_ML", "Female plaintiff asks for family home attribution", "Male plaintiff asks for family home attribution")

PLAIN_ML = 0 -->  RQ_FH_AT: Counter({0: 95, 1: 7})

Female plaintiff asks for family home attribution: 6.86%

PLAIN_ML = 1 -->  RQ_FH_AT: Counter({0: 852, 1: 77})

Male plaintiff asks for family home attribution: 8.29%


In [111]:
base_rates.percentages_by_group(df_request_joint, "RQ_FH_SP", "PLAIN_ML", "Female plaintiff asks for family home suppression", "Male plaintiff asks for family home suppression")

PLAIN_ML = 0 -->  RQ_FH_SP: Counter({0: 97, 1: 5})

Female plaintiff asks for family home suppression: 4.9%

PLAIN_ML = 1 -->  RQ_FH_SP: Counter({0: 868, 1: 61})

Male plaintiff asks for family home suppression: 6.57%


### Maintenance payments resolution

In [112]:
def attribution_suppression_by_gender(df, var_attr, var_supp):
    """Print plaintiff-gender base rates for an attribution and a suppression variable.

    Two reports are printed, one under an "ATTRIBUTION" heading and one under a
    "SUPPRESSION" heading, separated by a blank gap. Each report is produced by
    ``base_rates.percentages_by_group`` grouped on ``PLAIN_ML`` with the labels
    "Female plaintiff" and "Male plaintiff" (passed in that order).

    Parameters
    ----------
    df : data frame forwarded unchanged to ``base_rates.percentages_by_group``.
    var_attr : column name reported in the attribution section.
    var_supp : column name reported in the suppression section.

    Returns
    -------
    None. The function only prints.
    """
    sections = (("ATTRIBUTION", var_attr), ("SUPPRESSION", var_supp))
    for position, (heading, variable) in enumerate(sections):
        if position:
            # Visual gap between the first report and the second one.
            print("\n")
        print(heading)
        print("-----------")
        base_rates.percentages_by_group(df, variable, "PLAIN_ML", "Female plaintiff", "Male plaintiff")

In [113]:
attribution_suppression_by_gender(df, "CD_MP_AT", "CD_MP_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_MP_AT: Counter({0: 560, 1: 193})

Female plaintiff: 25.63%

PLAIN_ML = 1 -->  CD_MP_AT: Counter({0: 629, 1: 502})

Male plaintiff: 44.39%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_MP_SP: Counter({0: 525, 1: 228})

Female plaintiff: 30.28%

PLAIN_ML = 1 -->  CD_MP_SP: Counter({0: 881, 1: 250})

Male plaintiff: 22.1%


In [114]:
attribution_suppression_by_gender(df_request_sole, "CD_MP_AT", "CD_MP_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_MP_AT: Counter({0: 496, 1: 155})

Female plaintiff: 23.81%

PLAIN_ML = 1 -->  CD_MP_AT: Counter({0: 130, 1: 72})

Male plaintiff: 35.64%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_MP_SP: Counter({0: 438, 1: 213})

Female plaintiff: 32.72%

PLAIN_ML = 1 -->  CD_MP_SP: Counter({0: 142, 1: 60})

Male plaintiff: 29.7%


In [115]:
attribution_suppression_by_gender(df_request_joint, "CD_MP_AT", "CD_MP_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_MP_AT: Counter({0: 64, 1: 38})

Female plaintiff: 37.25%

PLAIN_ML = 1 -->  CD_MP_AT: Counter({0: 499, 1: 430})

Male plaintiff: 46.29%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_MP_SP: Counter({0: 87, 1: 15})

Female plaintiff: 14.71%

PLAIN_ML = 1 -->  CD_MP_SP: Counter({0: 739, 1: 190})

Male plaintiff: 20.45%


### Family home resolution

In [116]:
attribution_suppression_by_gender(df, "CD_FH_AT", "CD_FH_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_FH_AT: Counter({0: 724, 1: 29})

Female plaintiff: 3.85%

PLAIN_ML = 1 -->  CD_FH_AT: Counter({0: 1062, 1: 69})

Male plaintiff: 6.1%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_FH_SP: Counter({0: 689, 1: 64})

Female plaintiff: 8.5%

PLAIN_ML = 1 -->  CD_FH_SP: Counter({0: 1044, 1: 87})

Male plaintiff: 7.69%


In [117]:
attribution_suppression_by_gender(df_request_sole, "CD_FH_AT", "CD_FH_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_FH_AT: Counter({0: 626, 1: 25})

Female plaintiff: 3.84%

PLAIN_ML = 1 -->  CD_FH_AT: Counter({0: 197, 1: 5})

Male plaintiff: 2.48%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_FH_SP: Counter({0: 595, 1: 56})

Female plaintiff: 8.6%

PLAIN_ML = 1 -->  CD_FH_SP: Counter({0: 189, 1: 13})

Male plaintiff: 6.44%


In [118]:
attribution_suppression_by_gender(df_request_joint, "CD_FH_AT", "CD_FH_SP")

ATTRIBUTION
-----------
PLAIN_ML = 0 -->  CD_FH_AT: Counter({0: 98, 1: 4})

Female plaintiff: 3.92%

PLAIN_ML = 1 -->  CD_FH_AT: Counter({0: 865, 1: 64})

Male plaintiff: 6.89%


SUPPRESSION
-----------
PLAIN_ML = 0 -->  CD_FH_SP: Counter({0: 94, 1: 8})

Female plaintiff: 7.84%

PLAIN_ML = 1 -->  CD_FH_SP: Counter({0: 855, 1: 74})

Male plaintiff: 7.97%


# Parallel plot

In [119]:
df.head()

Unnamed: 0,DATE,YEAR,Train,ID,URL,HQ,AUT_COMM,PLAIN_ML,PLAIN_WO,DEFEN_ML,...,FT_AG,LN_JNPREF_b,LN_JNPREF,FT_SL_IN_b,FT_JN_IN_b,FT_SL_IN,FT_JN_IN,JUDGE_ML,JUDGE_ID,Unnamed: 113
0,43362,2018,1,APA_2018_3010,http://labje.unizar.es/sentencias/APA_2018_301...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,3,2,5,4,1,0,
1,43621,2019,0,APA_2019_1490,http://labje.unizar.es/sentencias/APA_2019_149...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,1,1,1,3,1,1,
2,43607,2019,0,APA_2019_1497,http://labje.unizar.es/sentencias/APA_2019_149...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,4,3,8,6,1,1,
3,43600,2019,0,APA_2019_1500,http://labje.unizar.es/sentencias/APA_2019_150...,Alicante,Comunidad Valenciana,1,0,0,...,0,0,0,2,1,3,2,0,2,
4,43818,2019,0,APA_2019_15542,http://labje.unizar.es/sentencias/APA_2019_155...,Barcelona,Cataluña,1,0,0,...,0,0,0,2,0,3,0,1,3,


In [120]:
def generate_value_counts(df, gender):
    """Count granted / not-granted resolutions per request type for one plaintiff gender.

    For every request column (``RQ_MP_AT``, ``RQ_MP_SP``, ``RQ_FH_AT``,
    ``RQ_FH_SP``) the rows where ``PLAIN_ML == gender`` and the request equals 1
    are split by the matching resolution column (``CD_MP_AT``, ``CD_MP_SP``,
    ``CD_FH_AT``, ``CD_FH_SP``) being 1 or 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``PLAIN_ML`` and the eight ``RQ_*`` / ``CD_*`` columns above.
    gender : value compared against ``PLAIN_ML`` (the notebook passes 0 or 1).

    Returns
    -------
    list of int
        Eight counts ordered as
        ``[MP_AT granted, MP_AT not granted, MP_SP granted, MP_SP not granted,
        FH_AT granted, FH_AT not granted, FH_SP granted, FH_SP not granted]``.
    """
    request_resolution_pairs = [
        ("RQ_MP_AT", "CD_MP_AT"),
        ("RQ_MP_SP", "CD_MP_SP"),
        ("RQ_FH_AT", "CD_FH_AT"),
        ("RQ_FH_SP", "CD_FH_SP"),
    ]

    # Build every mask on the full frame and combine them with `&`. Chaining
    # df[mask][mask][mask] applies a full-length mask to an already filtered
    # frame, which relies on implicit reindexing.
    is_gender = df["PLAIN_ML"] == gender

    value_counts = []
    for request_col, resolution_col in request_resolution_pairs:
        requested = is_gender & (df[request_col] == 1)
        for outcome in (1, 0):
            matches = requested & (df[resolution_col] == outcome)
            # int() keeps the return type a plain Python int, as before.
            value_counts.append(int(matches.sum()))

    return value_counts

In [121]:
value_counts_male = generate_value_counts(df, 1)
value_counts_male

[43, 41, 205, 399, 34, 58, 30, 35]

In [122]:
value_counts_female = generate_value_counts(df, 0)
value_counts_female

[129, 151, 41, 58, 25, 62, 4, 5]

In [123]:
value_counts = value_counts_female + value_counts_male
value_counts

[129, 151, 41, 58, 25, 62, 4, 5, 43, 41, 205, 399, 34, 58, 30, 35]

In [124]:
# One gender code per count in `value_counts`: eight entries for the female
# plaintiff counts (code 0) followed by eight for the male plaintiff counts (code 1).
female_list = [0 for _ in range(8)]
male_list = [1 for _ in range(8)]
gender_list = [*female_list, *male_list]
print(gender_list)

[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]


In [125]:
# Request label for each count, in the order returned by generate_value_counts
# (RQ_MP_AT, RQ_MP_SP, RQ_FH_AT, RQ_FH_SP). Every request appears twice because
# its cases are split into a granted and a not-granted resolution.
combinations_list_request_one_gender = [
    'Maintenance payments attr.', 'Maintenance payments attr.',
    'Maintenance payments supp.', 'Maintenance payments supp.',
    'Family home attr.', 'Family home attr.',
    'Family home supp.', 'Family home supp.',
]
# Same labels once for the female counts and once for the male counts.
combinations_list_request = combinations_list_request_one_gender + combinations_list_request_one_gender

In [126]:
# Resolution label for each count, in the order returned by generate_value_counts:
# for every request type (MP attr., MP supp., FH attr., FH supp.) first the
# granted outcome, then the not-granted one.
combinations_list_resolution_one_gender = [
    label
    for granted, not_granted in (
        ('MP Attr.', 'MP Not Attr.'),
        ('MP Supp.', 'MP Not Supp.'),
        ('FH Attr.', 'FH Not Attr.'),
        ('FH Supp.', 'FH Not Supp.'),
    )
    for label in (granted, not_granted)
]
# Same labels once for the female counts and once for the male counts.
combinations_list_resolution = combinations_list_resolution_one_gender * 2

In [127]:
import plotly.graph_objects as go

# Each ribbon is coloured by its gender code (0 or 1), mapped through a
# two-stop colour scale.
color = gender_list
# NOTE(review): the first stop is written as `rgb(...)` but carries four
# components, while the second uses `rgba(...)` - confirm whether the 0.1 alpha
# is intended and whether the renderer honours it.
colorscale = [[0, 'rgb(90,180,172,0.1)'], [1, 'rgba(216,179,101,0.8)']]

# Parallel-categories diagram: gender -> request -> resolution, with each
# combination weighted by its count in `value_counts`.
fig = go.Figure(
    go.Parcats(
        dimensions=[
            dict(label='Gender', values=gender_list),
            dict(label='Request', values=combinations_list_request),
            dict(label='Resolution', values=combinations_list_resolution),
        ],
        counts=value_counts,
        line=dict(color=color, colorscale=colorscale),
    )
)

fig.update_layout(
    autosize=True,
    width=800,
    height=500,
    margin={'l': 0, 'r': 65, 'b': 20, 't': 30},
    font={'size': 18},
)

# Export a PDF copy next to the notebook before showing the figure inline.
fig.write_image('economic_features.pdf', format='pdf')

fig.show()