In [1]:
import numpy as np
from pathlib import Path
import pandas as pd
import os
from datetime import datetime
import logging
import matplotlib.pyplot as plt
# Base directory used to build input/output paths in the cells below.
# NOTE(review): this is an absolute, machine-specific path, and `file_path` is
# rebound to a different directory further down the notebook, so the location
# of later outputs depends on which cells have already run.
file_path = '/mnt/c/Users/Administrator/OneDrive/Documents/DCRT/RAW DATA'

In [2]:
# Load the case-level extract from CSV (the file is a CSV, not a Stata file).
df  = pd.read_csv(f'{file_path}/SC/sc-2023-24.csv')

In [3]:
# Rename the seven bench_N columns to judge_N (N = 1..7).
bench_to_judge = {f'bench_{seat}': f'judge_{seat}' for seat in range(1, 8)}
df.rename(columns=bench_to_judge, inplace=True)

In [4]:
# Build the case number as "<caseid_type>-<caseid_no>-<filed_yyyy>".
case_id_parts = df[['caseid_type', 'caseid_no', 'filed_yyyy']].astype(str)
df['case_number'] = (
    case_id_parts['caseid_type']
    + '-' + case_id_parts['caseid_no']
    + '-' + case_id_parts['filed_yyyy']
)

In [5]:
# Names that the next cells look for in the judge columns: rows containing any
# of them are copied to `df_drs` and then removed from `df`.
# NOTE(review): "drs" presumably abbreviates a role (e.g. deputy registrars) — confirm.
drs = ['KARIUKI, NELLY WANGECHI', 'KASAVULI, BERNARD', 'WACHIRA, LETIZIA M.']

In [6]:
# Trim leading/trailing whitespace from every judge column (judge_1 .. judge_7).
judge_columns = [f'judge_{seat}' for seat in range(1, 8)]
for judge_col in judge_columns:
    df[judge_col] = df[judge_col].str.strip()

In [7]:
# Copy out the rows where any of judge_1 .. judge_7 holds a name from `drs`.
judge_columns = [f'judge_{seat}' for seat in range(1, 8)]
has_dr_on_bench = df[judge_columns].isin(drs).any(axis=1)
df_drs = df[has_dr_on_bench].copy()

In [8]:
# Remove from `df` every row in which any of judge_1 .. judge_7 is a name in `drs`.
judge_columns = [f'judge_{seat}' for seat in range(1, 8)]
has_dr_on_bench = df[judge_columns].isin(drs).any(axis=1)
df = df[~has_dr_on_bench].copy()

In [9]:
# Add a 'matters_handled' column equal to 1 on every row of `df` (note: `df`,
# not `df_drs`); it is summed per bench further down to count matters.
df['matters_handled'] = 1

## Create groups of bench panels

In [10]:
def drop_nan_lists(column):
    """
    Return the entries of ``column`` that carry no missing values.

    Args:
        column: An iterable of entries. An entry that is a ``list`` is kept
            only if none of its members is missing; any other entry is kept
            only if it is not itself missing (as judged by ``pd.isna``).

    Returns:
        list: The kept entries, in their original order.
    """
    def _is_complete(entry):
        # Lists are judged member by member; scalars are judged directly.
        if isinstance(entry, list):
            return not any(pd.isna(member) for member in entry)
        return not pd.isna(entry)

    return [entry for entry in column if _is_complete(entry)]

In [11]:
# Order-insensitive comparison of two lists
def are_lists_similar(list1, list2):
    """
    Tell whether two lists hold the same elements, ignoring order.

    Duplicates count: ``['a']`` and ``['a', 'a']`` are not similar.

    Returns:
        bool: True when the sorted contents of both lists are equal.
    """
    first_sorted = list(list1)
    first_sorted.sort()
    second_sorted = list(list2)
    second_sorted.sort()
    return first_sorted == second_sorted

# Function to create groupings of similar lists
def create_groups(df, col_name):
    """
    Group the lists stored in ``df[col_name]`` by their order-insensitive content.

    Two lists fall in the same group when their sorted contents are equal.
    Groups are labelled ``Group_1``, ``Group_2``, ... in order of first
    appearance, so the labels depend on the row order of ``df``.

    Each list is reduced to a canonical key (its sorted members as a tuple)
    and looked up in a dict, so the frame is scanned once instead of comparing
    every row against every member of every existing group.

    Args:
        df: DataFrame holding the lists.
        col_name: Name of the column whose values are lists. Members must be
            mutually orderable and hashable (e.g. strings).

    Returns:
        dict: Maps each group label to the list of member lists (unsorted, in
        row order) that belong to that group.
    """
    groups = {}
    label_by_key = {}

    for members in df[col_name]:
        canonical_key = tuple(sorted(members))
        label = label_by_key.get(canonical_key)
        if label is None:
            # First time this combination is seen: open the next numbered group.
            label = f"Group_{len(groups) + 1}"
            label_by_key[canonical_key] = label
            groups[label] = []
        groups[label].append(members)

    return groups

In [12]:
# Collect each row's judges (judge_1 .. judge_7) into one list, leaving out
# the seats that are empty (NaN).
judge_columns = [f'judge_{seat}' for seat in range(1, 8)]
bench_members = [
    drop_nan_lists(list(seats))
    for seats in df[judge_columns].itertuples(index=False, name=None)
]
df['bench_name'] = pd.Series(bench_members, index=df.index, dtype=object)

In [13]:
# Group the benches, then index every member list (by its string form) to the
# label of the group it was placed in.
groupings = create_groups(df, 'bench_name')
group_map = {
    str(member_list): label
    for label, member_lists in groupings.items()
    for member_list in member_lists
}

In [14]:
# Label every row with the group its bench belongs to. The lookup uses the
# string form of the still-unsorted list, so it must run before the sort below.
df['bench_panel'] = df['bench_name'].map(lambda members: group_map[str(members)])
# Sort the judge names inside each 'bench_name' list, then keep a hashable
# tuple copy that can be used as a grouping key.
df['bench_name'] = df['bench_name'].map(sorted)
df['bench_tuple'] = df['bench_name'].map(tuple)

In [15]:
# Keep only the rows whose 'bench_name' lists two or more judges.
bench_size = df['bench_name'].map(len)
df_bench = df[bench_size >= 2].copy()

In [16]:
# Resolution category -> outcome labels that belong to it.
# Every label is spelled exactly as str.title() renders it, because the
# 'outcome' column is title-cased before it is matched against these lists;
# a label in any other casing can never match. Each label appears once.
MERIT_CATEGORY = {
    'Merit Resolution': [
        'Judgment Delivered- Case Closed',
        'Judgment Delivered',
        'Judgment Delivered- Acquittal',
        'Judgment Delivered- Convicted',
        'Retrial',
        'Appeal Dismissed',
        'Grant Revoked',
    ],
    'Non Merit Resolution': [
        'Grant Confirmed',
        'Matter Withdrawn',
        'Dismissed For Want Of Prosecution - Case Closed',
        'Dismissed',
        'Terminated/ Struck Out/ Dismissed/Case Closed',
        'Application Allowed - Case Closed',
        'Matter Settled- Case Closed',
        'Consent Recorded - Case Closed',
        'Application Withdrawn - Case Closed',
        'Struck Out',
        'Application Dismissed - Case Closed',
        'Out Of Court Settlement Reached',
        'Terminated',
        'Consolidated- Case Closed',
        'Interlocutory Judgement Entered',
        'Abated',
        'Limited Grant Issued',
        'Placed In Probation',
        'Revision Declined',
        'Probation Orders Issued',
        'Matter Settled Through Mediation',
        'Appeal Rejected',
        'Order Issued - Case Closed',
    ],
    'Rulings': [
        'Ruling Delivered- Accused Put On Defense',
        'Ruling Delivered- Case Closed',
        'Ruling Delivered- Accused Discharged',
        'Ruling Delivered- Application Closed',
        'Ruling-Case Closed',
    ],
    'Ruling Delivered': ['Ruling Delivered'],
}


In [17]:
def apply_title_case(text):
    """
    Title-case a single value, passing missing values through.

    Args:
        text: The value to process.

    Returns:
        ``text.title()`` when ``text`` is a string; ``np.nan`` when it is
        missing according to ``pd.isna``; otherwise ``str(text)`` unchanged
        (non-string values are converted but not title-cased).
    """
    if isinstance(text, str):
        return text.title()
    return np.nan if pd.isna(text) else str(text)
# Title-case every outcome label so it can be matched against MERIT_CATEGORY.
df_bench['outcome'] = df_bench['outcome'].apply(apply_title_case)

In [18]:
# Title-case the 'outcome' column (only this column, and title case rather than
# upper case). NOTE(review): apply_title_case was already applied to this
# column in the previous cell, so this step looks redundant — confirm and drop.
df_bench['outcome'] = df_bench['outcome'].str.title()

In [22]:
# Classify each outcome by looking its label up in MERIT_CATEGORY.
# The 'outcome' column has been title-cased, so the lookup keys are title-cased
# too; otherwise any label written in MERIT_CATEGORY with different casing
# silently maps to NaN. Outcomes not listed in MERIT_CATEGORY still become NaN.
outcome_to_category = {
    outcome.title(): category
    for category, outcomes in MERIT_CATEGORY.items()
    for outcome in outcomes
}
df_bench['merit_classification'] = df_bench['outcome'].map(outcome_to_category)

In [31]:
# Count cases per bench and merit classification (one row per bench, one
# column per classification), ordered by each bench's total, largest first.
# The helper 'total' column is only used for ordering and is dropped again.
bench_performance = (
    df_bench
    .pivot_table(
        index='bench_tuple',
        columns='merit_classification',
        values='case_number',
        aggfunc='count',
        fill_value=0,
    )
    .assign(total=lambda counts: counts.sum(axis=1))
    .sort_values('total', ascending=False)
    .drop(columns='total')
)

In [32]:
# Total matters handled by each bench.
matters_handled = df_bench.groupby('bench_tuple')['matters_handled'].sum().reset_index()
# Panel label (Group_N) of each bench. Every bench_tuple has exactly one label,
# since both are derived from the same sorted list of judges.
panel_labels = df_bench.groupby('bench_tuple')['bench_panel'].first().reset_index()

# Attach both to the per-bench outcome counts. Previously 'bench_panel' was
# produced by renaming the positional index from reset_index(), so it held row
# numbers rather than panel labels.
bench_performance = (
    bench_performance
    .merge(matters_handled, on='bench_tuple', how='left')
    .merge(panel_labels, on='bench_tuple', how='left')
    .rename(columns={'matters_handled': 'total_matters_handled'})
)
# Keep 'bench_panel' as the first column, where it sat before.
bench_performance.insert(0, 'bench_panel', bench_performance.pop('bench_panel'))

In [None]:
bench_performance

In [33]:
# Export the per-bench table. index=True also writes the positional row index
# (left by reset_index above) as an unnamed first column.
bench_performance.to_csv(f'{file_path}/SC/bench_performance.csv', index=True)

In [34]:
# Number of cases in each merit classification, largest first.
(
    df_bench
    .groupby('merit_classification')
    .agg({'case_number': 'count'})
    .reset_index()
    .sort_values('case_number', ascending=False)
)

Unnamed: 0,merit_classification,case_number
3,Rulings,36
0,Merit Resolution,21
2,Ruling Delivered,19
1,Non Merit Resolution,1


In [None]:
# Number of rows in df_bench for each distinct bench, largest first.
bench_sittings = df_bench.groupby('bench_tuple').size().sort_values(ascending=False)

bench_tuple
(IBRAHIM, MOHAMMED K., KOOME, MARTHA K., LENAOLA, ISAAC, MWILU, PHILOMENA MBETE, NDUNGU, SUSANNA NJOKI, OUKO, WILLIAM, WANJALA, SMOKIN C.)    20
(IBRAHIM, MOHAMMED K., LENAOLA, ISAAC, NDUNGU, SUSANNA NJOKI, OUKO, WILLIAM, WANJALA, SMOKIN C.)                                              15
(IBRAHIM, MOHAMMED K., MWILU, PHILOMENA MBETE, NDUNGU, SUSANNA NJOKI, OUKO, WILLIAM, WANJALA, SMOKIN C.)                                       9
(IBRAHIM, MOHAMMED K., LENAOLA, ISAAC, MWILU, PHILOMENA MBETE, OUKO, WILLIAM, WANJALA, SMOKIN C.)                                              8
(IBRAHIM, MOHAMMED K., LENAOLA, ISAAC, MWILU, PHILOMENA MBETE, NDUNGU, SUSANNA NJOKI, WANJALA, SMOKIN C.)                                      7
(IBRAHIM, MOHAMMED K., KOOME, MARTHA K., LENAOLA, ISAAC, MWILU, PHILOMENA MBETE, WANJALA, SMOKIN C.)                                           6
(LENAOLA, ISAAC, MWILU, PHILOMENA MBETE, NDUNGU, SUSANNA NJOKI, OUKO, WILLIAM, WANJALA, SMOKIN C.)                    

In [None]:
# Name the sittings Series so it becomes a 'sittings' column when merged.
bench_sittings = bench_sittings.rename('sittings')

# Overall performance = per-bench outcome counts joined with per-bench sittings.
bench_overall_performance = bench_performance.merge(
    bench_sittings,
    on='bench_tuple',
    how='outer',
)

In [None]:
# Sum every column per bench, order by sittings (largest first) and export.
# NOTE(review): the input above is read from the SC folder, but this writes to
# ANALYSIS/COA under a "coa-" file name — confirm the intended destination.
# NOTE(review): .sum() is applied to all columns, including 'bench_panel'.
bench_overall_performance.groupby('bench_tuple').sum().sort_values('sittings', ascending=False).to_csv(f'{file_path}/ANALYSIS/COA/coa-23-24-bench-overall-performance.csv')

In [None]:
# Sort the judge names inside each 'bench_name' list.
# NOTE(review): 'bench_name' was already sorted before df_bench was derived
# from df, so this appears to be a no-op; it does not compute matters handled.
df_bench['bench_name'] = df_bench['bench_name'].apply(lambda x: sorted(x))

In [None]:
# Second export of the same table, to an absolute path that does not use
# `file_path`. NOTE(review): machine-specific location and a CoA folder,
# whereas the earlier export went under the SC folder — confirm which is wanted.
bench_performance.to_csv('/home/fiend/Documents/coa/CoA/bench_performance.csv')

In [None]:
df_bench.groupby('merit_classification').size()

In [None]:
# Number of rows per outcome label, most frequent first.
(
    df_bench
    .groupby('outcome')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

In [None]:
# Drop rows with no 'productivity_outcome' and rebind df_bench to the result,
# so every cell below sees the reduced frame.
# NOTE(review): 'productivity_outcome' is not created anywhere in the cells
# above; presumably it comes from the source CSV — confirm it exists.
df_bench = df_bench.dropna(subset=['productivity_outcome'])

### Top bench sittings

In [None]:
# Sum of 'productivity_outcome' per bench (keyed by the tuple of judges), largest first.
(
    df_bench
    .groupby('bench_tuple')['productivity_outcome']
    .sum()
    .reset_index(name='total')
    .sort_values('total', ascending=False)
)

In [None]:
# Sum of 'productivity_outcome' per bench (keyed by the panel label), largest first.
(
    df_bench
    .groupby('bench_panel')['productivity_outcome']
    .sum()
    .reset_index(name='total')
    .sort_values('total', ascending=False)
)

In [None]:
df_bench

## Approach 2

In [None]:
# 2. Create a bench combination column by collecting non-null judge names, sorting them, and converting to a tuple
def get_bench_combination(row, judge_columns=('judge_1', 'judge_2', 'judge_3',
                                              'judge_4', 'judge_5', 'judge_6',
                                              'judge_7')):
    """
    Return the sorted tuple of judges sitting on the bench for one row.

    All seven judge seats are read, not just the first three, so that larger
    benches are not truncated into a misleading three-judge combination.

    Args:
        row: A mapping-like row (e.g. a ``pd.Series``) keyed by column name.
        judge_columns: Names of the columns holding judge names. Columns that
            are absent from ``row`` are ignored.

    Returns:
        tuple: The non-null judge names found in ``judge_columns``, sorted.
    """
    judges = [row[col] for col in judge_columns if col in row]
    judges = [judge for judge in judges if pd.notnull(judge)]
    return tuple(sorted(judges))

In [None]:
# Add the sorted tuple of judges for every row.
df['bench_combination'] = df.apply(get_bench_combination, axis=1)

# 3. Sort the DataFrame so that if a case has any concluded row, it appears first
# NOTE(review): 'concluded' is not created in the cells above; presumably it
# comes from the source CSV — confirm.
df_sorted = df.sort_values(by='concluded', ascending=False)

# 4. Drop duplicate case_number entries, keeping the first occurrence (which will be a concluded row if it exists)
df_unique = df_sorted.drop_duplicates(subset='case_number', keep='first')



In [None]:
# 5. Keep one row per case, and only for the cases flagged concluded == 1.
is_concluded = df_unique['concluded'].eq(1)
df_concluded = df_unique[is_concluded]

# 6. Number of concluded cases for each bench combination.
bench_concluded_counts = (
    df_concluded
    .groupby('bench_combination')
    .size()
)


In [None]:
# Concluded cases per bench (keyed by the tuple of judges), largest first.
(
    df_concluded
    .groupby('bench_tuple')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

## concluded per judge

In [None]:
# 5. Melt the judge columns into a single 'judge' column; keep case_number and
#    productivity for reference. All seven seats are melted (not only the
#    first three), so judges recorded in judge_4 .. judge_7 are counted too.
judge_columns = [
    f'judge_{seat}' for seat in range(1, 8)
    if f'judge_{seat}' in df_concluded.columns
]
df_melt = df_concluded.melt(id_vars=['case_number', 'productivity'],
                            value_vars=judge_columns,
                            value_name='judge')

# Remove any rows with missing judge names
df_melt = df_melt.dropna(subset=['judge'])



In [None]:
# Rows per judge and productivity value (0 where a pair never occurs), written to CSV.
judge_productivity = df_melt.pivot_table(
    index='judge',
    columns='productivity',
    aggfunc='size',
    fill_value=0,
)
judge_productivity.to_csv(f'{file_path}/judge_productivity.csv')

In [None]:
# 6. Count the occurrences of each judge
#    Each occurrence represents a concluded case where that judge sat.
judge_concluded_counts = df_melt['judge'].value_counts()

In [None]:
judge_concluded_counts

In [None]:
# Rebinds `file_path` to a different, machine-specific directory. Cells above
# that use `file_path` ran with the earlier value; cells below get this one.
# NOTE(review): the trailing slash yields a double slash when combined with
# f'{file_path}/...' — harmless on POSIX paths, but worth normalising.
file_path = '/home/fiend/Documents/coa/CoA/'

In [None]:
df.pivot_table(index='bench_tuple', columns='productivity', values='concluded', aggfunc='sum', fill_value=0).rename_axis(columns=None)
#.to_csv(f'{file_path}/bench_productivity.csv')

In [None]:
df.groupby('bench_tuple').size().reset_index(name='count').sort_values('count', ascending=False)

In [None]:
# The 20 benches with the most rows (sittings) in df.
top_20_bench_sittings = (
    df
    .groupby('bench_tuple')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(20)
)

In [None]:
top_20_bench_sittings.to_csv('top_20_bench_sittings.csv')

### Judge who sat in the most benches

In [None]:
# Flatten the bench_tuple column (one entry per judge per row) and count how
# many rows each judge appears in. This counts sittings (rows), not the number
# of distinct benches a judge belonged to.
most_bench_sittings = df['bench_tuple'].explode().value_counts()
# The ten judges with the highest counts
top_10_judges = most_bench_sittings.nlargest(10)  
top_10_judges

In [None]:
most_bench_sittings.to_csv('top_judges_most_bench_sittings.csv')

#### Judge who sat in the most benches that concluded cases

In [None]:
# Filter the DataFrame for rows where 'concluded' is 1.
# NOTE(review): this rebinds `df_concluded`, replacing the earlier frame that
# was de-duplicated by case_number with every concluded row of `df` (a case
# with several concluded rows is now counted several times) — confirm intent.
df_concluded = df[df['concluded'] == 1]

# Flatten the bench_tuple column and count occurrences of each name
most_bench_conclusions = df_concluded['bench_tuple'].explode().value_counts()
# Get the names that appear most frequently
#top_ten_resolutions = most_bench_conclusions.nlargest(10)


In [None]:
# Export the per-judge counts, written relative to the current working directory.
# NOTE(review): despite the file name, this contains every judge, not a top
# ten — the nlargest(10) step above is commented out.
most_bench_conclusions.to_csv('top_ten_resolutions.csv')

## Bench adjournments

In [None]:
# Treat the placeholder reason 'Other (specify in details of case)' as missing.
is_placeholder = df['reason_adj'] == 'Other (specify in details of case)'
df.loc[is_placeholder, 'reason_adj'] = np.nan
# Flag rows that still have an adjournment reason: 1 if present, otherwise 0.
has_reason = df['reason_adj'].notna()
df['adjourned'] = has_reason.astype(int)

In [None]:
# Keep only the rows flagged as adjourned ('adjourned' == 1).
was_adjourned = df['adjourned'].eq(1)
adjourned_df = df[was_adjourned]

In [None]:
# Count adjourned rows for each (court, bench_panel, bench_tuple) combination.
grouped_df = (
    adjourned_df
    .groupby(['court','bench_panel', 'bench_tuple'])
    .size()
    .reset_index(name='count')
)


In [None]:
grouped_df

### Adjournments per panel

In [None]:
# The 20 (court, bench, panel) combinations with the most adjournments, saved to CSV.
top_adjournments = (
    grouped_df
    .groupby(['court','bench_tuple', 'bench_panel'])['count']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head(20)
)
top_adjournments.to_csv('top_adjournments.csv')

In [None]:
top_adjournments

In [None]:
# sort grouped_df by 'count' in descending order
benched_df = grouped_df.sort_values('count', ascending=False)

In [None]:
# Adjournment reasons recorded for one panel, most frequent first.
# NOTE(review): 'Group_16' is hard-coded. Group labels are numbered in order of
# first appearance when the groups are built, so the label can point at a
# different bench if the input rows or their order change.
df[(df['bench_panel']=='Group_16')].groupby('reason_adj').size().reset_index(name='count').sort_values('count', ascending=False)