In [1]:
import pandas as pd
import logging
from typing import Dict, Any, List, Union, Optional

In [2]:
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [3]:
pd.set_option('display.max_columns', 45)

In [4]:
# file_path = '/home/stanoo/Projects/data/hc'
# df = pd.read_csv(f'{file_path}/vihiga_processed.csv', parse_dates=True)

In [5]:
file_path = '/home/stanoo/Projects/data/tribunal' 
df = pd.read_csv(f'{file_path}/tribunals_processed.csv')

  df = pd.read_csv(f'{file_path}/tribunals_processed.csv')


In [6]:
# convert date columns to datetime
date_columns = ['filed_date', 'activity_date', 'next_date']
for col in date_columns:
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%d')
# df sort by activity_date
df = df.sort_values(by=['activity_date', 'case_number'])


In [7]:
# where the sheduled/next date is outside the evaluation period
cutoff_date = pd.Timestamp('2024-06-30') 

In [8]:
def determine_judgment_scheduling(df, cutoff_date):
    judgment_date_set_outcomes = ["Judgment Date Given", "Judgment On Notice", "Judgment Date Set"]
    judgment_delivered_outcomes = ["Grant Revoked", "Judgment Delivered", 
                                   "Judgment Delivered- Acquittal", "Judgment Delivered- Case Closed", 
                                   "Judgment Delivered- Convicted"]
    
    # Preprocessing: Filter and sort the DataFrame upfront
    df_filtered = df[df['outcome'].isin(judgment_date_set_outcomes + judgment_delivered_outcomes)]
    df_filtered = df_filtered.sort_values(by=['case_number', 'activity_date'])
    
    # Initialize columns
    df['judgment_status'] = 'Not Scheduled'
    df['set_date'] = pd.NaT
    df['delivery_date'] = pd.NaT
    df['delivery_category'] = ''
    
    # Filter rows with judgment set outcomes and valid schedule dates
    judgment_set_rows = df_filtered[df_filtered['outcome'].isin(judgment_date_set_outcomes) & 
                                    (df_filtered['next_date'] <= cutoff_date)]
    
    # For each case, find the earliest set date
    earliest_schedule = judgment_set_rows.groupby('case_number').first().reset_index()
    
    # Create dictionaries to map case numbers to their schedule dates and statuses
    case_to_set_date = dict(zip(earliest_schedule['case_number'], earliest_schedule['next_date']))
    case_to_status = {case: 'Scheduled' for case in earliest_schedule['case_number']}
    
    # Update the result dataframe with schedule information
    df['set_date'] = df['case_number'].map(case_to_set_date)
    df['judgment_status'] = df['case_number'].map(case_to_status).fillna('Not Scheduled')
    df['delivery_category'] = df['case_number'].map(case_to_status).fillna('')
    
    # Filter rows with judgment delivered outcomes
    judgment_delivered_rows = df_filtered[df_filtered['outcome'].isin(judgment_delivered_outcomes)]
    
    # Find the first delivery date after set date
    for case_number, group in earliest_schedule.groupby('case_number'):
        set_date = group['next_date'].values[0]
        delivery = judgment_delivered_rows[(judgment_delivered_rows['case_number'] == case_number) & 
                                           (judgment_delivered_rows['activity_date'] >= set_date)]
        
        if not delivery.empty:
            delivery_date = delivery.iloc[0]['activity_date']
            df.loc[df['case_number'] == case_number, 'delivery_date'] = delivery_date
            df.loc[df['case_number'] == case_number, 'judgment_status'] = 'Delivered'
            
            if delivery_date <= set_date:
                df.loc[df['case_number'] == case_number, 'delivery_category'] = 'On Time'
            else:
                df.loc[df['case_number'] == case_number, 'delivery_category'] = 'Delayed'
        else:
            earlier_delivery = judgment_delivered_rows[(judgment_delivered_rows['case_number'] == case_number) & 
                                                       (judgment_delivered_rows['activity_date'] < set_date)]
            if earlier_delivery.empty:
                if cutoff_date >= set_date:
                    df.loc[df['case_number'] == case_number, 'judgment_status'] = 'Delayed'
                    df.loc[df['case_number'] == case_number, 'delivery_category'] = 'Delayed'
            else:
                df.loc[df['case_number'] == case_number, 'delivery_date'] = earlier_delivery.iloc[0]['activity_date']
                df.loc[df['case_number'] == case_number, 'judgment_status'] = 'Delivered'
                df.loc[df['case_number'] == case_number, 'delivery_category'] = 'On Time'
    
    return df[df['set_date'].notna()]


In [9]:
judgement_df = determine_judgment_scheduling(df, cutoff_date)

In [15]:
rrt = df[df['court'] == 'Rent Restriction']

In [21]:
# drop if df['judge] == 'Registrtar Automation'
rrt = rrt[rrt['judge_1'] != 'Registrar Automation']

In [24]:
rrt[rrt['case_number']=='Rent Restriction/RRC/E1371/2023']

Unnamed: 0,date_dd,date_mon,date_yyyy,caseid_type,caseid_no,filed_dd,filed_mon,filed_yyyy,original_court,original_code,original_number,original_year,case_type,judge_1,judge_2,judge_3,judge_4,judge_5,judge_6,judge_7,comingfor,outcome,...,organization_applicant,male_defendant,female_defendant,organization_defendant,legalrep,applicant_witness,defendant_witness,custody,other_details,court,activity_date,filed_date,next_date,case_number,concluded,registered,productivity,age,judgment_status,set_date,delivery_date,delivery_category
21074,12,Sep,2023,RRC,E1371,12,Sep,2023,0,0,0,0,Rent Restriction Case,Not Yet Assigned,,,,,,,Registration/Filing,Case Registered/Filed,...,0,2,0,0,No,0,0,0,"Case Registered and Paid via E-Payment,Outcome...",Rent Restriction,2023-09-12,2023-09-12,2023-10-02,Rent Restriction/RRC/E1371/2023,0,True,,0,Not Scheduled,NaT,NaT,
21492,2,Oct,2023,RRC,E1371,12,Sep,2023,0,0,0,0,Rent Restriction Case,Hilary Korir,,,,,,,Hearing,Judgment Delivered- Case Closed,...,0,2,0,0,No,1,0,0,,Rent Restriction,2023-10-02,2023-09-12,NaT,Rent Restriction/RRC/E1371/2023,1,False,Merit Resolution,20,Not Scheduled,NaT,NaT,


In [11]:
judgement_df.groupby(['court']).size().reset_index()

Unnamed: 0,court,0
0,Business Premises Rent,1526
1,Capital Markets,33
2,Co-operative,795
3,Communications And Multimedia Appeals,4
4,Energy And Petroleum Tribunal_Energy And Petro...,36
5,Hiv,265
6,Industrial Property,53
7,Land Acquisition,161
8,Legal Education Appeals,70
9,Micro And Small Enterprise,23


In [14]:
judgement_df[judgement_df['court'] == 'Rent Restriction'].to_csv(f'{file_path}/rrt_judgement_df.csv', index=False)

In [10]:
judgement_df.to_csv(f'{file_path}/judgement_df.csv', index=False)

In [72]:
def get_on_time_delivery_proportions(scheduled_cases):
    # Get the final status for each case
    final_status = scheduled_cases.groupby(['court', 'case_number']).last().reset_index()
    
    # Group by court and calculate statistics
    court_stats = final_status.groupby('court').agg({
        'case_number': 'count',
        'delivery_category': lambda x: (x == 'On Time').sum()
    }).rename(columns={
        'case_number': 'total_scheduled',
        'delivery_category': 'delivered_on_time'
    })
    
    # Calculate the proportion
    court_stats['proportion_on_time'] = court_stats['delivered_on_time'] / court_stats['total_scheduled']
    
    return court_stats

In [73]:
delayed_judgement = judgement_df[judgement_df['delivery_category'] == 'Delayed'].drop_duplicates(subset=['case_number'])
delayed_cases = judgement_df[judgement_df['delivery_category'] == 'Delayed'].drop_duplicates(subset=['case_number'])
delayed_judgement.to_csv(f'{file_path}/delayed_judgement.csv', index=False)

In [74]:
delayed_cases.to_csv(f'{file_path}/delayed_cases.csv', index=False)

In [49]:
proportions = get_on_time_delivery_proportions(judgement_df)

In [None]:
proportions

In [50]:
proportions.to_csv(f'{file_path}/scheduled_judgement.csv', index=False)

TODO: 
### Use the data used in df['judgment_status']=='Delivered'


In [51]:
def calculate_judgment_time(df):
    # Define the outcomes representing judgment date set and judgment delivered
    judgment_date_set_outcomes = ["Judgment Date Given", "Judgment On Notice", "Judgment Date Set"]
    judgment_delivered_outcomes = ["Grant Revoked", "Judgment Delivered", 
                                   "Judgment Delivered- Acquittal", "Judgment Delivered- Case Closed", 
                                   "Judgment Delivered- Convicted"]

    # Create a copy of the original DataFrame to avoid modifying it
    result = df.copy()

    result['time_taken_days'] = pd.NaT

    # Group by case_number
    grouped = df.groupby('case_number')

    for case_number, group in grouped:
        # Find the minimum judgment set date
        set_date = group[group['outcome'].isin(judgment_date_set_outcomes)]['activity_date'].min()
        
        # Find the maximum judgment delivered date
        delivery_date = group[group['outcome'].isin(judgment_delivered_outcomes)]['activity_date'].max()

        # If there is no set date but there is a delivery date and an earliest "Judgement" entry
        earliest_judgement_date = group[group['comingfor'] == "Judgement"]['activity_date'].min()
        if pd.isna(set_date) and pd.notna(delivery_date) and pd.notna(earliest_judgement_date):
            # Get dates for the same case before the earliest "Judgement"
            previous_dates = group[(group['activity_date'] < earliest_judgement_date) & (group['activity_date'] >= group['activity_date'].min())]['activity_date']

            if not previous_dates.empty:
                set_date = previous_dates.max()

        # If both dates are available, calculate the time taken
        if pd.notna(set_date) and pd.notna(delivery_date):
            time_taken_days = (delivery_date - set_date).days
            result.loc[result['case_number'] == case_number, 'set_date'] = set_date
            result.loc[result['case_number'] == case_number, 'delivery_date'] = delivery_date
            result.loc[result['case_number'] == case_number, 'time_taken_days'] = time_taken_days

    return result


In [52]:
time_taken = calculate_judgment_time(judgement_df)

  result.loc[result['case_number'] == case_number, 'time_taken_days'] = time_taken_days


In [62]:
time_taken.groupby(['court']).size().reset_index()

Unnamed: 0,court,0
0,Business Premises Rent,1526
1,Capital Markets,33
2,Co-operative,795
3,Communications And Multimedia Appeals,4
4,Energy And Petroleum Tribunal_Energy And Petro...,36
5,Hiv,265
6,Industrial Property,53
7,Land Acquisition,161
8,Legal Education Appeals,70
9,Micro And Small Enterprise,23


In [53]:
delivered_df = time_taken[time_taken['judgment_status']=='Delivered']

In [54]:
delivered_status = delivered_df.groupby(['court', 'case_number']).last().reset_index()

In [55]:
delivered_status['within_45_days'] = delivered_status['time_taken_days'] <= 45
# group the data by court and calculate the proportion of cases within 60 days
within_45_days = delivered_status.groupby('court').agg({
    'case_number': 'count',
    'within_45_days': lambda x: (x == True).sum()
}).rename(columns={
    'case_number': 'total_scheduled',
    'within_45_days': 'delivered_within_45_days'
})

within_45_days['proportion_within_45_days'] = within_45_days['delivered_within_45_days'] / within_45_days['total_scheduled']

In [57]:
within_45_days

Unnamed: 0_level_0,total_scheduled,delivered_within_45_days,proportion_within_45_days
court,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Business Premises Rent,22,14,0.636364
Capital Markets,7,1,0.142857
Co-operative,94,35,0.37234
Communications And Multimedia Appeals,1,1,1.0
Energy And Petroleum Tribunal_Energy And Petroleum Tribunal,5,3,0.6
Hiv,20,11,0.55
Industrial Property,6,2,0.333333
Land Acquisition,18,16,0.888889
Legal Education Appeals,10,9,0.9
Micro And Small Enterprise,2,2,1.0


In [56]:
within_45_days.to_csv(f'{file_path}/delivered_within_45_days.csv')

In [61]:
df[(df['court'] == 'The National Environment') & (df['outcome'] == 'Judgment Delivered- Case Closed')].groupby('comingfor')['comingfor'].count()

comingfor
Judgment    26
Ruling       2
Name: comingfor, dtype: int64

In [75]:
ppd_judge = df[(df['court'] == 'Political Parties Disputes') & (df['outcome'] == 'Judgment Delivered- Case Closed')]
#.groupby('comingfor')['comingfor'].count()

In [76]:
ppd_judge.groupby('outcome').size()

outcome
Judgment Delivered- Case Closed    31
dtype: int64