In [None]:
# Directory containing the input data (hard-coded absolute path).
file_path = '/home/stanoo/dcrt/data'
# Load the processed CSV from that directory into `df`.
df = pd.read_csv(f'{file_path}/processed_23-24_df.csv')

In [None]:
def determine_judgment_scheduling(df, cutoff_date):
    """Label every case with its judgment scheduling and delivery outcome.

    A case is "scheduled" when it has at least one judgment-date-set activity
    whose ``next_date`` is on or before ``cutoff_date``. Its set date is the
    ``next_date`` of the first such activity (ordered by ``activity_date``).
    For each scheduled case:

    * if a judgment-delivered activity exists on or after the set date, the
      earliest of those is the delivery date; it is 'On Time' only when it
      falls exactly on the set date, otherwise 'Delayed';
    * otherwise, if a judgment-delivered activity exists before the set date,
      the earliest of those is the delivery date and the case is 'On Time';
    * otherwise the judgment is still outstanding and both the status and the
      category are 'Delayed' (the set date is never after the cutoff, because
      scheduled rows are selected with ``next_date <= cutoff_date``).

    Args:
        df: DataFrame with ``case_number``, ``outcome``, ``activity_date`` and
            ``next_date`` columns. The date columns may hold datetimes or
            parseable date strings; they are parsed internally and are not
            modified on ``df``.
        cutoff_date: Latest set date to consider (anything accepted by
            ``pd.Timestamp``).

    Returns:
        The rows of ``df`` that belong to scheduled cases, including the four
        result columns described below.

    Side effects:
        Adds (or overwrites) ``judgment_status``, ``set_date``,
        ``delivery_date`` and ``delivery_category`` on ``df`` itself.
        Unscheduled cases get 'Not Scheduled', NaT, NaT and '' respectively.

    Raises:
        ValueError: If a date column contains values that cannot be parsed
            (raised by ``pd.to_datetime``).
    """
    judgment_date_set_outcomes = ["Judgment Date Given", "Judgment On Notice", "Judgment Date Set"]
    judgment_delivered_outcomes = ["Grant Revoked", "Judgment Delivered",
                                   "Judgment Delivered- Acquittal", "Judgment Delivered- Case Closed",
                                   "Judgment Delivered- Convicted"]

    cutoff = pd.Timestamp(cutoff_date)

    # Work on a slim copy with real datetimes so every comparison below is
    # chronological, whether the caller passed datetimes or CSV strings.
    relevant = df['outcome'].isin(judgment_date_set_outcomes + judgment_delivered_outcomes)
    events = df.loc[relevant, ['case_number', 'outcome', 'activity_date', 'next_date']].copy()
    events['activity_date'] = pd.to_datetime(events['activity_date'])
    events['next_date'] = pd.to_datetime(events['next_date'])
    events = events.sort_values(by=['case_number', 'activity_date'])

    # Set date per case: next_date of the first qualifying judgment-set activity.
    is_set_event = events['outcome'].isin(judgment_date_set_outcomes)
    scheduled = events[is_set_event & (events['next_date'] <= cutoff)]
    set_date_by_case = scheduled.groupby('case_number')['next_date'].first()
    scheduled_cases = set_date_by_case.index

    # Attach each delivery to its case's set date. Deliveries of unscheduled
    # cases get NaT and therefore fail both comparisons below.
    delivered = events.loc[~is_set_event, ['case_number', 'activity_date']]
    scheduled_for = pd.to_datetime(delivered['case_number'].map(set_date_by_case.to_dict()))
    on_or_after = delivered[delivered['activity_date'] >= scheduled_for]
    before = delivered[delivered['activity_date'] < scheduled_for]

    # Prefer the earliest delivery on/after the set date; fall back to the
    # earliest delivery that happened before it.
    first_on_or_after = on_or_after.groupby('case_number')['activity_date'].min()
    first_before = before.groupby('case_number')['activity_date'].min()
    delivery_by_case = first_on_or_after.reindex(scheduled_cases)
    delivery_by_case = delivery_by_case.fillna(first_before.reindex(scheduled_cases))

    was_delivered = delivery_by_case.notna()
    on_time = was_delivered & (delivery_by_case <= set_date_by_case)
    status_by_case = was_delivered.map({True: 'Delivered', False: 'Delayed'})
    category_by_case = on_time.map({True: 'On Time', False: 'Delayed'})

    # Broadcast the per-case results onto every row of the caller's frame.
    case_numbers = df['case_number']
    df['judgment_status'] = case_numbers.map(status_by_case.to_dict()).fillna('Not Scheduled')
    df['set_date'] = pd.to_datetime(case_numbers.map(set_date_by_case.to_dict()))
    df['delivery_date'] = pd.to_datetime(case_numbers.map(delivery_by_case.to_dict()))
    df['delivery_category'] = case_numbers.map(category_by_case.to_dict()).fillna('')

    return df[df['set_date'].notna()]


def get_on_time_delivery_proportions(scheduled_cases):
    """Summarise, per court, how many scheduled cases were delivered on time.

    Each (court, case_number) pair is reduced to a single row using the last
    value seen in every column. Courts are then aggregated into:

    * ``total_scheduled``    - number of cases counted for the court,
    * ``delivered_on_time``  - cases whose ``delivery_category`` is 'On Time',
    * ``proportion_on_time`` - the ratio of the two.

    Returns a DataFrame indexed by ``court``.
    """
    # One row per case within each court.
    per_case = (
        scheduled_cases
        .groupby(['court', 'case_number'])
        .last()
        .reset_index()
    )

    def count_on_time(categories):
        return (categories == 'On Time').sum()

    # Named aggregation yields the final column names directly.
    summary = per_case.groupby('court').agg(
        total_scheduled=('case_number', 'count'),
        delivered_on_time=('delivery_category', count_on_time),
    )

    summary['proportion_on_time'] = summary['delivered_on_time'] / summary['total_scheduled']
    return summary

def create_pivot_table(df, case_type_timelines):
    """Return per-court resolution statistics with one column set per case type.

    The per-(court, broad_case_type) figures come from
    ``get_case_resolution_proportions``; they are then pivoted so that each
    court is a row and every statistic/case-type combination is a column
    named ``"<case_type>_<statistic>"``.
    """
    resolution_stats = get_case_resolution_proportions(df, case_type_timelines)

    statistic_columns = ['total_cases', 'resolved_within_timeline', 'proportion_resolved']
    wide = resolution_stats.pivot_table(
        values=statistic_columns,
        columns='broad_case_type',
        index='court',
    )

    # Pivoting produces (statistic, case_type) column labels; collapse each
    # pair into a single "<case_type>_<statistic>" string.
    flat_names = []
    for statistic, case_type in wide.columns:
        flat_names.append(f"{case_type}_{statistic}")
    wide.columns = flat_names

    return wide


def get_case_resolution_proportions(df, case_type_timelines):
    """Compute, per court and case type, the share of cases resolved in time.

    A case counts as resolved within its timeline when ``concluded == 1`` and
    its ``age`` does not exceed the limit that ``case_type_timelines`` gives
    for its ``broad_case_type``. Only case types present in
    ``case_type_timelines`` are included.

    Args:
        df: DataFrame with ``court``, ``broad_case_type``, ``concluded``,
            ``age``, ``filed_date`` and ``activity_date`` columns.
        case_type_timelines: Mapping of broad case type to its maximum age.

    Returns:
        DataFrame with one row per (court, broad_case_type) and the columns
        ``court``, ``broad_case_type``, ``total_cases``,
        ``resolved_within_timeline`` and ``proportion_resolved``.

    Side effects:
        ``filed_date`` and ``activity_date`` are converted to datetime on the
        caller's ``df`` (kept from the original implementation so existing
        callers that rely on it keep working).
    """
    for col in ('filed_date', 'activity_date'):
        df[col] = pd.to_datetime(df[col])

    # Filter on the timelines that were actually passed in, rather than on a
    # module-level constant that may be undefined or out of sync.
    cases = df[df['broad_case_type'].isin(list(case_type_timelines))]

    # Vectorised per-row flag: concluded and no older than its type's limit.
    limits = cases['broad_case_type'].map(case_type_timelines)
    resolved = (cases['concluded'] == 1) & (cases['age'] <= limits)

    # The flag has no missing values, so 'count' is the group size and 'mean'
    # is the resolved proportion.
    stats = (
        cases.assign(_resolved=resolved)
        .groupby(['court', 'broad_case_type'])['_resolved']
        .agg(
            total_cases='count',
            resolved_within_timeline='sum',
            proportion_resolved='mean',
        )
        .reset_index()
    )

    return stats


def create_case_resolution_pivot(df, case_type_timelines):
    """Summarise per-court resolution indicators for every case type.

    For each ``case_type -> timeline`` entry, ``df`` is expected to contain a
    column named ``"<case_type>_<timeline>_days"``.
    NOTE(review): these columns are presumably 0/1 (or boolean) "resolved
    within timeline" flags - confirm against the code that creates them.

    For every such column three per-court statistics are produced, in this
    order:

    * ``<column>_total``      - number of non-missing values,
    * ``<column>_resolved``   - sum of the values,
    * ``<column>_proportion`` - resolved / total, or 0 when total is 0.

    Args:
        df: DataFrame with a ``court`` column and the indicator columns.
        case_type_timelines: Mapping of case type to timeline (used only to
            build the indicator column names).

    Returns:
        DataFrame indexed by ``court`` with three columns per case type.

    Raises:
        KeyError: If an expected indicator column is missing from ``df``.
    """
    # Aggregate the real indicator columns directly. Keying a pivot_table
    # aggfunc by the *output* names fails, because those are not columns of df.
    by_court = df.groupby('court')
    courts = by_court.size().index

    summary = {}
    for case_type, timeline in case_type_timelines.items():
        column = f"{case_type}_{timeline}_days"
        total = by_court[column].count()
        resolved = by_court[column].sum()
        # Mask zero totals so the division yields NaN there, then report 0.
        proportion = (resolved / total.where(total > 0)).fillna(0)

        summary[f"{column}_total"] = total
        summary[f"{column}_resolved"] = resolved
        summary[f"{column}_proportion"] = proportion

    return pd.DataFrame(summary, index=courts)

