In [1]:
import datetime
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from pandas import Timestamp as ts
import matplotlib.pyplot as plt
import matplotlib.style as style
import seaborn as sns

In [2]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [3]:
# Display the current working directory; the input and report paths defined in
# the next cell are built relative to it.
Path.cwd()

PosixPath('/Users/jwt/PycharmProjects/cpi/WDBA2023')

In [4]:
# Timestamp of this run; its date is embedded in the report file name below.
today = datetime.datetime.today()

# Input CSV is read from <cwd>/data_files.
input_file = Path.cwd().joinpath('data_files', 'wdba_stats_processed_2022_2023Jun-04-23.csv')

# Reports are written under <cwd>/reports, named with today's date (e.g. "Jun-04-23").
report_dir = Path.cwd().joinpath('reports')
report_file = report_dir.joinpath(f"WDBA_Stats_{today:%b-%d-%y}.xlsx")


In [5]:
# Helper: scale a partial-year figure up to a 12-month estimate
def annualized_projection(value: int, num_months: int) -> int:
    """
    Project a partial-period value onto a full 12-month year.

    Parameters
    ----------
    value : int
        Figure accumulated over the elapsed months
    num_months : int
        Count of months the figure covers

    Returns
    -------
    int
        ``value`` multiplied by ``12 / num_months``, rounded with the
        built-in ``round`` and converted to ``int``
    """
    monthly_multiplier = 12 / num_months
    projected = round(value * monthly_multiplier, 0)
    return int(projected)

In [6]:
# Load the processed case file: the three date columns are parsed as datetimes
# and the two label columns ('group', 'is_prose') are stored as categoricals.
df = pd.read_csv(
    input_file,
    dtype={'group': 'category', 'is_prose': 'category'},
    parse_dates=['date_filed', "date_reopened", "date_terminated"],
)

In [7]:
# Sanity check after loading: column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1624 entries, 0 to 1623
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   case_number       1624 non-null   object        
 1   judge             1624 non-null   object        
 2   date_filed        1624 non-null   datetime64[ns]
 3   date_reopened     92 non-null     datetime64[ns]
 4   date_terminated   1065 non-null   datetime64[ns]
 5   NOS               1624 non-null   int64         
 6   IsProse           1624 non-null   object        
 7   group             1624 non-null   category      
 8   is_prose          1624 non-null   category      
 9   statistical_year  1624 non-null   int64         
dtypes: category(2), datetime64[ns](3), int64(2), object(3)
memory usage: 105.5+ KB


In [8]:
# Peek at the first rows of the loaded frame.
df.head()

Unnamed: 0,case_number,judge,date_filed,date_reopened,date_terminated,NOS,IsProse,group,is_prose,statistical_year
0,3:21-cv-00217-jdp,jdp,2021-04-01,NaT,2022-07-26,751,n,Labor,counseled,2022
1,3:21-cv-00218-wmc,wmc,2021-04-01,NaT,2022-01-03,863,n,Social Security,counseled,2022
2,3:21-cv-00215-wmc,wmc,2021-04-01,NaT,2023-01-31,550,y,Prisoner Petitions,prose,2022
3,3:21-cv-00219-jdp,jdp,2021-04-01,NaT,2021-11-30,864,n,Social Security,counseled,2022
4,3:21-cv-00220-wmc,wmc,2021-04-02,NaT,2022-09-02,330,n,Torts/Personal Injury,counseled,2022


In [9]:
# Peek at the last rows of the loaded frame.
df.tail()

Unnamed: 0,case_number,judge,date_filed,date_reopened,date_terminated,NOS,IsProse,group,is_prose,statistical_year
1619,3:20-cv-00098-wmc,wmc,2020-01-31,2023-01-25,2023-05-12,830,n,Intellectual Property Rights,counseled,2023
1620,3:20-cv-00019-jdp,jdp,2020-01-09,2023-02-13,NaT,830,n,Intellectual Property Rights,counseled,2023
1621,3:18-cv-01003-wmc,wmc,2018-12-04,2023-02-24,NaT,550,y,Prisoner Petitions,prose,2023
1622,3:18-cv-00116-wmc,wmc,2018-02-19,2023-03-24,NaT,550,y,Prisoner Petitions,prose,2023
1623,3:20-cv-00242-wmc,wmc,2020-03-18,2023-03-31,NaT,442,n,Civil Rights,counseled,2023


In [10]:
# Count cases per statistical year, split by pro se vs. counseled.
# 'is_prose' is a categorical column, so `observed` is passed explicitly:
# observed=False keeps every category level in the result regardless of the
# installed pandas version's default, and avoids the FutureWarning pandas emits
# when grouping on a categorical without it.
case_breakdown_pro_se_vs_counseled = (
    df.groupby(['statistical_year', 'is_prose'], observed=False)['case_number']
    .count()
)

In [11]:
# Pivot the inner index level ('is_prose') into columns: one row per
# statistical year, one column per representation type.
case_breakdown_pro_se_vs_counseled.unstack()

is_prose,counseled,prose
statistical_year,Unnamed: 1_level_1,Unnamed: 2_level_1
2022,473,354
2023,419,378


In [12]:
# Share of counseled vs. pro se cases within each statistical year
# (normalize='index' divides every row by its own row total).
pd.crosstab(
    index=df['statistical_year'],
    columns=df['is_prose'],
    values=df['case_number'],
    aggfunc='count',
    normalize='index',
    margins=True,
    margins_name='Total',
)

is_prose,counseled,prose
statistical_year,Unnamed: 1_level_1,Unnamed: 2_level_1
2022,0.571947,0.428053
2023,0.525721,0.474279
Total,0.549261,0.450739


In [13]:
# Absolute case counts per statistical year and representation type, with a
# 'Total' margin on both axes.
df.pivot_table(
    values=['case_number'],
    index=['statistical_year'],
    columns=['is_prose'],
    aggfunc='count',
    fill_value=0,
    margins=True,
    margins_name='Total',
    dropna=False,
    observed=False,
)

Unnamed: 0_level_0,case_number,case_number,case_number
is_prose,counseled,prose,Total
statistical_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2022,473,354,827
2023,419,378,797
Total,892,732,1624


In [14]:
# Case counts per statistical year (rows) and nature-of-suit group (columns),
# with a 'Total' margin row and column.
nos_totals_by_year = pd.crosstab(
    index=df['statistical_year'],
    columns=df['group'],
    values=df['case_number'],
    aggfunc='count',
    margins=True,
    margins_name='Total',
)

In [None]:
# Annualize projections for 2023
# num_months=11 is carried over from the original cell.
# NOTE(review): confirm it still matches the months covered by the input file.
MONTHS_ELAPSED_2023 = 11

# The scaling overwrites the 2023 row in place, so running this cell a second
# time would annualize already-annualized numbers. The flag stored in
# DataFrame.attrs turns a re-run into a no-op; re-creating nos_totals_by_year
# yields a fresh frame without the flag, so the projection is applied again.
if not nos_totals_by_year.attrs.get('annualized_2023', False):
    nos_totals_by_year.loc[2023, :] = nos_totals_by_year.loc[2023, :].apply(
        lambda count: annualized_projection(count, MONTHS_ELAPSED_2023)
    )
    nos_totals_by_year.attrs['annualized_2023'] = True
# NOTE(review): only the 2023 row is rewritten; the 'Total' margin row keeps the
# sum computed from the un-projected counts.

In [15]:
# Display the per-group counts by statistical year.
# NOTE(review): the saved output shows 797 as the 2023 total, the same as the raw
# pivot-table count, and the annualization cell has no execution number, so this
# output appears to predate the projection — re-run the notebook to refresh it.
nos_totals_by_year

group,Bankruptcy,Civil Rights,Contract,Federal Tax Suits,Forfeiture/Penalty,Habeas Corpus,Immigration,Intellectual Property Rights,Labor,Other Statutes,Personal Property,Prisoner Petitions,Real Property,Social Security,Torts/Personal Injury,Total
statistical_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2022,5,134,53,3,5,67,8,29,51,89,14,221,5,83,60,827
2023,4,146,49,7,7,77,9,31,51,46,12,207,13,95,43,797
Total,9,280,102,10,12,144,17,60,102,135,26,428,18,178,103,1624


In [18]:
# Fractional change of every row relative to the row directly above it.
# Only the 2023 row (2023 vs. 2022) is a year-over-year figure; the 'Total'
# row is compared against 2023 and carries no analytical meaning.
pct_change = nos_totals_by_year.pct_change(periods=1)

In [25]:
# 2023 change versus 2022 per group, expressed in percent with two decimals.
pct_change.loc[2023].mul(100).round(2)

group
Bankruptcy                      -20.00
Civil Rights                      8.96
Contract                         -7.55
Federal Tax Suits               133.33
Forfeiture/Penalty               40.00
Habeas Corpus                    14.93
Immigration                      12.50
Intellectual Property Rights      6.90
Labor                             0.00
Other Statutes                  -48.31
Personal Property               -14.29
Prisoner Petitions               -6.33
Real Property                   160.00
Social Security                  14.46
Torts/Personal Injury           -28.33
Total                            -3.63
Name: 2023, dtype: float64