## Importing the libraries

In [1]:
# !sudo apt-get install python3-pip
# !sudo python3 -m pip install ipykernel
# !pip3 install pandas

import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


#### Function to read the csv files

In [2]:
def read_csv(filename):
    with open(filename) as f:
        reader = csv.reader(f)
        for row in reader:
            print(row)

In [3]:
# read_csv("./judges_clean.csv")


# Converting the CSV files to Pandas dataframe

In [4]:
df_judges = pd.read_csv("./judges_clean.csv")
df_judges.head(10)


Unnamed: 0,ddl_judge_id,state_code,dist_code,court_no,judge_position,female_judge,start_date,end_date
0,1,1,1,1,chief judicial magistrate,0 nonfemale,20-09-2013,20-02-2014
1,2,1,1,1,chief judicial magistrate,0 nonfemale,31-10-2013,20-02-2014
2,3,1,1,1,chief judicial magistrate,0 nonfemale,21-02-2014,31-05-2016
3,4,1,1,1,chief judicial magistrate,0 nonfemale,01-06-2016,06-06-2016
4,5,1,1,1,chief judicial magistrate,0 nonfemale,06-06-2016,07-07-2018
5,6,1,1,1,chief judicial magistrate,1 female,09-07-2018,
6,7,1,1,1,civil judge junior division,0 nonfemale,01-06-2011,09-06-2013
7,8,1,1,1,civil judge junior division,0 nonfemale,06-06-2011,08-06-2014
8,9,1,1,1,civil judge junior division,0 nonfemale,11-06-2012,31-05-2015
9,10,1,1,1,civil judge junior division,0 nonfemale,29-08-2013,08-06-2014


In [5]:
df_judges.head(10)
print("The number of unique type of gender of judges in the data : ")
df_judges["female_judge"].unique()


The number of unique type of gender of judges in the data : 


array(['0 nonfemale', '1 female', '-9998 unclear', nan], dtype=object)

In [6]:
print("The count of those types : ")
df_judges["female_judge"].value_counts()


The count of those types : 


0 nonfemale      67540
1 female         27202
-9998 unclear     3735
Name: female_judge, dtype: int64

In [7]:
#Removing the -9998 unclear rows
df_judges=df_judges[df_judges["female_judge"] != "-9998 unclear"]
df_judges["female_judge"].unique()


array(['0 nonfemale', '1 female', nan], dtype=object)

In [8]:
# Converting the strings to numbers
# "0 nonfemale" & to 0
# "1 female" to 1
# df_judges.loc.__setitem__((slice(None),("female_judge","1 female")),0)
df_judges["female_judge"]=(df_judges["female_judge"]=="1 female").astype(int)
df_judges.head(10)

Unnamed: 0,ddl_judge_id,state_code,dist_code,court_no,judge_position,female_judge,start_date,end_date
0,1,1,1,1,chief judicial magistrate,0,20-09-2013,20-02-2014
1,2,1,1,1,chief judicial magistrate,0,31-10-2013,20-02-2014
2,3,1,1,1,chief judicial magistrate,0,21-02-2014,31-05-2016
3,4,1,1,1,chief judicial magistrate,0,01-06-2016,06-06-2016
4,5,1,1,1,chief judicial magistrate,0,06-06-2016,07-07-2018
5,6,1,1,1,chief judicial magistrate,1,09-07-2018,
6,7,1,1,1,civil judge junior division,0,01-06-2011,09-06-2013
7,8,1,1,1,civil judge junior division,0,06-06-2011,08-06-2014
8,9,1,1,1,civil judge junior division,0,11-06-2012,31-05-2015
9,10,1,1,1,civil judge junior division,0,29-08-2013,08-06-2014


### Checking the cases that were closed within a year

In [9]:
closed_within_year=0
not_closed_within_year=0
df_judges["start_date"]=pd.to_datetime(df_judges["start_date"])
df_judges["end_date"]=pd.to_datetime(df_judges["end_date"])

df_judges.head()
temp_df=(df_judges["end_date"]-df_judges["start_date"])
temp_df=temp_df.dt.days
temp_df=temp_df[temp_df<=1500] #it was giving values upto 8000 so I limited it to 1500



closed_within_year=temp_df[temp_df<=365].count()
not_closed_within_year=temp_df[temp_df>365].count()


print("Closed within a year : ",closed_within_year)
print("Not closed within a year : ",not_closed_within_year)


Closed within a year :  44120
Not closed within a year :  30932


### Count the number of judges in each state


In [10]:
# Count the number of judges in each state
judgesperstate=df_judges["state_code"].value_counts()
df_judgesperstate=pd.DataFrame(judgesperstate)

df_judgesperstate["State_code"]=df_judgesperstate.index
df_judgesperstate.rename(columns={"state_code":"Number_of_judges"},inplace=True)
df_judgesperstate.reset_index(drop=True,inplace=True)
df_judgesperstate["State_name"]="temp"

print(df_judgesperstate)


    Number_of_judges  State_code State_name
0              21987           1       temp
1              13999          13       temp
2               6440           3       temp
3               5583          23       temp
4               5390           9       temp
5               4949          17       temp
6               4121           8       temp
7               3655          10       temp
8               3492           2       temp
9               3060           4       temp
10              2677          16       temp
11              2568          29       temp
12              2321          14       temp
13              2283           7       temp
14              2034          11       temp
15              1921          22       temp
16              1550           6       temp
17              1457          26       temp
18              1078          15       temp
19              1032          18       temp
20               899          12       temp
21               897           5

In [11]:
# Getting the state codes 

df_statecodes=pd.read_csv("./keys/cases_state_key.csv")

#storing only on basis of 2018 codes
df_statecodes=df_statecodes[df_statecodes["year"]==2018]
df_statecodes.drop(["year","pc11_state_name","pc11_state_id"],axis=1,inplace=True)
df_statecodes.sort_values(by="state_code",inplace=True)
df_statecodes.reset_index(drop=True,inplace=True)
print(df_statecodes)


    state_code         state_name
0            1        Maharashtra
1            2     Andhra Pradesh
2            3          Karnataka
3            4             Kerala
4            5   Himachal Pradesh
5            6              Assam
6            7          Jharkhand
7            8              Bihar
8            9          Rajasthan
9           10         Tamil Nadu
10          11             Orissa
11          12  Jammu and Kashmir
12          13      Uttar Pradesh
13          14            Haryana
14          15        Uttarakhand
15          16        West Bengal
16          17            Gujarat
17          18       Chhattisgarh
18          19            Mizoram
19          20            Tripura
20          21          Meghalaya
21          22             Punjab
22          23     Madhya Pradesh
23          24             Sikkim
24          25            Manipur
25          26              Delhi
26          27         Chandigarh
27          29          Telangana
28          30

In [14]:
#assigning the state names to the state codes
for i in range(len(df_judgesperstate)):
    df_judgesperstate.loc[i,"State_name"]=df_statecodes.loc[i,"state_name"]

df_judgesperstate.drop("State_code",axis=1,inplace=True)
print(df_judgesperstate)


    Number_of_judges         State_name
0              21987        Maharashtra
1              13999     Andhra Pradesh
2               6440          Karnataka
3               5583             Kerala
4               5390   Himachal Pradesh
5               4949              Assam
6               4121          Jharkhand
7               3655              Bihar
8               3492          Rajasthan
9               3060         Tamil Nadu
10              2677             Orissa
11              2568  Jammu and Kashmir
12              2321      Uttar Pradesh
13              2283            Haryana
14              2034        Uttarakhand
15              1921        West Bengal
16              1550            Gujarat
17              1457       Chhattisgarh
18              1078            Mizoram
19              1032            Tripura
20               899          Meghalaya
21               897             Punjab
22               438     Madhya Pradesh
23               324             Sikkim
