In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from warnings import filterwarnings
filterwarnings('ignore')

In [4]:
# The same cleaned CSV is loaded twice with different missing-value handling.
CSV_PATH = 'IAS_Cleaned.csv'

# `df`: pandas' default NA markers are disabled (keep_default_na=False).
df = pd.read_csv(CSV_PATH, keep_default_na=False)
# `data`: 'N/A' and 'N.A.' are added to the strings parsed as missing.
data = pd.read_csv(CSV_PATH, na_values=['N/A', 'N.A.'])

In [5]:

# Donut chart (hole=0.4) of officers grouped by the 'Gender' column.
fig = px.pie(
    data,
    names='Gender',
    title='Gender Composition of Officers (in %)',
    hole=0.4,
    height=450,
)
# Centre the title and pin the legend beside the donut.
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.75, 'xanchor': 'left', 'x': 0.63},
)

fig.show()

In [6]:

# Donut chart of officers grouped by the 'Source of Recruitment' column.
fig = px.pie(
    data,
    names='Source of Recruitment',
    title='Candidate Recruitment Breakdown by Source ',
    hole=0.4,
    height=450,
)
# Title is nudged slightly right of centre; legend sits beside the donut.
fig.update_layout(
    title_x=0.52,
    legend={'yanchor': 'top', 'y': 0.75, 'xanchor': 'left', 'x': 0.67},
)

fig.show()


In [7]:

# Donut chart of officers grouped by the 'Qual Type' column.
fig = px.pie(
    data,
    names='Qual Type',
    title='Level of Qualification (in %)',
    hole=0.4,
    height=450,
)
# Centre the title and pin the legend beside the donut.
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.75, 'xanchor': 'left', 'x': 0.61},
)

fig.show()


In [8]:

# Donut chart of officers grouped by the 'Job Status' column.
fig = px.pie(
    data,
    names='Job Status',
    title='Proportion of Officers by Job Status',
    hole=0.4,
    height=450,
)
# Centre the title and pin the legend beside the donut.
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.75, 'xanchor': 'left', 'x': 0.62},
)

fig.show()


In [9]:

# Donut chart of officers grouped by the 'Pay Level' column.
fig = px.pie(
    data,
    names='Pay Level',
    title='Distribution of Officers Across Pay Levels',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.47,
    legend={'yanchor': 'top', 'y': 0.85, 'xanchor': 'left', 'x': 0.2},
)
# Label each slice with its value (textinfo='value').
fig.update_traces(textinfo='value')

fig.show()


In [10]:

# Bar chart with one bar per 'Cadre', coloured by 'Gender'.
fig = px.bar(
    data,
    x='Cadre',
    color='Gender',
    title='Gender Composition of Officers\' Cadre',
    height=550,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 1, 'xanchor': 'left', 'x': 1},
)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()

# Also export the figure as an HTML file.
fig.write_html('fig2.html')


In [11]:

# Bar chart with one bar per 'Domicile', coloured by 'Gender'.
fig = px.bar(
    data,
    x='Domicile',
    color='Gender',
    title='Representation of Gender Across Officers\' Domicile',
    height=550,
)
fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)
fig.show()


In [12]:

# Row count per (Gender, Allotment Year) pair, keeping allotment years before 2024.
temp = (
    data.groupby(['Gender', 'Allotment Year'])
    .agg({'Gender': 'count'})
    .rename(columns={'Gender': 'count'})
    .reset_index()
    .loc[lambda counts: counts['Allotment Year'] < 2024]
)

# One line (with its own marker symbol) per gender.
fig = px.line(
    temp,
    x='Allotment Year',
    y='count',
    color='Gender',
    symbol='Gender',
    title='Year-wise Allotment of Services by Gender',
    height=550,
)
fig.update_layout(title_x=0.5)
fig.show()


In [13]:

# Histogram of 'Age' coloured by 'Gender', with a marginal box plot.
fig = px.histogram(
    data,
    x="Age",
    color="Gender",
    marginal="box",
    title='Age Demographics Disaggregated by Gender',
    height=550,
)
fig.update_layout(title_x=0.5)
fig.show()


In [14]:

# Histogram of 'Service Tenure' coloured by 'Gender', with a marginal box plot.
# (A box-plot-only variant of this chart was tried earlier and dropped.)
fig = px.histogram(
    data,
    x="Service Tenure",
    color="Gender",
    marginal="box",
    title='Trend in Service Tenure Across Gender',
    height=550,
)

fig.update_layout(title_x=0.5)
fig.show()


In [15]:

# One box per 'Cadre' showing the spread of 'Age'; boxes are coloured by cadre.
fig = px.box(
    data,
    x='Cadre',
    y='Age',
    color='Cadre',
    title='Cadre-wise Distribution of Age',
    height=550,
)

fig.update_layout(title_x=0.5)
fig.show()


In [16]:

# One box per 'Cadre' showing the spread of 'Service Tenure'.
# The plot is drawn from the full `data` frame. An 'Active'-only subset used to
# be built here but was never passed to the plot, so that dead assignment was
# removed.
# NOTE(review): if the chart is meant to cover active officers only, pass
# data[data['Job Status'] == 'Active'] to px.box instead of `data`.
fig = px.box(data, y = 'Service Tenure', color = 'Cadre', height = 600, x = 'Cadre',
                   title = 'Cadre-wise Distribution of Service Tenure')
fig.update_layout(title_x=0.5)
fig.show()


In [17]:

# Bar chart with one bar per 'Cadre', coloured by 'Source of Recruitment'.
fig = px.bar(
    data,
    x='Cadre',
    color='Source of Recruitment',
    title='Analysing Cadre Allocation Through Recruitment Channels',
    height=500,
)
fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)
fig.show()


In [18]:


# Row count per (Source of Recruitment, Allotment Year) pair, for years before 2024.
temp = (
    data.groupby(['Source of Recruitment', 'Allotment Year'])
    .agg({'Source of Recruitment': 'count'})
    .rename(columns={'Source of Recruitment': 'count'})
    .reset_index()
    .loc[lambda counts: counts['Allotment Year'] < 2024]
)

# One line (with its own marker symbol) per recruitment source.
fig = px.line(
    temp,
    x='Allotment Year',
    y='count',
    color='Source of Recruitment',
    symbol='Source of Recruitment',
    title='Comparing Recruitment Channels Across Years',
    height=550,
)
fig.update_layout(title_x=0.5)
fig.show()


In [19]:

# Bar chart with one bar per 'Cadre', coloured by 'Qual Type'
# (shown in the figure as 'Level of Qualification').
fig = px.bar(
    data,
    x='Cadre',
    color='Qual Type',
    labels={'Qual Type': 'Level of Qualification'},
    title='Educational Qualification by Cadre',
    height=550,
)
fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [20]:

# Bar chart with one bar per 'Domicile', coloured by 'Qual Type'
# (shown in the figure as 'Level of Qualification').
fig = px.bar(
    data,
    x='Domicile',
    color='Qual Type',
    labels={'Qual Type': 'Level of Qualification'},
    title='Comparison of Educational Levels Across Different Domicile',
    height=550,
)

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [21]:


# Row count per (Qual Type, Allotment Year) pair, for years before 2024.
temp = (
    data.groupby(['Qual Type', 'Allotment Year'])
    .agg({'Qual Type': 'count'})
    .rename(columns={'Qual Type': 'count'})
    .reset_index()
    .loc[lambda counts: counts['Allotment Year'] < 2024]
)

# One line (with its own marker symbol) per qualification level.
fig = px.line(
    temp,
    x='Allotment Year',
    y='count',
    color='Qual Type',
    symbol='Qual Type',
    labels={'Qual Type': 'Level of Qualification'},
    title='Trends in Educational Attainment by Year',
    height=550,
)

fig.update_layout(title_x=0.5)
fig.show()


In [22]:

# Bar chart with one bar per 'Cadre', coloured by 'Job Status'.
fig = px.bar(
    data,
    x='Cadre',
    color='Job Status',
    title='Cadre-wise Job Status of Officers',
    height=550,
)
fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [23]:


# Row count per (Job Status, Allotment Year) pair, for years before 2024.
temp = (
    data.groupby(['Job Status', 'Allotment Year'])
    .agg({'Job Status': 'count'})
    .rename(columns={'Job Status': 'count'})
    .reset_index()
    .loc[lambda counts: counts['Allotment Year'] < 2024]
)

# One line (with its own marker symbol) per job status.
fig = px.line(
    temp,
    x='Allotment Year',
    y='count',
    color='Job Status',
    symbol='Job Status',
    title='Comparison of Job Status Over the Years',
    height=550,
)

fig.update_layout(title_x=0.5)
fig.show()


In [24]:

# Bar chart with one bar per 'Pay Level', coloured by 'Qual Type'
# (shown in the figure as 'Level of Qualification').
fig = px.bar(
    data,
    x='Pay Level',
    color='Qual Type',
    labels={'Qual Type': 'Level of Qualification'},
    title='Comparison of Pay Level by Level of Educational Qualification',
    height=550,
)
fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [25]:

# Work on an explicit copy: assigning a column on a bare column-subset of
# `data` is chained assignment (SettingWithCopyWarning) and may or may not
# write through to `data` depending on the pandas version.
temp = data[['Pay Level', 'Age', 'Service Tenure']].copy()

# Convert each Pay Level label to an integer by taking the second
# space-separated token of the stripped label.
# NOTE(review): this assumes every label is a string of the form
# "<word> <number>"; a missing (NaN) Pay Level would raise here — confirm
# against the cleaned CSV.
temp['Pay Level'] = temp['Pay Level'].apply(lambda x: int(x.strip().split(' ')[1]))



In [27]:
# Pairwise correlation of the columns in `temp`, rounded to two decimals.
corr_mat = temp.corr().round(2)

# Heatmap of the correlation matrix with the value printed in every cell.
fig = px.imshow(
    corr_mat,
    text_auto=True,
    color_continuous_scale='Viridis',
    title='Age, Service Tenure, and Pay Level (trivariate analysis)',
    height=500,
)

fig.update_layout(title_x=0.5)

fig.show()

In [28]:

# Row count per (Pay Level, Allotment Year) pair, for years before 2024.
temp = (
    data.groupby(['Pay Level', 'Allotment Year'])
    .agg({'Pay Level': 'count'})
    .rename(columns={'Pay Level': 'count'})
    .reset_index()
    .loc[lambda counts: counts['Allotment Year'] < 2024]
)

# One line (with its own marker symbol) per pay level.
fig = px.line(
    temp,
    x='Allotment Year',
    y='count',
    color='Pay Level',
    symbol='Pay Level',
    title='Tracking Pay Level Trend Across Years',
    height=550,
)

fig.update_layout(title_x=0.5)
fig.show()


In [29]:
# Count rows per (Source of Recruitment, Pay Level) pair, then pivot Pay Level
# into columns; pairs that never occur are filled with 0.
temp = (
    data.groupby(['Source of Recruitment', 'Pay Level'])
    .agg({'Pay Level': 'count'})
    .rename(columns={'Pay Level': 'count'})
    .reset_index()
    .groupby(['Source of Recruitment', 'Pay Level'])
    .sum()
    .unstack()
    .fillna(0)
    .droplevel(0, axis=1)  # drop the outer 'count' level of the column index
)

# Heatmap of the count table with the value printed in every cell.
fig = px.imshow(
    temp,
    text_auto=True,
    color_continuous_scale='Viridis',
    title='Comparison between Source of Recruitment and Pay Level',
    height=450,
)

fig.update_layout(title_x=0.5)

fig.show()

In [30]:
# Count rows per (Gender, Pay Level) pair, then pivot Pay Level into columns;
# pairs that never occur are filled with 0.
temp = (
    data.groupby(['Gender', 'Pay Level'])
    .agg({'Pay Level': 'count'})
    .rename(columns={'Pay Level': 'count'})
    .reset_index()
    .groupby(['Gender', 'Pay Level'])
    .sum()
    .unstack()
    .fillna(0)
    .droplevel(0, axis=1)  # drop the outer 'count' level of the column index
)

# Heatmap of the count table with the value printed in every cell.
fig = px.imshow(
    temp,
    text_auto=True,
    color_continuous_scale='Viridis',
    title='Comparison between Gender and Pay Level',
    height=450,
)

fig.update_layout(title_x=0.5)

fig.show()

In [31]:
# Not referenced in this cell; kept so the `go` alias stays available.
import plotly.graph_objs as go

# Count rows per (Qual Type, Pay Level) pair, then pivot Pay Level into
# columns; pairs that never occur are filled with 0.
temp = (
    data.groupby(['Qual Type', 'Pay Level'])
    .agg({'Pay Level': 'count'})
    .rename(columns={'Pay Level': 'count'})
    .reset_index()
    .groupby(['Qual Type', 'Pay Level'])
    .sum()
    .unstack()
    .fillna(0)
    .droplevel(0, axis=1)  # drop the outer 'count' level of the column index
)

# Heatmap of the count table with explicit axis labels and per-cell values.
fig = px.imshow(
    temp,
    text_auto=True,
    labels={'x': "Pay Level", 'y': "Level of Qualification"},
    color_continuous_scale='Viridis',
    title='Comparison between Level of Qualification and Pay Level',
    height=450,
)

fig.update_layout(title_x=0.5)

fig.show()

In [32]:
# Find the 'Qual 1' values that occur more than 50 times in `df`.
qual1 = df['Qual 1'].value_counts()
c = qual1 > 50
qual1 = qual1[c].index

# In a copy of `df`, relabel every other 'Qual 1' value as 'Other'.
temp = df.copy()
temp['Qual 1'] = temp['Qual 1'].map(lambda q: q if q in qual1 else 'Other')



In [33]:

# Bar chart with one bar per 'Qual 1' value in `temp`, coloured by 'Gender'
# ('Qual 1' is shown in the figure as 'Qualification').
fig = px.bar(
    temp,
    x='Qual 1',
    color='Gender',
    labels={'Qual 1': 'Qualification'},
    title='Educational Attainment by Gender',
    height=550,
)

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [34]:

# Bar chart with one bar per 'Qual 1' value in `temp`, coloured by
# 'Source of Recruitment' ('Qual 1' is shown as 'Qualification').
fig = px.bar(
    temp,
    x='Qual 1',
    color='Source of Recruitment',
    labels={'Qual 1': 'Qualification'},
    title='Officers\' Recruitment and Qualification (Cross Analysis)',
    height=550,
)

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [35]:

# Row count per (Qual 1, Allotment Year) pair, keeping years before 2024.
# The result is stored under its own name instead of overwriting `temp`:
# `temp` is the input frame here, so clobbering it would make this cell give
# different results when re-run and would break any cell that still expects
# the per-officer frame.
qual1_trend = (
    temp.groupby(['Qual 1', 'Allotment Year'])
    .agg({'Qual 1': 'count'})
    .rename(columns={'Qual 1': 'count'})
    .reset_index()
)
qual1_trend = qual1_trend[qual1_trend['Allotment Year'] < 2024]

# One line (with its own marker symbol) per qualification.
fig = px.line(qual1_trend, x = 'Allotment Year', y = 'count', color = 'Qual 1', height = 550,
              title = 'Trend of Educational Qualification', symbol='Qual 1',
              labels = {'Qual 1': 'Qualification'})


fig.update_layout(title_x=0.5)
fig.show()


In [36]:

# Bar chart with one bar per 'Post Type', coloured by 'Gender'.
fig = px.bar(
    data,
    x='Post Type',
    color='Gender',
    title='Gender Distribution of Officers by Post',
    height=550,
)

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [37]:

# Bar chart with one bar per 'Post Type', coloured by 'Qual Type'.
fig = px.bar(
    data,
    x='Post Type',
    color='Qual Type',
    title='Officer Qualifications by Post Type',
    height=550,
)

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(marker_line_width=0)

fig.show()


In [38]:

def get_inactive(x):
  """Map a free-text post description to an inactive-status label.

  Matching is a case-insensitive substring test, checked in this order:
  'leave' -> 'On Leave', 'sus' -> 'Under Suspension', 'wait' -> 'On Waiting'.

  Args:
    x: Post description. Non-string values (e.g. NaN from a missing cell)
      are accepted and treated as unmatched.

  Returns:
    One of the three labels above, or 'Other Inactive' when nothing matches,
    so that every row gets an explicit label rather than None.
  """
  # Missing values are not strings and have no .lower().
  if not isinstance(x, str):
    return 'Other Inactive'

  x = x.lower()

  if 'leave' in x:
    return 'On Leave'
  elif 'sus' in x:
    return 'Under Suspension'
  elif 'wait' in x:
    return 'On Waiting'

  return 'Other Inactive'


# Rows whose 'Post Type' is 'Inactive', with 'Current Post' reduced to a
# label by get_inactive.
temp = data.loc[data['Post Type'] == 'Inactive']
inactive_posts = temp['Current Post'].apply(get_inactive)

# Donut chart of the resulting labels, showing each slice's value.
fig = px.pie(
    inactive_posts,
    names='Current Post',
    title='Inactive Officers',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.8, 'xanchor': 'left', 'x': 0.65},
)
fig.update_traces(textinfo='value')

fig.show()



In [40]:
def get_commr(x):
  """Map a free-text post description to a commissioner rank label.

  The lower-cased text is tested against keyword fragments in a fixed order
  and the first rule that matches wins; if none match, the result is
  "Other Commissioner".

  NOTE(review): these are plain substring tests, so a fragment also matches
  inside longer words (e.g. 'dep' inside "department") — confirm that this
  is acceptable for the post names in the data.
  """
  title = x.lower()

  # (keyword fragments, label) pairs, in priority order.
  rules = (
    (('joint', 'jt'), 'Joint Commissioner'),
    (('add',), 'Additional Commissioner'),
    (('ass',), 'Assistant Commissioner'),
    (('dep', 'dy'), 'Deputy Commissioner'),
    (('spe',), 'Special Commissioner'),
  )
  for fragments, label in rules:
    if any(fragment in title for fragment in fragments):
      return label

  return "Other Commissioner"


# Rows whose 'Post Type' is 'Commissioner', with 'Current Post' reduced to a
# rank label by get_commr.
temp = data.loc[data['Post Type'] == 'Commissioner']
commr = temp['Current Post'].apply(get_commr)

# Donut chart of the resulting labels, showing each slice's value.
fig = px.pie(
    commr,
    names='Current Post',
    title='Proportion of Commissioners by Type',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.8, 'xanchor': 'left', 'x': 0.65},
)
fig.update_traces(textinfo='value')

fig.show()


In [41]:
def get_maggie(x):
  """Map a free-text post description to a magistrate type label.

  The lower-cased text is checked for 'add' first, then 'dist'. Text matching
  neither is returned lower-cased with only its first character capitalised.
  """
  title = x.lower()

  # 'add' is tested before 'dist', so text containing both is 'Additional'.
  if 'add' in title:
    return 'Additional Magistrate'
  if 'dist' in title:
    return 'District Magistrate'
  return title.capitalize()

# Rows whose 'Post Type' is 'Magistrate', with 'Current Post' reduced to a
# type label by get_maggie.
temp = data.loc[data['Post Type'] == 'Magistrate']
maggie = temp['Current Post'].apply(get_maggie)

# Donut chart of the resulting labels, showing each slice's value.
fig = px.pie(
    maggie,
    names='Current Post',
    title='Proportion of Magistrate by Type',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.8, 'xanchor': 'left', 'x': 0.65},
)
fig.update_traces(textinfo='value')

fig.show()

In [42]:
def get_collector(x):
  """Map a free-text post description to a collector rank label.

  The lower-cased text is tested against keyword fragments in a fixed order
  and the first rule that matches wins; if none match, the result is
  "Other Collector".

  NOTE(review): these are plain substring tests, so a fragment also matches
  inside longer words — confirm against the post names in the data.
  """
  title = x.lower()

  # (keyword fragments, label) pairs, in priority order.
  rules = (
    (('joint', 'jt'), 'Joint Collector'),
    (('add',), 'Additional Collector'),
    (('ass',), 'Assistant Collector'),
    (('dis', 'dt'), 'District Collector'),
  )
  for fragments, label in rules:
    if any(fragment in title for fragment in fragments):
      return label

  return "Other Collector"


# Rows whose 'Post Type' is 'Collector', with 'Current Post' reduced to a
# rank label by get_collector.
temp = data.loc[data['Post Type'] == 'Collector']
coll = temp['Current Post'].apply(get_collector)

# Donut chart of the resulting labels, showing each slice's value.
fig = px.pie(
    coll,
    names='Current Post',
    title='Proportion of Collectors by Type',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.8, 'xanchor': 'left', 'x': 0.65},
)
fig.update_traces(textinfo='value')

fig.show()



In [43]:

def get_secretary(x):
  """Map a free-text post description to a secretary rank label.

  The lower-cased text is tested against keyword fragments in a fixed order
  and the first rule that matches wins; if none match, the result is
  "Other Secretary".

  NOTE(review): these are plain substring tests, so a fragment also matches
  inside longer words (e.g. 'dep' inside "department") — confirm that this
  is acceptable for the post names in the data.
  """
  title = x.lower()

  # (label, keyword fragments) pairs, in priority order.
  ranked_labels = [
    ('Joint Secretary', ('joint', 'jt')),
    ('Additional Secretary', ('add',)),
    ('Assistant Secretary', ('ass',)),
    ('Deputy Secretary', ('dep', 'dy')),
    ('Special Secretary', ('spe',)),
  ]
  matches = (
    label
    for label, fragments in ranked_labels
    if any(fragment in title for fragment in fragments)
  )
  return next(matches, "Other Secretary")

# Rows whose 'Post Type' is 'Secretary', with 'Current Post' reduced to a
# rank label by get_secretary.
temp = data.loc[data['Post Type'] == 'Secretary']
coll = temp['Current Post'].apply(get_secretary)

# Donut chart of the resulting labels, showing each slice's value.
fig = px.pie(
    coll,
    names='Current Post',
    title='Proportion of Secretary by Type',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.8, 'xanchor': 'left', 'x': 0.65},
)
fig.update_traces(textinfo='value')

fig.show()

In [44]:
# Minimum number of postings a location needs in order to be charted.
MIN_POSTINGS = 100

# Postings per location, keeping only locations above the threshold.
# value_counts() returns the counts in descending order, so the filtered
# series is already ordered from most to least frequent.
loc = data['Location'].value_counts()
loc = loc[loc > MIN_POSTINGS]

# The "Top N" in the title is derived from the number of locations actually
# plotted, so it stays correct if the data or the threshold changes.
fig = px.bar(x = loc.index, y = loc, height = 550,
             title = f'Frequency of Officers\' Postings by Location (Top {len(loc)})',
             labels = {'x': 'Location', 'y': 'count'})

fig.update_layout(title_x=0.5)
# Remove the marker outline from the bars.
fig.update_traces(dict(marker_line_width=0))

fig.show()


In [50]:
# Donut chart of officers grouped by the 'Department Type' column.
fig = px.pie(
    data,
    names='Department Type',
    title='Composition of Department Type',
    hole=0.4,
    height=450,
)
fig.update_layout(
    title_x=0.5,
    legend={'yanchor': 'top', 'y': 0.95, 'xanchor': 'left', 'x': 0.65},
)
# Show each slice's value inside the slice, with rotation set to 0.
fig.update_traces(textinfo='value', rotation=0, textposition='inside')

fig.show()