## Reading Data and Getting Summaries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

# Load the daily inmates-in-custody snapshot and preview its last rows.
INMATES_CSV = "../input/daily-inmates-in-custody.csv"
crime = pd.read_csv(INMATES_CSV)
crime.tail()

In [None]:
# Summarise the frame (columns, non-null counts, dtypes) to see where values are missing.
crime.info()

## Univariate Data

In [None]:
# Histogram of recorded prisoner ages (rows without an age are left out).
ages = crime["AGE"].dropna()
age_ax = ages.plot.hist()
age_ax.set_title("Ages of Prisoners Distribution")
age_ax.set_xlabel("Age")
age_ax.figure.savefig("AgeDistribution.png")

In [None]:
# Share of prisoners with a recorded age who are between 20 and 40 years old.
# `between` is inclusive on both ends by default, so ages 20 and 40 are counted.
ages = crime["AGE"].dropna()
# The mean of a boolean mask is the fraction of True values; this stays vectorised
# instead of iterating over the Series with the built-in sum().
float(ages.between(20, 40).mean())

#### Most prisoners are between 20 and 40 years old (inclusive): together they make up 63.42% of the prisoners with a recorded age.

In [None]:
# Bar chart of the number of prisoners per gender code.
gender_ax = sns.countplot(data=crime, x="GENDER")
gender_ax.set_title("Genders of Prisoners Distribution")
gender_ax.figure.savefig("GenderDistribution.png")

In [None]:
# Fraction of prisoners with a recorded gender who are male.
genders = crime["GENDER"].dropna()
# Compare directly against "M" rather than mapping codes to 0/1: any code missing
# from such a mapping becomes NaN and would turn the whole proportion into NaN.
float((genders == "M").mean())

#### There is an overwhelming gender imbalance in prison, with 93.75% of the prison population being male.

In [None]:
# Count prisoners per race code. Labels and bar heights both come from the same
# Series, so they stay aligned even if the set of race codes in the data changes.
race_counts = crime["RACE"].dropna().value_counts().sort_index()
positions = np.arange(len(race_counts))

fig, ax = plt.subplots()
ax.bar(positions, race_counts.values)
ax.set_xticks(positions)
ax.set_xticklabels(race_counts.index)
ax.set_title("Bar Plot of Prisoner Race")

# Save before showing: with the inline backend plt.show() closes the figure, so a
# subsequent plt.savefig() would write an empty image.
fig.savefig('race_bar.png')
plt.show()

In [None]:
# Fraction of prisoners with a recorded race whose race code is 'B'.
races = crime["RACE"].dropna()
# A direct comparison needs no exhaustive code-to-0/1 mapping; a race code absent
# from such a mapping would become NaN and make the proportion NaN.
float((races == "B").mean())

#### It is clear to see that the majority of the prisoners are African American as they make up 56.03% of the prison population. 

In [None]:
# Bar chart of prisoners with (Y) and without (N) an infraction.
infraction_ax = sns.countplot(data=crime, x="INFRACTION")
infraction_ax.set_title("Number of Infractions Committed")
infraction_ax.figure.savefig("NumInfractions.png")

In [None]:
# Fraction of prisoners with a recorded INFRACTION flag that is 'Y'.
infractions = crime['INFRACTION'].dropna()
# Boolean mean instead of map + built-in sum(): vectorised, and an unexpected
# flag value counts as "not Y" rather than turning the result into NaN.
float((infractions == 'Y').mean())

#### It is clear to see that most of the prisoners do not have an infraction. Only 30.09% of the prisoners have an infraction.

In [None]:
# Bar chart of the BRADH (mental observation) flag.
bradh_ax = sns.countplot(data=crime, x="BRADH")
bradh_ax.set_title("Number of Prisoners under Mental Observation")
bradh_ax.set_xlabel("Mental Observation")
bradh_ax.figure.savefig("MentalObservation.png")

In [None]:
# Fraction of prisoners with a recorded BRADH (mental observation) flag that is 'Y'.
mental_obs = crime['BRADH'].dropna()
# Boolean mean instead of map + built-in sum(): vectorised, and an unexpected
# flag value counts as "not Y" rather than turning the result into NaN.
float((mental_obs == 'Y').mean())

#### It is clear to see that a lot of prisoners (43.89%) are under mental observation. 

In [None]:
# Bar chart of the SRG_FLG (gang affiliation) flag.
gang_ax = sns.countplot(data=crime, x="SRG_FLG")
gang_ax.set_title("Number of Prisoners with Gang Affiliation")
gang_ax.set_xlabel("Gang Affiliation")
gang_ax.figure.savefig("GangAffiliation.png")

In [None]:
# Fraction of prisoners with a recorded SRG_FLG (gang affiliation) flag that is 'Y'.
gang_flags = crime['SRG_FLG'].dropna()
# Boolean mean instead of map + built-in sum(): vectorised, and an unexpected
# flag value counts as "not Y" rather than turning the result into NaN.
float((gang_flags == 'Y').mean())

#### It is clear to see that not many prisoners have gang affiliations. Only 14.20% of prisoners have gang affiliations. 

## Bivariate Data

In [None]:
# Prisoner counts per custody level, split by race.
custody_race_ax = sns.countplot(data=crime, x='CUSTODY_LEVEL', hue='RACE')
custody_race_ax.set_title('Level of Security of Prisoners by Race')
custody_race_ax.figure.savefig('securityvsrace.png')

#### Most prisoners are held at the medium security level, with a similar ratio between the races.

In [None]:
# Prisoner counts per custody level, split by gender.
custody_gender_ax = sns.countplot(data=crime, x='CUSTODY_LEVEL', hue='GENDER')
custody_gender_ax.set_title('Level of Security of Prisoners by Gender')
custody_gender_ax.figure.savefig('securityvsgender.png')

#### This graph also supports the hypothesis that most of the prison population, for both genders, is held at the medium security level.

In [None]:
# Mental observation flag counts, split by gender.
bradh_gender_ax = sns.countplot(data=crime, x="BRADH", hue='GENDER')
bradh_gender_ax.set_title("Number of Prisoners under Mental Observation by Gender")
bradh_gender_ax.figure.savefig("MentalObservationbyGender.png")

#### Relative to their share of the prison population, more women than men are under mental observation.

In [None]:
# Mental observation flag counts, split by race.
bradh_race_ax = sns.countplot(data=crime, x="BRADH", hue='RACE')
bradh_race_ax.set_title("Number of Prisoners under Mental Observation by Race")
bradh_race_ax.figure.savefig("MentalObservationRace.png")

#### There exists a similar ratio between the races in and out of mental observation. 

In [None]:
# Mental observation flag counts, split by whether an infraction was committed.
bradh_infraction_ax = sns.countplot(data=crime, x="BRADH", hue="INFRACTION")
bradh_infraction_ax.set_title("Mental Observation by Infractions Committed")
bradh_infraction_fig = bradh_infraction_ax.figure
# Tighten the layout before writing the image, in the same order as before.
bradh_infraction_fig.tight_layout()
bradh_infraction_fig.savefig("MentalObservationInfraction.png")

#### Among prisoners who have committed an infraction, more are under mental observation than not.

In [None]:
# Gang affiliation flag counts, split by race.
gang_race_ax = sns.countplot(data=crime, x="SRG_FLG", hue="RACE")
gang_race_ax.set_title("Number of Prisoners with Gang Affiliation by Race")
gang_race_ax.set_xlabel("Gang Affiliation")
gang_race_ax.figure.savefig("GangAffiliationRace.png")

#### The ratio between the races is similar among prisoners with and without gang affiliations.

In [None]:
# Gang affiliation flag counts, split by gender.
gang_gender_ax = sns.countplot(data=crime, x="SRG_FLG", hue="GENDER")
gang_gender_ax.set_title("Number of Prisoners with Gang Affiliation by Gender")
gang_gender_ax.set_xlabel("Gang Affiliation")
gang_gender_ax.figure.savefig("GangAffiliationGender.png")

In [None]:
#### The ratio between the genders is similar among prisoners with and without gang affiliations.