## EDA of US Juvenile Crime Data

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import seaborn as sns

In [None]:
# Read the national juvenile-arrests CSV into a DataFrame and display it.
data = pd.read_csv(
    filepath_or_buffer="../input/us-juvenile-arrests-by-crime/arrests_national_juvenile.csv"
)
data

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Remove the `state_abbr` column, then discard every row that still contains a
# missing value.
# `errors='ignore'` keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
data = data.drop(columns='state_abbr', errors='ignore').dropna()
data

In [None]:
# Column subsets reused by the plots below.

# Age-bracket columns carrying the `m_` prefix.
male = data.loc[:, ['m_0_9', 'm_10_12', 'm_13_14', 'm_15', 'm_16', 'm_17']]

# Age-bracket columns carrying the `f_` prefix.
female = data.loc[:, ['f_0_9', 'f_10_12', 'f_13_14', 'f_15', 'f_16', 'f_17']]

# Race columns.
race_ = data.loc[:, ['black', 'white', 'asian_pacific_islander', 'american_indian']]

## Male participation in criminal activities over the years

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Build one thin facet row per offense_name, coloured by offense_name.
# The axes face colour set on the first line has alpha 0, so overlapping rows
# do not hide one another.
pal = sns.cubehelix_palette(30, rot=-.25, light=.7)
g = sns.FacetGrid(data, row="offense_name", hue="offense_name", aspect=15, height=.5, palette=pal)

# Map sns.barplot over the total_male column of every facet (issued twice with
# slightly different styling keywords), then add a horizontal line at y=0.
# NOTE(review): the second barplot call looks redundant with the first —
# confirm before removing it.
g.map(sns.barplot, "total_male",
      clip_on=False,
      fill=True, alpha=1, linewidth=0.7)
g.map(sns.barplot, "total_male", clip_on=False, lw=0.7)
g.map(plt.axhline, y=0, lw=1, clip_on=False)


# Helper used with g.map to write each facet's label in axes coordinates
def label(x, color, label):
    """Write `label` in bold, coloured `color`, on the current axes.

    The text is right-aligned at x=0 and vertically centred at y=0.5 in axes
    coordinates. `x` is the mapped data column supplied through `g.map`; it is
    not used.
    """
    ax = plt.gca()
    ax.text(0, 0.5, label, fontweight="bold", color=color,
            ha="right", va="center", transform=ax.transAxes)


g.map(label, "total_male")

# Negative hspace makes consecutive facet rows overlap
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
#plt.title('Male participation in criminal activities over the years', fontsize = 16)
plt.show()

## Female participation in criminal activities over the years

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style="ticks")

# Tall figure so that each offense gets its own row on the y axis.
f, ax = plt.subplots(figsize=(10, 15))

# Both layers share the same x / y mapping.
axes_spec = dict(x="total_female", y="offense_name", data=data)

# Horizontal box per offense, whiskers set to the 0th and 100th percentiles.
sns.boxplot(whis=[0, 100], width=.6, palette="Reds", **axes_spec)

# Overlay every individual observation as a small grey point.
sns.stripplot(size=4, color=".3", linewidth=0, **axes_spec)

# Final presentation tweaks.
ax.set_ylabel("")
sns.despine(trim=True, left=False)
ax.set_title('Female participation in criminal activities over the years', fontsize=16)
plt.show()

## Comparison between male and female participation in juvenile criminal acts

In [None]:
ax = plt.subplots(figsize=(8, 15))[1]

# Female totals first, then male totals layered on the same axes; marker size
# scales with the plotted column.
for column, colour, size_range, legend_name in [
    ("total_female", 'salmon', (20, 100), 'Female'),
    ("total_male", 'teal', (20, 200), 'Male'),
]:
    sns.scatterplot(
        x=column, y="offense_name", data=data,
        linewidth=50, color=colour, alpha=0.2,
        size=column, sizes=size_range, label=legend_name,
        ax=ax,
    )

# Strip the top/right frame and the axis labels.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set(ylabel=None, xlabel=None)

ax.set_title('Comparison between male and female participation in juvenile criminal acts', fontsize=16)
ax.legend()
plt.show()

## What does the distribution of male & female juvenile participation look like?

In [None]:
ax = plt.subplots(figsize=(12, 6))[1]

# Female histogram first, then a semi-transparent male histogram on top.
sns.histplot(data['total_female'], color='salmon', ax=ax)
sns.histplot(data['total_male'], color='gray', alpha=0.3, ax=ax)

ax.set_xlabel('Numbers to count')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('How distribution of male & female juvenile participation looks like?', fontsize=16)
plt.show()

## Age-wise criminal acts among male children aged 0-17

In [None]:
ax = plt.subplots(figsize=(15, 8))[1]

# One horizontal swarm per column of the `male` subset.
sns.swarmplot(data=male, palette="Blues", orient="h", ax=ax)

for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('Agewise criminal acts among the male of 0-17 years old male child', fontsize=16)
plt.show()

## Age-wise criminal acts among female children aged 0-17

In [None]:
plt.subplots(figsize=(15, 8))

# One horizontal swarm per column of the `female` subset.
ax = sns.swarmplot(data=female, palette="Reds", orient="h")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Title corrected: this figure plots the female columns, but the previous
# text read "among the male of ... female child".
plt.title('Agewise criminal acts among 0-17 years old female children', fontsize = 16)
plt.show()

## Yearly trend of total count of crimes by different age-groups

In [None]:
# For every age bracket, add the `m_` and `f_` columns into one `<bracket>_yo`
# column, keeping `year` alongside.
age_brackets = ['0_9', '10_12', '13_14', '15', '16', '17']
age_wise = pd.DataFrame({
    'year': data['year'],
    **{
        f'{bracket}_yo': data[f'm_{bracket}'] + data[f'f_{bracket}']
        for bracket in age_brackets
    },
})
age_wise

In [None]:
# Combined (male + female) columns only, without `year`.
age_ = age_wise.loc[:, ['0_9_yo', '10_12_yo', '13_14_yo', '15_yo', '16_yo', '17_yo']]

ax = plt.subplots(figsize=(12, 6))[1]

# Column totals: male bars, female bars drawn over them, and the combined
# total as a dashed line, all on the same axes.
male.sum().plot.bar(ax=ax, color='teal', alpha=0.3, label='Total by the male juve')
female.sum().plot.bar(ax=ax, color='salmon', label='Total by the female juve')
age_.sum().plot(ax=ax, linestyle='--', label='Total of the group')

for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('Sum of all the criminal acts caused by juveniles of different age-group', fontsize=16)
plt.xticks(rotation='horizontal')
ax.legend()
plt.show()

## Average criminal acts over the years by juveniles of different age-group

In [None]:
# Mean of every age-bracket column per year.
# The columns are selected with a list: indexing a GroupBy with a bare tuple of
# names was deprecated and is rejected by pandas 2.x.
age_wise_mean = (
    age_wise
    .groupby('year')[['0_9_yo', '10_12_yo', '13_14_yo', '15_yo', '16_yo', '17_yo']]
    .agg('mean')
)
age_wise_mean

In [None]:
ax = plt.subplots(figsize=(12, 6))[1]

# One line per age-bracket column of the yearly means, six-colour palette.
palette = sns.color_palette("mako_r", 6)
sns.lineplot(data=age_wise_mean, palette=palette, ax=ax)

for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('Average criminal acts over the years by juveniles of different age-group', fontsize=16)
plt.show()

In [None]:
# Yearly mean for the 0-9 bracket only.
age_wise_mean.loc[:, '0_9_yo']

In [None]:
import pandas as pd
import numpy as np
# Yearly PeriodIndex 1994-2016 used as the row index of the result.
date_range = pd.period_range("1994", "2016", freq='Y')
df = pd.DataFrame(index=date_range)

# For each age bracket, copy the yearly mean and append its change relative to
# the previous row, keeping the value column directly before its change column.
for bracket_col in ['0_9_yo', '10_12_yo', '13_14_yo', '15_yo', '16_yo', '17_yo']:
    df[bracket_col] = age_wise_mean[bracket_col].values
    df['%_change_' + bracket_col] = df[bracket_col].pct_change(1)

df

## Change in average criminal acts over the years by juveniles of different age-group

In [None]:
# Keep only the change columns, drop rows containing missing values and
# renumber the rows from 0.
# Done as one non-inplace chain: calling dropna(inplace=True) on a column
# subset of `df` triggers pandas' SettingWithCopyWarning.
pct_age_wise = (
    df[['%_change_0_9_yo', '%_change_10_12_yo', '%_change_13_14_yo',
        '%_change_15_yo', '%_change_16_yo', '%_change_17_yo']]
    .dropna()
    .reset_index(drop=True)
)
pct_age_wise

In [None]:
plt.subplots(figsize=(15, 8))

# One line per change column; x is the 0-based row position of pct_age_wise.
ax = sns.lineplot(data = pct_age_wise)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.title('Change in average criminal acts over the years by juveniles of different age-group', fontsize = 16)

# Pin one tick per row BEFORE assigning the labels. Without fixed tick
# positions the year strings are attached to whatever ticks matplotlib chose
# automatically, which mislabels the axis.
year_labels = [str(year) for year in range(1995, 2017)]
ax.set_xticks(range(len(year_labels)))
ax.set_xticklabels(year_labels)
plt.show()

## Criminal acts over the years by juveniles of different racial-group

In [None]:
ax = plt.subplots(figsize=(12, 6))[1]

# One box per race column, four-colour palette.
palette = sns.color_palette("mako_r", 4)
race = data.loc[:, ['black', 'white', 'asian_pacific_islander', 'american_indian']]
sns.boxplot(data=race, palette=palette, ax=ax)

ax.set_title('Criminal acts over the years by juveniles of different racial-group', fontsize=16)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
plt.show()

## Average criminal acts caused by juveniles of different racial-group

In [None]:
# Mean of every race column per year.
# The columns are selected with a list: indexing a GroupBy with a bare tuple of
# names was deprecated and is rejected by pandas 2.x.
race_wise_mean = (
    data
    .groupby('year')[['black', 'white', 'asian_pacific_islander', 'american_indian']]
    .agg('mean')
)
race_wise_mean

In [None]:
ax = plt.subplots(figsize=(12, 6))[1]

# One line per race column of the yearly means, four-colour palette.
palette = sns.color_palette("mako_r", 4)
sns.lineplot(data=race_wise_mean, palette=palette, ax=ax)

for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('Average criminal acts over the years by juveniles of different racial-group', fontsize=16)
plt.show()

<img src= "https://www.reedpublicrelations.com/wp-content/uploads/2019/11/Reed-blog-post-image.jpg" alt ="Titanic" style='width: 120px;'>

## If you liked it, please upvote!