# Guns in the USA

https://www.atf.gov/resource-center/data-statistics

In [13]:
import pandas as pd
import numpy as np
import sidetable

<font color="cyan"> Data from: https://fivethirtyeight.com/features/gun-deaths-about/ </font>

<font color="cyan"> Loading the data </font>

In [6]:
# Read the CSV into `df`, using the file's first column as the row index.
df = pd.read_csv("full_data.csv", index_col=0)

<font color="cyan"> Exploring the data: I want to check the shape of our DataFrame (df), its unique values, typos, data types... </font>

In [57]:
# Preview the first rows to see the columns and the kind of values they hold.
df.head()

Unnamed: 0,year,month,intent,police,sex,age,race,hispanic,place,education
1,2012,1,Suicide,0,M,34.0,Asian/Pacific Islander,100,Home,BA+
2,2012,1,Suicide,0,F,21.0,White,100,Street,Some college
3,2012,1,Suicide,0,M,60.0,White,100,Other specified,BA+
4,2012,2,Suicide,0,M,64.0,White,100,Home,BA+
5,2012,2,Suicide,0,M,31.0,White,100,Other specified,HS/GED


In [55]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(100798, 10)

In [11]:
# Data type pandas inferred for each column.
df.dtypes

year           int64
month          int64
intent        object
police         int64
sex           object
age          float64
race          object
hispanic       int64
place         object
education     object
dtype: object

<font color="cyan"> All the types look correct. </font>

<font color="cyan"> Exploring nulls </font>

In [89]:
# Percentage of missing values in each column, kept as a one-column DataFrame.
percent_missing = df.isna().sum().mul(100).div(len(df)).to_frame()
percent_missing

Unnamed: 0,0
year,0.0
month,0.0
intent,0.0
police,0.0
sex,0.0
age,0.0
race,0.0
hispanic,0.0
place,0.0
education,0.0


<font color= "cian">

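<font color="cyan"> Nulls account for only about 1% of the data, which is small enough that I will simply drop those rows. (Note: the table above shows 0% because that cell was re-run after the rows were dropped.) </font>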

In [88]:
# Remove every row that has a missing value in any column.
# The result is rebound to `df` instead of using inplace=True, so the frame
# object shown by earlier cells is not mutated behind the reader's back.
df = df.dropna()

<font color="cyan"> Are there duplicates? </font>

In [91]:
# Percentage of rows that are exact duplicates of an earlier row.
100 * df.duplicated().sum() / len(df)

39.190940162220066

<font color="cyan"> Different categories or types of gun deaths </font>

In [41]:
# List the column names available for the analysis.
df.columns

Index(['year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic',
       'place', 'education'],
      dtype='object')

In [53]:
# Print the distinct values of every column to spot typos and unexpected categories.
for column_name, column_values in df.items():
    print("\n In the variable ----------> ", column_name,"----- > These are the unique values:  ", column_values.unique())


 In the variable ---------->  year ----- > These are the unique values:   [2012 2013 2014]

 In the variable ---------->  month ----- > These are the unique values:   [ 1  2  3  4  5  6  7  8  9 10 11 12]

 In the variable ---------->  intent ----- > These are the unique values:   ['Suicide' 'Undetermined' 'Accidental' 'Homicide' nan]

 In the variable ---------->  police ----- > These are the unique values:   [0 1]

 In the variable ---------->  sex ----- > These are the unique values:   ['M' 'F']

 In the variable ---------->  age ----- > These are the unique values:   [ 34.  21.  60.  64.  31.  17.  48.  41.  50.  nan  30.  43.  27.  55.
  53.  51.  65.  52.  47.  19.  22.  56.  38.  25.  26.  23.  87.  28.
  39.  59.  61.  12.  40.  57.  20.  62.  71.  44.  29.  24.   9.  77.
  18.  36.  49.  13.  33.  90.  63.  46.  32.  84.  68.  89.  58.  78.
  66.  81.  54.  45.  37.  35.  42.  67.  69.  72.  82.  75.  73.  15.
  83.  74.  86.  16.  88.   3.  80.   5.  79.  91.   4.  76.  14. 

<font color="cyan"> Different categories or types of gun deaths </font>

In [12]:
# Summary (count, unique, top, freq) restricted to the object-dtype columns.
df.describe(include="object")

Unnamed: 0,intent,sex,race,place,education
count,100797,100798,100798,99414,99376
unique,4,2,5,10,4
top,Suicide,M,White,Home,HS/GED
freq,63175,86349,66237,60486,42927


<font color="cyan"> How many gun deaths are there by race? </font>

In [14]:
# Number of rows per `race` value, most frequent first.
df["race"].value_counts()

White                             66237
Black                             23296
Hispanic                           9022
Asian/Pacific Islander             1326
Native American/Native Alaskan      917
Name: race, dtype: int64

In [15]:
# Same breakdown through the sidetable `.stb` accessor, which adds percent and cumulative columns.
df.stb.freq(["race"])

Unnamed: 0,race,count,percent,cumulative_count,cumulative_percent
0,White,66237,65.712613,66237,65.712613
1,Black,23296,23.11157,89533,88.824183
2,Hispanic,9022,8.950574,98555,97.774757
3,Asian/Pacific Islander,1326,1.315502,99881,99.09026
4,Native American/Native Alaskan,917,0.90974,100798,100.0


<font color= "green"> Mean age by race

In [36]:
# Mean of `age` within each `race` group, rounded to two decimals.
mean_race = df.groupby("race")["age"].mean().round(2)
mean_race

race
Asian/Pacific Islander            38.62
Black                             31.10
Hispanic                          33.22
Native American/Native Alaskan    36.23
White                             50.01
Name: age, dtype: float64

<font color="cyan"> Trying colors </font>