In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for entry in files:
        print(os.path.join(root, entry))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import datetime
import matplotlib.gridspec as gridspec

In [None]:
%matplotlib inline

In [None]:
# Load the raw incident table from the Kaggle input directory (one CSV file).
data = pd.read_csv('/kaggle/input/gun-violence-data/gun-violence-data_01-2013_03-2018.csv')

In [None]:
# (rows, columns) of the raw table.
data.shape

In [None]:
# Display the frame (the notebook front end truncates long tables).
data

In [None]:
# Summary statistics produced by DataFrame.describe().
data.describe()

**Modificando o tipo de coluna**

In [None]:
# Column dtypes before any conversion.
data.dtypes

In [None]:
# Column names available for grouping and plotting.
data.columns

In [None]:
# Parse the 'date' column into datetime64 values.
# pd.to_datetime is used instead of astype(np.datetime64): the unit-less
# NumPy dtype is rejected by recent pandas releases.
data = data.assign(date=pd.to_datetime(data['date']))

**Trabalhando com a data**

In [None]:
def get_periodo_resampled(df, periodo, group_by="state"):
    """Sum the numeric columns of ``df`` per time period and per group.

    The frame is split into time bins with ``DataFrame.resample`` on the
    ``date`` column; inside every bin the rows are grouped by ``group_by``
    and the numeric columns are summed. The per-bin results are stacked
    into one flat frame whose ``date`` column holds the bin label.

    The input frame is not modified: the date parsing happens on a copy.

    Args:
        df (pandas.DataFrame): incident table; must contain a ``date``
            column that ``pd.to_datetime`` can parse and the ``group_by``
            column.
        periodo (str): pandas offset alias passed to ``resample``
            (for example ``"M"``; see the list below).
        group_by (str, optional): name of the column to group by inside
            every period. Defaults to ``"state"``.

    Returns:
        pandas.DataFrame: one row per (period, group) pair that has data,
        with the ``group_by`` column, the summed numeric columns and a
        ``date`` column holding the period label. An empty frame is
        returned when no period contains any row.

    Offset aliases accepted by ``periodo`` (as listed by the original
    author; NOTE(review): newer pandas releases rename some of them, e.g.
    ``ME``/``YE`` -- check the installed version):

    B         business day frequency
    C         custom business day frequency (experimental)
    D         calendar day frequency
    W         weekly frequency
    M         month end frequency
    SM        semi-month end frequency (15th and end of month)
    BM        business month end frequency
    CBM       custom business month end frequency
    MS        month start frequency
    SMS       semi-month start frequency (1st and 15th)
    BMS       business month start frequency
    CBMS      custom business month start frequency
    Q         quarter end frequency
    BQ        business quarter end frequency
    QS        quarter start frequency
    BQS       business quarter start frequency
    A         year end frequency
    BA, BY    business year end frequency
    AS, YS    year start frequency
    BAS, BYS  business year start frequency
    BH        business hour frequency
    H         hourly frequency
    T, min    minutely frequency
    S         secondly frequency
    L, ms     milliseconds
    U, us     microseconds
    N         nanoseconds

    Columns the original author listed as candidates for ``group_by``:

    ['incident_id', 'date', 'state', 'city_or_county', 'address', 'n_killed',
       'n_injured', 'incident_url', 'source_url',
       'incident_url_fields_missing', 'congressional_district', 'gun_stolen',
       'gun_type', 'incident_characteristics', 'latitude',
       'location_description', 'longitude', 'n_guns_involved', 'notes',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_relationship', 'participant_status',
       'participant_type', 'sources', 'state_house_district',
       'state_senate_district']
    """
    # assign() returns a new frame, so the caller's 'date' column is untouched.
    frame = df.assign(date=pd.to_datetime(df['date']))

    partes = []
    for rotulo_periodo, grupo in frame.resample(periodo, on='date'):
        if grupo.empty:
            # Bins without incidents contribute no rows.
            continue
        # numeric_only=True keeps text and datetime columns out of the sum;
        # without it recent pandas concatenates strings or raises on dates.
        totais = grupo.groupby([group_by]).sum(numeric_only=True).reset_index()
        totais['date'] = rotulo_periodo
        if not totais.empty:
            partes.append(totais)

    if not partes:
        return pd.DataFrame([])
    # A single concat avoids rebuilding the result once per period.
    return pd.concat(partes, ignore_index=True)

In [None]:
# Month-end ('M') totals for every state.
data_resampled = get_periodo_resampled(df=data, periodo='M', group_by='state')

In [None]:
# Distinct period labels present in the resampled frame.
data_resampled.date.unique()

**Próximos passos: fazer o plot bacana**

In [None]:
def filtrar_date(df, date):
    """Return the rows of ``df`` whose ``date`` equals the given calendar day.

    Args:
        df (pandas.DataFrame): frame with a ``date`` column of timestamps.
        date (str): ISO calendar date, e.g. ``"2018-02-28"``.

    Returns:
        pandas.DataFrame: the matching rows (empty when nothing matches).

    Raises:
        ValueError: if ``date`` is not a valid ISO calendar date.
    """
    # fromisoformat validates the string; the Timestamp conversion is needed
    # because pandas does not treat Timestamp == datetime.date as equal.
    alvo = pd.Timestamp(datetime.date.fromisoformat(date))
    return df[pd.to_datetime(df["date"]) == alvo]

In [None]:
# Rows of the monthly table labelled 2018-02-28 (one row per state).
df_filtered = filtrar_date(data_resampled, "2018-02-28")

In [None]:
# First row of the filtered table; raises IndexError if the filter matched nothing.
df_filtered.iloc[0]

In [None]:
x = df_filtered.state.values # TODO (original note): use only the first letter

In [None]:
# Death counts, in the same row order as the state names in `x`.
y = df_filtered.n_killed.values

In [None]:
# In-place sort of the state names.
# NOTE(review): `x` comes from `.values`, so this may also reorder the data
# held by df_filtered -- confirm whether that is intended.
x.sort()

In [None]:
# Reorder states and counts together so each state keeps its own count.
# Sorting `y` alone in place would detach the counts from the states and
# could also write back into df_filtered through the `.values` array.
ordem = np.argsort(x, kind="stable")
x = x[ordem]
y = y[ordem]

In [None]:
# Bar chart of deaths per state for the selected month.
# Tick labels are taken from `x`, the array that is plotted, so every label
# sits under the bar it describes.
xlabel = x
n_labels = range(df_filtered.shape[0])

# A wide figure replaces the former subplots_adjust(right=3) sizing hack.
fig, ax = plt.subplots(figsize=(18, 6))
ax.bar(n_labels, y, label='n_killed')
ax.set_xticks(list(n_labels))
ax.set_xticklabels(xlabel, fontsize=14, rotation=65)
fig.suptitle('N Killed por estado')
ax.set_title("Em " + str(df_filtered.iloc[0].date))
# The bars carry a label, so the legend has an entry to show.
ax.legend(loc='best')

In [None]:
# Same data as a line plot.
plt.plot(x, y)

In [None]:
# Group the monthly table by state so single states can be pulled out.
df_grouped_by_state = data_resampled.groupby("state")

In [None]:
# Monthly rows for Alabama only.
df_alabama = df_grouped_by_state.get_group("Alabama")

In [None]:
# Alabama series: period labels on x, summed deaths on y.
x = df_alabama.date
y = df_alabama.n_killed

In [None]:
# Line plot of deaths in Alabama over time, with vertical tick labels.
plt.xticks(rotation=90)  # Set text labels and properties.
plt.suptitle('N Killed no Alabama')
plt.plot(x, y)

In [None]:
# Dashed line plus markers for the Alabama series.
# The line gets a label so plt.legend() has an entry to display.
plt.plot(x, y, 'b--', label='n_killed')
plt.scatter(x, y)
plt.legend(loc='best')
plt.show()

**Analisando o número de armas envolvidas**

In [None]:
# Total injuries as a function of the number of guns involved.
# numeric_only=True keeps text and datetime columns out of the sum; without
# it recent pandas raises on the date column or concatenates strings.
df_sumed = data.groupby("n_guns_involved").sum(numeric_only=True)

# Only the first ten group keys (the smallest gun counts) are plotted.
x = df_sumed.index.values[0:10]
y = df_sumed.n_injured.values[0:10]

plt.xticks(rotation=90)  # Set text labels and properties.
plt.suptitle('N guns vs  N feridas')
plt.plot(x, y)

**Analisando o tipo de arma**

In [None]:
# Ten gun_type values with the highest number of deaths.
# NOTE(review): the grouping key is the raw gun_type string of each incident;
# confirm that is the intended granularity.
coluna = "n_killed"
# numeric_only=True restricts the sum to numeric columns (text and datetime
# columns cannot be summed meaningfully).
df_sumed = data.groupby("gun_type").sum(numeric_only=True)
df_sumed = df_sumed.sort_values(coluna, ascending=False)
x = df_sumed.index.values[0:10]
y = df_sumed[coluna].values[0:10]

plt.xticks(rotation=90)  # Set text labels and properties.
plt.suptitle('Gun type vs  N killed')
plt.bar(x, y)

In [None]:
# Ten gun_type values with the highest number of injuries.
coluna = "n_injured"
# numeric_only=True restricts the sum to numeric columns (text and datetime
# columns cannot be summed meaningfully).
df_sumed = data.groupby("gun_type").sum(numeric_only=True)

df_sumed = df_sumed.sort_values(coluna, ascending=False)
x = df_sumed.index.values[0:10]
y = df_sumed[coluna].values[0:10]

plt.xticks(rotation=90)  # Set text labels and properties.
plt.suptitle('Gun type vs  N injured')
plt.bar(x, y)

**Pegando os estados onde houve decréscimo nos acidentes**

In [None]:
# Year-end ('A') totals for every state.
data_anual = get_periodo_resampled(df=data, periodo='A', group_by='state')

**Pegando os estados onde houve decréscimo nos acidentes**

In [None]:
# Yearly totals ordered by date, grouped per state.
data_anual = data_anual.sort_values("date")
group_states = data_anual.groupby("state")

# Trial run for Alabama: index by date so the x axis shows time.
# set_index returns a new frame instead of writing into the group slice.
df_alabama = group_states.get_group('Alabama').set_index('date', drop=False)
df_alabama.n_killed.plot(color='#17a589', label=' numero de mortos')
df_alabama.n_injured.plot(label=' numero de feridos')
df_alabama.n_guns_involved.plot(label=' numero de armas envolvidas')
plt.title('Alabama dados')
plt.legend(loc=0)

# States whose deaths dropped from the second-to-last to the last period
# (2017 -> 2018 according to the original note).
lista = []
for name, group in group_states:
    # The length guard prevents negative positions from wrapping around.
    if len(group) >= 2 and group.n_killed.iloc[-1] < group.n_killed.iloc[-2]:
        lista.append(group.assign(state=name))

print(len(lista))

# Original author's note: there seems to be under-reporting from 2018 on,
# since every state shows fewer incidents.

# States whose deaths were lower in the third-to-last period than in the
# second-to-last one (i.e. the count went up between those two periods).
lista_2 = []
for name, group in group_states:
    if len(group) >= 3 and group.n_killed.iloc[-3] < group.n_killed.iloc[-2]:
        # Results go into lista_2; they used to be appended to `lista`,
        # which left lista_2 permanently empty.
        lista_2.append(group.assign(state=name))

len(lista_2)

In [None]:
# Same yearly plot for Wyoming.
data_anual = data_anual.sort_values("date")
group_states = data_anual.groupby("state")

# Stored under its own name: the frame holds Wyoming data, not Alabama's.
# set_index returns a new frame instead of writing into the group slice.
df_wyoming = group_states.get_group('Wyoming').set_index('date', drop=False)
df_wyoming.n_killed.plot(color='#17a589', label=' numero de mortos')
df_wyoming.n_injured.plot(label=' numero de feridos')
df_wyoming.n_guns_involved.plot(label=' numero de armas envolvidas')
plt.title('Wyoming dados')
plt.legend(loc=0)

**Verificando hora do acidente**

In [None]:
# List the columns again to look for the fields needed in this section.
data.columns

In [None]:
# Raw participant_age column.
data.participant_age

In [None]:
# Group incidents by the raw participant_gender value.
groups = data.groupby("participant_gender")

In [None]:
# Raw participant_gender column.
data.participant_gender

**Verificando o estado com mais armas**

In [None]:
# Year-end ('A') totals per state (same call as in the earlier yearly cell).
data_anual = get_periodo_resampled(df=data, periodo='A', group_by='state')

In [None]:
df_grouped = data_anual.groupby('date') # group by period label

In [None]:
df_2017 = df_grouped.get_group("2017-12-31") # keep only the year 2017

In [None]:
# Show the 2017 rows ordered by number of guns involved, which puts the
# states with the fewest and the most guns at the two ends of the table.
df_2017 = df_2017.set_index("n_guns_involved", drop=False)
df_2017.sort_index()

In [None]:
# Yearly totals again, then the Alabama and California slices.
# NOTE(review): group_states is not rebuilt here; it still refers to the
# grouping created in an earlier cell -- confirm that this is intended.
data_anual = get_periodo_resampled(df=data, periodo='A', group_by='state')
df_alabama = group_states.get_group('Alabama') # trial run for Alabama
df_california = group_states.get_group('California') # trial run for California

In [None]:
# Deaths per year for California and Alabama on one axis, indexed by date.
df_california = df_california.set_index("date", drop=False)
df_alabama = df_alabama.set_index("date", drop=False)

for serie, rotulo in (
    (df_california.n_killed, ' California'),
    (df_alabama.n_killed, ' Alabama'),
):
    serie.plot(label=rotulo)

plt.title('N de mortes')
plt.legend(loc=0)

In [None]:
# Yearly frames for the four states compared below.
# Each frame is indexed by date so that Series.plot() puts time on the
# x axis; a bare get_group() result would plot against row numbers.
df_california = group_states.get_group('California').set_index('date', drop=False)
df_alabama = group_states.get_group('Alabama').set_index('date', drop=False)
df_Illinois = group_states.get_group('Illinois').set_index('date', drop=False)
df_Florida = group_states.get_group('Florida').set_index('date', drop=False)

In [None]:
# 2x2 grid with the deaths of one state per panel.
fig = plt.figure(figsize=(30, 8))
# spec2 was used without ever being created; build the 2x2 layout here.
spec2 = gridspec.GridSpec(nrows=2, ncols=2, figure=fig)

paineis = [
    (spec2[0, 0], df_alabama, ' Alabama'),
    (spec2[0, 1], df_california, ' California'),
    (spec2[1, 0], df_Illinois, ' Illinois'),
    (spec2[1, 1], df_Florida, ' Florida'),
]
for celula, df_estado, rotulo in paineis:
    ax = fig.add_subplot(celula)
    df_estado.n_killed.plot(ax=ax, label=rotulo)
    ax.legend(loc=0)

fig.suptitle('N de mortes por estado e tempo')
plt.show()

In [None]:
# Overlay the deaths of the four states on a single axis.
series_por_estado = [
    (df_alabama, ' Alabama'),
    (df_california, ' California'),
    (df_Illinois, ' Illinois'),
    (df_Florida, ' Florida'),
]
for df_estado, rotulo in series_por_estado:
    df_estado.n_killed.plot(label=rotulo)

plt.suptitle('N de mortes por estado e tempo')
plt.legend(loc=0)
plt.show()

In [None]:
# Show the number of incidents per state.
# Only the 30 states with the most incidents are kept, so the pie
# percentages are shares of that subset.
state_crime = data['state'].value_counts().head(30)
plt.figure(figsize=(30, 8))
# state_crime
plt.pie(state_crime, labels=state_crime.index,autopct='%1.1f%%', shadow=True)

In [None]:
# The 10 cities/counties with the highest number of incidents.
plt.figure(figsize=(30, 8))
# value_counts() is computed once; head(10) keeps ten entries (the former
# [0:9] slice kept only nine).
contagem_cidades = data['city_or_county'].value_counts().head(10)
top_10_city = contagem_cidades.index.tolist()
top_10_values = contagem_cidades.tolist()
x = top_10_city
y = top_10_values
plt.bar(x, y)

In [None]:
# Per year: number of recorded incidents.
# The count is taken on the year of each date; counting the raw dates
# produced one value per day rather than per year.
incidentes_por_ano = pd.to_datetime(data['date']).dt.year.value_counts().sort_index()
Yearly_incidents_label = incidentes_por_ano.index
Yearly_incidents_count = incidentes_por_ano.tolist()

x = Yearly_incidents_label
y = Yearly_incidents_count
plt.figure(figsize=(30, 3))
plt.scatter(x, y)
plt.xticks(x)  # one tick per year

**Verificando gênero**

In [None]:
# Group incidents by the raw participant_gender value.
groups = data.groupby("participant_gender")

In [None]:
# Raw participant_gender column, inspected before counting genders.
data.participant_gender

In [None]:
# Number of times "Female" occurs in each incident's participant_gender text.
data['n_female'] = [str(genero).count("Female") for genero in data.participant_gender]

In [None]:
# Number of times "Male" occurs in each incident's participant_gender text
# (the match is case-sensitive, so "Female" is not counted).
data['n_male'] = [str(genero).count("Male") for genero in data.participant_gender]

In [None]:
# Per-incident count of "Male" entries.
data['n_male']

In [None]:
# Per-incident count of "Female" entries.
data['n_female']

**Comparando Male vs Female**

In [None]:
# Year-end totals per state, computed after n_female / n_male were added to `data`.
data_resampled = get_periodo_resampled(df=data, periodo='A', group_by='state')

In [None]:
# One group per yearly period label.
groups = data_resampled.groupby("date")

In [None]:
# Period labels that identify the groups.
keys = groups.groups.keys()

In [None]:
# Single-column frame with the state of every incident.
# NOTE(review): not referenced again in the visible part of the notebook.
yearly_data_state = data[["state"]]

In [None]:
# One frame per period, in the order of `keys`.
lista = [groups.get_group(key) for key in keys]

In [None]:
# Frame of the first period.
lista[0]

In [None]:
# First period label, kept to inspect its attributes below.
D = list(keys)[0]

In [None]:
# Year component of the first period label.
D.year

In [None]:
# One pie chart per period with the share of female vs. male participants.
anos = list(keys)
colunas_grade = 3
# Enough rows for every period (ceil division); six periods still give 2x3.
linhas_grade = max(1, -(-len(lista) // colunas_grade))

for index, df in enumerate(lista):
    # Sum only the two gender counters; summing every column would also try
    # to add up state names and timestamps.
    result = df[["n_female", "n_male"]].sum()

    plt.subplot(linhas_grade, colunas_grade, index + 1)
    plt.title(anos[index].year)
    plt.pie([result.n_female, result.n_male], labels=["Female", "Male"], autopct='%1.1f%%', shadow=True )

In [None]:
# 2017 rows ordered by number of deaths (ascending).
df2 = df_2017.sort_values([ 'n_killed'])
df2

In [None]:
df_california = group_states.get_group('California') # trial run for California
df_california
# for index, df in (df_california):
#     result = df.sum(axis=0)
#     result.n_female
    
#     if (index >= 3):
#         plt.subplot(2, 3, index+1)
#     else:
#         plt.subplot(2, 3, index+1)

#     plt.title(list(keys)[index].year)
#     plt.pie([result.n_female, result.n_male], labels=["Female", "Male"], autopct='%1.1f%%', shadow=True )

## Verificando mês de janeiro

In [None]:
# NOTE(review): despite its name, data_anual holds month-end ('M') totals
# from this cell onwards.
data_anual = get_periodo_resampled(df=data, periodo='M', group_by='state')

In [None]:
# Month component of the period label stored in D.
D.month

In [None]:
# Split the monthly table into the January, February, March and August rows.
mes_do_periodo = data_anual.date.dt.month
df_jan = data_anual[mes_do_periodo == 1]
df_fev = data_anual[mes_do_periodo == 2]
df_mar = data_anual[mes_do_periodo == 3]
df_aug = data_anual[mes_do_periodo == 8]

In [None]:
# Totals of all states together for each January; numeric_only=True keeps
# the state names from being concatenated into the result.
df_jan.groupby("date").sum(numeric_only=True)

In [None]:
# Totals of all states together for each February; numeric_only=True keeps
# the state names from being concatenated into the result.
df_fev.groupby("date").sum(numeric_only=True)

In [None]:
# Totals of all states together for each March; numeric_only=True keeps
# the state names from being concatenated into the result.
df_mar.groupby("date").sum(numeric_only=True)

In [None]:
# Totals of all states together for each August; numeric_only=True keeps
# the state names from being concatenated into the result.
df_aug.groupby("date").sum(numeric_only=True)

In [None]:
# plt.figure(figsize=(30, 8))
# ano = df_mar.date
# n_mortos = df_mar['n_killed']
# plt.bar(ano,n_mortos)

### pegando o mês de janeiro de somente um estado

In [None]:
# January rows for Alabama.
df_jan[df_jan["state"] == "Alabama"]

In [None]:
# January rows for California.
df_jan[df_jan["state"] == "California"]

In [None]:
# August rows for Virginia.
df_aug[df_aug["state"] == "Virginia"]

In [None]:
## Deaths and injuries in Virginia in August

# Month-end totals per state, then the Virginia slice.
data_anual = get_periodo_resampled(df=data, periodo='M', group_by='state')
groups = data_anual.groupby("state")
df_virginia = groups.get_group("Virginia")

# Keep only the rows whose period falls in August.
eh_agosto = df_virginia.date.dt.month == 8
df_virginia_agosto = df_virginia[eh_agosto]

# One pie per August: killed vs. injured.
for posicao, linha in enumerate(df_virginia_agosto.itertuples(), start=1):
    plt.subplot(2, 3, posicao)
    plt.title(linha.date.year)
    plt.pie(
        [linha.n_killed, linha.n_injured],
        labels=["N killed", "N injured"],
        autopct='%1.1f%%',
        shadow=True,
    )

In [None]:
# Yearly deaths in Virginia.
# df_virginia has one row per month; the rows are summed per year so that
# each year gets a single bar instead of twelve bars drawn over each other.
virginia_por_ano = df_virginia.groupby(df_virginia.date.dt.year)[["n_killed", "n_injured"]].sum()
x = virginia_por_ano.index
y1 = virginia_por_ano.n_killed.values
y2 = virginia_por_ano.n_injured.values
plt.xticks(x, rotation=90)  # one tick per year
plt.suptitle('Anual acidents')
plt.bar(x, y1, color='orange')
# 

In [None]:
# Injuries in Virginia, using the x / y2 arrays prepared in the previous cell.
plt.bar(x, y2, color='green')

In [None]:
# Grouped bar chart: deaths vs. injuries in Virginia, one pair of bars per year.
# The monthly rows are summed per year so that each year label appears once.
virginia_por_ano = df_virginia.groupby(df_virginia.date.dt.year)[["n_killed", "n_injured"]].sum()
labels = virginia_por_ano.index
total_mortos = virginia_por_ano.n_killed.values
total_feridos = virginia_por_ano.n_injured.values

x = np.arange(len(labels))  # the label locations
width = 0.45  # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, total_mortos, width, label='N killed')
rects2 = ax.bar(x + width/2, total_feridos, width, label='N injured')

# Axis label, title and custom x-axis tick labels.
ax.set_ylabel('Numbers')
ax.set_title('Numbers')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend(loc='best')


def autolabel(rects, axes=None):
    """Write each bar's height just above the bar.

    Args:
        rects: the bar container returned by ``Axes.bar``.
        axes: axes to annotate; defaults to the notebook-level ``ax``.
    """
    alvo = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        alvo.annotate('{}'.format(height),
                      xy=(rect.get_x() + rect.get_width() / 2, height),
                      xytext=(0, 3),  # 3 points vertical offset
                      textcoords="offset points",
                      ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
fig.tight_layout()
plt.show()

### Verificando as armas

In [None]:
# Flatten the gun_type column into a list with one gun type per entry.
# Entries are split on "||" and each piece on "::"; the text after the
# first "::" is kept (presumably the piece looks like "<index>::<type>" --
# verify against the data).
lista_armas = []
# dropna() skips missing values, which have no .split() and would abort the loop.
for guns in data.gun_type.dropna():
    for arma in str(guns).split("||"):
        partes = arma.split('::')
        # Pieces without "::" carry no type name and are skipped explicitly
        # (they used to be swallowed by a bare except).
        if len(partes) > 1:
            lista_armas.append(partes[1])

In [None]:
# Count how often each gun type occurs and plot the counts in ascending
# order, leaving out the 'Unknown' entries.
armas = np.unique(lista_armas, return_counts=True)
tipos, quantidades = armas
df = (
    pd.DataFrame({'gun': tipos, 'ammount': quantidades})
    .loc[lambda tabela: tabela['gun'] != 'Unknown']
    .sort_values('ammount')
)

plt.bar(df['gun'], df['ammount'], color='orange')
plt.suptitle('Gun types')
plt.xticks(rotation=90)
plt.show()

## Verificando o estado com mais armas

In [None]:
# Totals of every numeric column per state; numeric_only=True keeps text and
# datetime columns out of the sum.
estados = data.groupby("state").sum(numeric_only=True)

In [None]:
# States ordered by total guns involved (ascending, so the largest is last).
estados.n_guns_involved.sort_values()

# 1) Podemos concluir que, no ano de 2017, o número de crimes, mortos e feridos foi maior em relação aos outros anos, comparando todos os estados. O número de mulheres em relação ao de homens nesse ano é menor que o padrão dos outros anos. Levando em consideração essas afirmações, fui pesquisar o porquê disso e me deparei com a eleição de Trump, quando houve manifestações em todos os estados. Essas manifestações causaram esses números. Para resolver esse problema, podemos reforçar o policiamento durante o período de eleições e, nos estados onde houve o maior número de mortos e feridos, chamar o exército para colocar ordem na rua.

# 2) Podemos concluir que o número de mortos e feridos em agosto de 2017 na Virgínia foi elevado. A explicação para isso foram as manifestações que aconteceram no estado, que acabaram entrando em conflito com um grupo extremista, e esse conflito fez os números crescerem. Para resolver esse problema, podemos reforçar as matérias de história, sociologia e ética nas escolas para diminuir o número de extremistas nas regiões do sul do país. Todos os dados que sustentam essas afirmações estão logo abaixo.

https://brasil.elpais.com/brasil/2017/01/20/internacional/1484909226_568037.html

https://brasil.elpais.com/brasil/2017/01/20/internacional/1484934027_165716.html

http://g1.globo.com/mundo/videos/v/carro-atropela-varias-pessoas-durante-protesto-violento-em-virginia-nos-eua/6075064/