In [1]:
import re
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
import sys
from src.cleaning_functions import *

In [3]:
# Load the shark-attack records; the CSV is decoded as ISO-8859-1.
sharks = pd.read_csv(
    filepath_or_buffer="./data/shark_attacks.csv",
    encoding="ISO-8859-1",
)

Hipótesis: el número de muertes por tiburón ha descendido a lo largo del tiempo.

In [4]:
# Preview five random rows; the fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" executions.
sharks.sample(5, random_state=42)

Unnamed: 0,year,months_code,day,hour,country,sex,age,activity,species,type,injury,fatal_(y/n)
2264,1996.0,12.0,10.0,16.0,South Africa,M,,surf,white shark,Provoked,injury,NO
4769,1939.0,11.0,9.0,,Australia,,,,,Boating,injury,
2506,1993.0,1.0,5.0,9.0,Australia,M,21.0,fishing,,Provoked,injury,NO
4669,1943.0,1.0,26.0,,,M,,,,Sea disaster,fatal,YES
2111,1999.0,7.0,15.0,11.0,South Africa,M,14.0,surf,white shark,Provoked,fatal,YES


### Ataques por año en función de si son o no mortales

In [5]:
# Attacks per year (years after 1950), one histogram trace per outcome:
# non-fatal records first, then fatal ones.
muertes_por_año_division = go.Figure()

for valor_fatal, etiqueta in (("NO", "No mortal"), ("YES", "Mortal")):
    filas = (sharks["year"] > 1950) & (sharks["fatal_(y/n)"] == valor_fatal)
    muertes_por_año_division.add_trace(
        go.Histogram(x=sharks.loc[filas, "year"], name=etiqueta)
    )

muertes_por_año_division.update_layout(
    title="Datos de accidentes por año",
    xaxis_title="Años",
    yaxis_title="Accidentes",
    legend_title="Accidente",
    font={"family": "Courier New, monospace", "size": 18, "color": "Black"},
)

# Render inline and export a standalone HTML copy of the figure.
muertes_por_año_division.show()
muertes_por_año_division.write_html("./images/muertes_por_año_division.html")

Puedes ver la figura haciendo click [aquí](https://nbviewer.org/github/rodrigogalan/data-cleaning-pandas/blob/main/images/muertes_por_año_division.html)

### Ataques por mes en función del sexo

In [6]:
# Five most frequent countries among the records that have a year.
grafico_paises = (
    sharks.loc[sharks["year"].notna(), "country"]
    .value_counts()
    .index[:5]
    .tolist()
)

In [7]:
# Distribution of attacks over `months_code` for records located in the
# United States of America after 1800, one histogram trace per sex.
muertes_por_mes = go.Figure()

# Rows shared by both traces; filtering once avoids repeating the mask.
ataques_usa = sharks[(sharks.year > 1800) & (sharks.country == "United States of America")]

# `sex == "F"` / `sex == "M"` is already False for missing values, so no
# separate notna() check is required.
for codigo_sexo, etiqueta in (("F", "Female"), ("M", "Male")):
    muertes_por_mes.add_trace(
        go.Histogram(
            x=ataques_usa.loc[ataques_usa.sex == codigo_sexo, "months_code"],
            name=etiqueta,
        )
    )

muertes_por_mes.update_layout(
    # The rows are not filtered by fatality and only cover the USA, so the
    # title describes attacks in the USA rather than deaths.
    title="Ataques en EEUU por mes",
    xaxis_title="Meses",
    yaxis_title="Accidentes",
    legend_title="Sexo",
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="RebeccaPurple"
    )
)

# Render inline and export a standalone HTML copy of the figure.
muertes_por_mes.show()
muertes_por_mes.write_html("./images/muertes_por_mes.html")

Puedes ver la figura haciendo click [aquí](https://nbviewer.org/github/rodrigogalan/data-cleaning-pandas/blob/main/images/muertes_por_mes.html)

### Ataques en función del país

In [8]:
# Attacks per year (after 1800) for the countries listed in `grafico_paises`,
# colored by country.
#
# Plotly Express builds its own figure, so no matplotlib figure is created here.
# The frame is filtered once and columns are referenced by name, which keeps
# the `x` and `color` inputs aligned by construction. `isin` is False for
# missing countries, so no separate notna() check is needed.
ataques_top_paises = sharks[(sharks.year > 1800) & sharks.country.isin(grafico_paises)]

muertes_por_pais = px.histogram(ataques_top_paises, x="year", color="country")

muertes_por_pais.update_layout(
    title="Datos de accidentes por paises",
    xaxis_title="Años",
    yaxis_title="Accidentes",
    legend_title="Paises",
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="Black"
    )
)

# Render inline and export a standalone HTML copy of the figure.
muertes_por_pais.show()
muertes_por_pais.write_html("./images/muertes_por_pais.html")

<Figure size 1080x288 with 0 Axes>

Puedes ver la figura haciendo click [aquí](https://nbviewer.org/github/rodrigogalan/data-cleaning-pandas/blob/main/images/muertes_por_pais.html)

### Ataques en EEUU en función del sexo

In [9]:
# Attacks per year in the United States of America (after 1800),
# one histogram trace per sex: female first, then male.
ataques_EEUU = go.Figure()

# Mask common to both traces.
en_eeuu = (sharks.year > 1800) & (sharks.country == "United States of America")

for codigo_sexo, etiqueta in (("F", "Female"), ("M", "Male")):
    # Comparing against a concrete code already discards missing values.
    años_sexo = sharks.loc[en_eeuu & (sharks.sex == codigo_sexo), "year"]
    ataques_EEUU.add_trace(go.Histogram(x=años_sexo, name=etiqueta))

ataques_EEUU.update_layout(
    title="Ataques en EEUU por año",
    xaxis_title="Años",
    yaxis_title="Ataques",
    legend_title="Sexo",
    font={"family": "Courier New, monospace", "size": 18, "color": "RebeccaPurple"},
)

# Render inline and export a standalone HTML copy of the figure.
ataques_EEUU.show()
ataques_EEUU.write_html("./images/ataques_EEUU.html")

Puedes ver la figura haciendo click [aquí](https://nbviewer.org/github/rodrigogalan/data-cleaning-pandas/blob/main/images/ataques_EEUU.html)

### Datos de muertes por actividad

In [10]:
# Distinct activities among records whose fatality flag is present,
# in the order produced by value_counts(); the cell displays how many there are.
actividades = (
    sharks.loc[sharks["fatal_(y/n)"].notna(), "activity"]
    .value_counts()
    .index
    .tolist()
)
len(actividades)

9

In [11]:
# Attack counts per activity, one histogram trace per outcome:
# non-fatal records first, then fatal ones. Rows without an activity are skipped.
activity = go.Figure()

for valor_fatal, etiqueta in (("NO", "No mortal"), ("YES", "Mortal")):
    filas = sharks.activity.notna() & (sharks["fatal_(y/n)"] == valor_fatal)
    activity.add_trace(
        go.Histogram(x=sharks.loc[filas, "activity"], name=etiqueta)
    )

activity.update_layout(
    title="Datos de accidentes por actividad",
    xaxis_title="Actividades",
    yaxis_title="Accidentes",
    legend_title="Accidente",
    font={"family": "Courier New, monospace", "size": 18, "color": "Black"},
)

# Render inline and export a standalone HTML copy of the figure.
activity.show()
activity.write_html("./images/activity.html")

Puedes ver la figura haciendo click [aquí](https://nbviewer.org/github/rodrigogalan/data-cleaning-pandas/blob/main/images/activity.html)