# Data Analytics

## Primer proyecto

### Vaccination

## Yael Contla

In [None]:
import numpy as np  # Algebra Líneal
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
import matplotlib.pyplot as plt
import warnings
from mlxtend.plotting import plot_confusion_matrix
from plotly.offline import plot, iplot, init_notebook_mode
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px
init_notebook_mode(connected=True)
warnings.filterwarnings("ignore")
%matplotlib inline
import datetime as dt

In [None]:
%%time
# import data
vacunacion = pd.read_csv('../input/covid-world-vaccination-progress/country_vaccinations.csv')
vacunacion.head()

## Seguimos explorando el Dataset, tamaño e información

In [None]:
# Report the dataset dimensions and the per-column summary.
print("El tamaño de la base de datos es: ", vacunacion.shape)
# DataFrame.info() writes its own report and returns None, so passing it to
# print() emitted a stray "None". Print the label first, then call it.
print("Info del dataset: ")
vacunacion.info()

## Detectamos que 'date' está en formato "object"; necesitamos cambiarlo a "datetime"

In [None]:
# Display the column labels of the dataset.
vacunacion.columns

In [None]:
# Re-read the data with parse_dates so that 'date' is parsed as datetime
# instead of being left as object. The path matches the initial load; the
# bare file name only resolved when the CSV sat in the working directory.
vacunacion = pd.read_csv(
    '../input/covid-world-vaccination-progress/country_vaccinations.csv',
    parse_dates=['date'],
)

In [None]:
# Preview the first three rows after re-loading with parsed dates.
vacunacion.head(3)

In [None]:
# Report the dataset dimensions and the per-column summary.
print("El tamaño de la base de datos es: ", vacunacion.shape)
# DataFrame.info() writes its own report and returns None, so passing it to
# print() emitted a stray "None". Print the label first, then call it.
print("Info del dataset: ")
vacunacion.info()

In [None]:
# The column should go from float to int, but the cast is rejected while it
# still holds null values; they have to be filled first (done further below).
# Catch the expected error so the notebook keeps running past this
# demonstration instead of stopping here.
try:
    vacunacion['total_vaccinations'] = vacunacion['total_vaccinations'].astype(int)
except ValueError as error:
    print("No se puede convertir 'total_vaccinations' a int todavía:", error)

## Vamos a detectar los valores nulos y a eliminarlos o sustituirlos por otro valor, según sea el caso

In [None]:
# Build a table of missing values: per column, the number of nulls and the
# fraction of rows that are null, both sorted from most to fewest.
total_of_all = (
    vacunacion.isnull()
    .sum()
    .sort_values(ascending=False)
)
percent_of_all = (
    vacunacion.isnull().sum()
    .div(vacunacion.isnull().count())
    .sort_values(ascending=False)
)
missing_data_test = pd.concat(
    [total_of_all, percent_of_all],
    axis=1,
    keys=['Total Nulos', 'Percent'],
)
missing_data_test.head(n=9)

In [None]:
# Inspect the contents of the total_of_all variable (null count per column).
total_of_all

In [None]:
# Another quick way to see the number of null values per column.
vacunacion.isnull().sum()

In [None]:
# Show every row whose "people_fully_vaccinated" value is null.
vacunacion.loc[vacunacion["people_fully_vaccinated"].isna()]

In [None]:
# iso_code, source_name and source_website are not needed here; drop them.
vacunacion = vacunacion.drop(
    ['iso_code', 'source_name', 'source_website'],
    axis=1,
)

In [None]:
# Check that the previously selected columns have been removed.
vacunacion.head(2)

In [None]:
# Replace every null value in the 'total_vaccinations' column with 0 (zero).
vacunacion['total_vaccinations'] = vacunacion['total_vaccinations'].fillna(0)

In [None]:
# Check that the total_vaccinations column has been filled.
vacunacion.head()

In [None]:
# Replace every null value in the 'people_vaccinated' column with 0 (zero).
vacunacion['people_vaccinated'] = vacunacion['people_vaccinated'].fillna(0)

In [None]:
# Replace every null value with 0 (zero) in the remaining metric columns.
for columna in (
    'people_fully_vaccinated',
    'daily_vaccinations_raw',
    'daily_vaccinations',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'daily_vaccinations_per_million',
):
    vacunacion[columna] = vacunacion[columna].fillna(0)

In [None]:
vacunacion.head()

In [None]:
# Report the dataset dimensions and the per-column summary.
print("El tamaño de la base de datos es: ", vacunacion.shape)
# DataFrame.info() writes its own report and returns None, so passing it to
# print() emitted a stray "None". Print the label first, then call it.
print("Info del dataset: ")
vacunacion.info()

In [None]:
# Explore the unique values of the 'country' column.
vacunacion['country'].unique()

In [None]:
# Arrange the data by country and date, keeping the maximum of every value.
# "country" used to be listed twice in the index, which is redundant (at best
# it yields a duplicated index level). The string "max" replaces np.max,
# which newer pandas versions warn about when passed as an aggregation.
pivotepais = pd.pivot_table(vacunacion, index=["country", "date"], aggfunc="max")
# Show up to 159 rows for Mexico, indexed by date.
pivotepais.loc["Mexico"].head(159)

In [None]:
# Display the (rows, columns) dimensions of the dataset.
vacunacion.shape

## Creación de Dataframes

In [None]:
# Keep only the rows whose country is Mexico and preview the last 30 of them.
Mexico = vacunacion.loc[vacunacion['country'].eq('Mexico')]
Mexico.tail(n=30)

In [None]:
# Display the (rows, columns) dimensions of the Mexico subset.
Mexico.shape

In [None]:
# Preview the data sorted by country, date and the cumulative counters
# (the result is only displayed here, not stored).
vacunacion.sort_values(
    by=[
        'country',
        'date',
        'total_vaccinations',
        'people_vaccinated',
        'people_fully_vaccinated',
    ]
)

In [None]:
# Sort the rows by country, date and the cumulative counters, and keep the
# sorted frame.
vacunacion = vacunacion.sort_values(
    by=[
        'country',
        'date',
        'total_vaccinations',
        'people_vaccinated',
        'people_fully_vaccinated',
    ]
)

In [None]:
# loc and iloc are used to explore the data
vacunacion.iloc[10000:11000:2] # positional row slice; the trailing :2 takes every second row

In [None]:
# Preview the first six rows of the sorted data.
vacunacion.head(6)

In [None]:
vacunacion.iloc[4,5]  # returns the value at the intersection of row position 4 and column position 5

In [None]:
vacunacion.iloc[12:39,1:5] # returns a specific positional range of rows and columns

# Análisis del Mundo

In [None]:
# Convert the metric columns from float to integer (their nulls were already
# replaced by 0 in earlier cells). The last cast used to end in a stray "t)",
# a syntax error that kept the whole cell from running.
# 'int64' is explicit so that large totals do not overflow on platforms where
# the plain `int` dtype is 32-bit.
# NOTE(review): casting 'total_vaccinations_per_hundred' to integer truncates
# its decimals; kept because the original cell requested it. Confirm intent.
vacunacion = vacunacion.astype({
    'total_vaccinations': 'int64',
    'people_fully_vaccinated': 'int64',
    'people_vaccinated': 'int64',
    'daily_vaccinations': 'int64',
    'daily_vaccinations_raw': 'int64',
    'total_vaccinations_per_hundred': 'int64',
})

In [None]:
# Aggregate all countries per date. numeric_only=True restricts the sum to
# numeric columns, so text columns such as 'country' are neither concatenated
# nor rejected by the aggregation.
vacunacion_mundial = vacunacion.groupby('date').sum(numeric_only=True)

In [None]:
# Split the per-date aggregate into one two-column frame (date + metric) per
# metric, so each can be plotted on its own.
(
    total_vacunas,
    vacunados,
    personas_vacunadas,
    vacunacion_diairia,
    vacunacion_crudas,
    total_vacunas_por_cien,
) = (
    vacunacion_mundial[columna].reset_index()
    for columna in (
        'total_vaccinations',
        'people_fully_vaccinated',
        'people_vaccinated',
        'daily_vaccinations',
        'daily_vaccinations_raw',
        'total_vaccinations_per_hundred',
    )
)

In [None]:
# Display the per-date daily vaccinations frame.
vacunacion_diairia

In [None]:
# Grouped bar chart comparing, per date, the summed daily vaccinations (blue)
# with the summed raw daily figures (red).
trace1 = go.Bar(
    x=vacunacion_diairia['date'],
    y=vacunacion_diairia['daily_vaccinations'],
    name='daily_vaccinations',
    marker_color='blue',
)

trace2 = go.Bar(
    x=vacunacion_crudas['date'],
    y=vacunacion_crudas['daily_vaccinations_raw'],
    name='daily_vaccinations_raw',
    marker_color='red',
)

layout = go.Layout(
    barmode='group',
    # No gap between bars or between bar groups.
    bargap=0,
    bargroupgap=0,
    title="Vacunacion contra el coronavirus - Vacunas, Vacunados, (Bar Chart)",
    xaxis=dict(
        title='Month',
        zeroline=False,
        gridcolor='rgb(183,183,183)',
        showline=True,
    ),
    # Axis label typo fixed ("vacunadoss" -> "vacunados") so it matches the
    # label used by the line chart of the same data.
    yaxis=dict(
        title='No. de vacunados',
        zeroline=False,
        gridcolor='rgb(183,183,183)',
        showline=True,
    ),
    font=dict(family='Courier New, monospace', size=12, color='rgb(0,0,0)'),
    # Legend placed at the top-left corner with a transparent background.
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
)

data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Line chart (lines + markers) comparing, per date, the summed daily
# vaccinations (blue) with the summed raw daily figures (red).
trace1 = go.Scatter(
    x=vacunacion_diairia['date'],
    y=vacunacion_diairia['daily_vaccinations'],
    name='daily_vaccinations',
    mode='lines+markers',
    line=dict(color='blue', width=1),
)

trace2 = go.Scatter(
    x=vacunacion_crudas['date'],
    y=vacunacion_crudas['daily_vaccinations_raw'],
    name='daily_vaccinations_raw',
    mode='lines+markers',
    line=dict(color='red', width=1),
)

layout = go.Layout(
    # The title carried a leftover "(Bar Chart)" copied from the bar chart
    # cell; this figure is a line chart.
    title="Vacunacion contra el coronavirus - Vacunas, Vacunados, (Line Chart)",
    xaxis=dict(
        title='Month',
        zeroline=False,
        gridcolor='rgb(183,183,183)',
        showline=True,
    ),
    yaxis=dict(
        title='No. de vacunados',
        zeroline=False,
        gridcolor='rgb(183,183,183)',
        showline=True,
    ),
    font=dict(family='Courier New, monospace', size=12, color='rgb(0,0,0)'),
    # Legend placed at the top-left corner with a transparent background.
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
)

data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
iplot(fig)