# Environment Setup

## Importing Modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import folium

from itertools import chain
from folium.plugins import MarkerCluster, TimestampedGeoJson
from geopy.geocoders import Nominatim

In [None]:
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# Matplotlib 3.6 and the old 'seaborn' name was later removed, so fall back
# to the new name whenever the old one is not available.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

## Loading Dataset

Below, we see that each entry of the dataset presents the overall stats up to that timestamp. Therefore, when dealing with the data, there is no need to accumulate these records.

In [None]:
# Load the raw CSV, capitalize every column name (e.g. "date" -> "Date")
# and parse the Date column into datetimes before previewing the table.
df_brazil_raw = pd.read_csv("/kaggle/input/corona-virus-brazil/brazil_covid19.csv")
df_brazil_raw.columns = list(map(str.capitalize, df_brazil_raw.columns))
df_brazil_raw['Date'] = pd.to_datetime(df_brazil_raw['Date'])
df_brazil_raw.head()

# Processing Information

## Filling Dates

In [None]:
def filling_date_gaps(df):
  """Return `df` with a row for every combination of its index levels.

  `df` must be indexed by a MultiIndex. The index is expanded to the full
  cartesian product of its levels; combinations that were missing get 0 in
  every column, all values are cast to int, and the index levels are turned
  back into regular columns.

  Approach adapted from https://stackoverflow.com/a/41274715/2313889
  """
  full_index = pd.MultiIndex.from_product(df.index.levels, names=df.index.names)
  # Reindexing introduces NaN for the missing combinations (which also turns
  # the columns into floats), hence the zero fill and the cast back to int.
  dense = df.reindex(full_index).fillna(0)
  return dense.astype(int).reset_index()

# Keep a single row per (State, Date): the last record of each day, which
# assumes the rows of a given day are already sorted by Hour.
df_brazil = (
  df_brazil_raw
  .groupby(['State', 'Date'])
  .last()
  # The Hour information is not used from this point on.
  .drop(columns=['Hour'])
)
df_brazil = filling_date_gaps(df_brazil)

In [None]:
# Preview the first rows of the per-state, per-day table.
df_brazil.head()

## Last Information per State

In [None]:
# Most recent row of every state, indexed by state name; the Date column is
# dropped so that only the metric columns remain.
df_states_last = df_brazil.groupby("State").last()
df_states_last = df_states_last.drop(columns=['Date'])
# Display the table with a red gradient highlighting the larger values.
df_states_last.style.background_gradient(cmap='Reds')

## Retrieving Geolocation 

In [None]:
# Shared Nominatim (OpenStreetMap) geocoder, identified by its user agent.
geolocator = Nominatim(user_agent="Corona")
def getLatLong(row):
  """Return the (latitude, longitude) of the state represented by `row`.

  `row.name` (the row's index label) is used as the state name and is
  geocoded as "<state>, Brazil".

  Raises:
    ValueError: if the geocoder finds no match for the query.
  """
  query = f"{row.name}, Brazil"
  location = geolocator.geocode(query)
  # geocode() returns None when nothing matches; fail with a clear message
  # instead of an AttributeError on `None.latitude`.
  if location is None:
    raise ValueError(f"Could not geocode {query!r}")
  return (location.latitude, location.longitude)

In [None]:
# Hard-coded (latitude, longitude) tuples, one per row of df_states_last.
# The list is assigned positionally, so it must contain exactly as many
# entries as df_states_last has rows, in the same row order.
# NOTE(review): presumably these are the cached results of the commented-out
# getLatLong call below, kept to avoid one geocoding request per state - confirm.
# df_states_last['Location'] = df_states_last.apply(getLatLong, axis='columns')
df_states_last['Location'] = \
[(-9.0478679, -70.5264976),
 (-9.6611661, -36.6502426),
 (-4.479925, -63.5185396),
 (-12.285251, -41.9294776),
 (-5.3264703, -39.7156073),
 (-15.7754462, -47.7970891),
 (-19.5687682, -40.1721991),
 (-15.9323662, -50.1392928),
 (-5.2085503, -45.3930262),
 (-12.2115009, -55.5716547),
 (-19.5852564, -54.4794731),
 (-18.5264844, -44.1588654),
 (-24.4842187, -51.8148872),
 (-7.1219366, -36.7246845),
 (-4.7493933, -52.8973006),
 (-8.4116316, -37.5919699),
 (-7.6992782, -42.5043787),
 (-5.6781175, -36.4781776),
 (-29.8425284, -53.7680577),
 (-22.9110137, -43.2093727),
 (-10.943145, -62.8277863),
 (-27.0628367, -51.114965),
 (-10.6743911, -37.3773519),
 (-23.5506507, -46.6333824),
 (-10.8855129, -48.3716912)]

# Visualizing Information

## Plots

### Total Occurrences

In [None]:
def metrics_per_state_bars():
  """Draw a grouped bar chart of the latest metrics of every state.

  Reads the module-level `df_states_last`; states are ordered by their
  number of suspects, ascending.
  """
  metrics = df_states_last[['Suspects', 'Refuses', 'Cases']]
  ordered = metrics.sort_values('Suspects')
  ax = ordered.plot(kind='bar', figsize=(12, 8), title="Metrics per State")
  # Tilt the state names so they do not overlap, and drop the axis title.
  ax.tick_params(axis='x', labelrotation=60)
  ax.set_xlabel("")

metrics_per_state_bars()

### Occurrences over Time

In [None]:
def metrics_per_state_over_time_plot():
  """Plot the evolution of each metric over time, one facet per state.

  Reads the module-level `df_brazil` and reshapes it to long format: one row
  per (State, Date, metric), with the metric name in `Types` and its value
  in `Total`. Each state gets its own panel with an independent y axis.
  """
  metric_cols = ['Suspects', 'Refuses', 'Cases']
  # Rows are stacked metric by metric, in the order of `metric_cols`.
  long_df = df_brazil.melt(id_vars=['State', 'Date'], value_vars=metric_cols,
                           var_name='Types', value_name='Total')

  grid = sns.FacetGrid(long_df, col='State', hue='Types',
                       height=5, sharey=False, col_wrap=2)
  grid.map(plt.plot, 'Date', 'Total')
  grid.set_xticklabels(rotation=60)
  grid.add_legend()

metrics_per_state_over_time_plot()

## Maps

### Total Occurrences

In [None]:
def metrics_per_state_map():
  """Build a folium map with one toggleable marker layer per metric.

  Reads the module-level `df_states_last`. For each of Refuses, Suspects and
  Cases, a state gets a marker in that metric's layer when its value is
  greater than zero. The map is centred on the location of Maranhão.

  Returns:
    The populated `folium.Map`.
  """
  map_corona = folium.Map(location=df_states_last['Location']['Maranhão'],
                          min_zoom=2, max_zoom=15, zoom_start=4)

  def marker(row, container, color='blue'):
    """Add a marker for `row` to `container`; the tooltip lists its metrics."""
    tip = f'\
      <li>State: {row.name}\
      <li>Cases: {row.Cases}\
      <li>Suspects: {row.Suspects}\
      <li>Refuses: {row.Refuses}\
      '
    folium.Marker(
      location=row.Location,
      icon=folium.Icon(color),
      tooltip=tip,
    ).add_to(container)

  # (metric column, marker colour), in the order the layers are added.
  layers = (('Refuses', 'blue'), ('Suspects', 'gray'), ('Cases', 'red'))

  groups = {}
  for metric, _ in layers:
    groups[metric] = folium.FeatureGroup(name=metric)
    groups[metric].add_to(map_corona)

  # Plain loops instead of DataFrame.apply: adding markers is a side effect,
  # and apply may invoke its function more than once for the first row on
  # older pandas versions, which would duplicate that marker.
  for metric, color in layers:
    positive = df_states_last[df_states_last[metric] > 0]
    for _, row in positive.iterrows():
      marker(row, groups[metric], color)

  # I believe there is a bug with autoZIndex
  folium.LayerControl(collapsed=False, autoZIndex=True).add_to(map_corona)
  return map_corona

metrics_per_state_map()

### Occurrences over Time

In [None]:
# Attach each state's Location to every daily row; the left join keeps all
# rows of df_brazil.
df_brazil_loc = pd.merge(
  df_brazil,
  df_states_last['Location'],
  how="left",
  on="State",
)

In [None]:
# Preview the first rows of the daily table with the Location column added.
df_brazil_loc.head()

In [None]:
def metrics_per_state_over_time_map():
  """Build a folium map animating the per-state metrics over time.

  Reads the module-level `df_brazil_loc` and `df_states_last`. Every row with
  a positive value for a metric becomes a timestamped circle whose colour
  identifies the metric and whose radius grows linearly with the value, from
  MIN_RADIUS up to MAX_RADIUS for the largest value of any plotted metric.

  Returns:
    The `folium.Map` carrying the TimestampedGeoJson layer.
  """
  MAX_RADIUS = 28
  MIN_RADIUS = 4
  # Metric column -> circle colour.
  DATA = {
      'Refuses': 'Blue',
      'Suspects': 'Gray',
      'Cases': 'Red',
      }
  # Normalise by the largest value across *all* plotted metrics so that no
  # circle can exceed MAX_RADIUS, and never divide by zero (or NaN).
  peak = df_brazil_loc[list(DATA)].max().max()
  MAX = peak if peak > 0 else 1

  def buildGeoJson(points):
    """Wrap a list of GeoJSON features into a FeatureCollection."""
    return {
      "type": "FeatureCollection",
      "features": points
    }

  def getPoints(row, data, color="blue"):
    """Return the GeoJSON point feature for metric `data` of `row`."""
    radius = MIN_RADIUS + (MAX_RADIUS - MIN_RADIUS) * row[data] / MAX
    return {
      "type": "Feature",

      "geometry": {
        "type": "Point",
        # Location is stored as (lat, long); GeoJSON expects [long, lat].
        "coordinates": [row.Location[1], row.Location[0]]
      },

      "properties": {
        "time": row.Date.strftime("%Y-%m-%d"),
        "popup": f"<li>{row.State}</li>",
        "toolTip": f"<li>{row.State}</li>",
        "icon": "circle",
        "iconstyle": {
          "fillColor": color,
          "fillOpacity": 0.33,
          # A real boolean: the string "false" is truthy once it reaches
          # JavaScript and would leave the stroke enabled.
          "stroke": False,
          # Plain float so the value is always JSON-serialisable.
          "radius": float(radius)
          },
          "style": {"weight": 0},
      },
    }

  def getData(data, color):
    """Return the features of every row whose metric `data` is positive."""
    rows = df_brazil_loc[df_brazil_loc[data] > 0]
    # A comprehension also handles the "no positive rows" case, where
    # DataFrame.apply would hand back a DataFrame without `.to_list()`.
    return [getPoints(row, data, color) for _, row in rows.iterrows()]

  points = list(chain.from_iterable(getData(k, v) for k, v in DATA.items()))
  map_corona = folium.Map(location=df_states_last['Location']['Maranhão'],
                          min_zoom=2, max_zoom=15, zoom_start=3)
  TimestampedGeoJson(
      buildGeoJson(points),
      add_last_point=False,
      auto_play=False,
      loop_button=True,
      date_options='YYYY/MM/DD',
      time_slider_drag_update=True,
      duration='P0D'
  ).add_to(map_corona)

  return map_corona

metrics_per_state_over_time_map()