# 0 Imports

In [46]:
import pandas as pd
import json
import datetime
from dateutil.relativedelta import relativedelta
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## 0.1 Initializing the dataframe

In [169]:
data = pd.read_csv('https://s3.sa-east-1.amazonaws.com/ckan.saude.gov.br/SRAG/2025/INFLUD25-06-10-2025.csv', sep = ';', low_memory = False)

In [174]:
for column in data.columns:
    if 'UTI' in column:
        print(column)

UTI
DT_ENTUTI
DT_SAIDUTI


In [None]:
data.value

NU_NOTIFIC
31735451969802    1
31735516593521    1
31735544021940    1
31735559043222    1
31735560267779    1
                 ..
31759708032675    1
31759708640962    1
31759715834473    1
31759722690545    1
31759724191654    1
Name: count, Length: 267912, dtype: int64

In [None]:
columns = ['EVOLUCAO', 'UTI', 'DT_NOTIFIC', 'SG_UF_NOT', 'VACINA_COV', 'HOSPITAL', 'DT_SIN_PRI', 'DT_ENTUTI', 'DT_SAIDUTI']

data['DT_NOTIFIC'] = pd.to_datetime(data['DT_NOTIFIC'])
data['DT_SIN_PRI'] = pd.to_datetime(data['DT_SIN_PRI'])

for column in columns:
    data[column] = data[column].fillna(data[column].mode()[0])

In [55]:
data = data[columns]
data['DT_NOTIFIC'] = pd.to_datetime(data['DT_NOTIFIC'])

## 0.2 Helper functions

In [74]:
last_month = datetime.datetime.now() - relativedelta(months = 1)

In [118]:
def last_month_calc(column: str, filter):
    if filter:
        temp = data.copy()
        temp = temp[temp[column] == filter]
    return  data[data['DT_SIN_PRI'] > last_month].sort_values('DT_SIN_PRI').groupby('DT_SIN_PRI').size()

def whole_year_calc(column: str, filter):
    if filter:
        temp = data.copy()
        temp = temp[temp[column] == filter]
    return data.set_index('DT_SIN_PRI').resample('ME').count()

def plot_sub(x_month,  x_year,  title, y_title):
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Last Month", "This Year"))
    fig.add_trace(go.Scatter(x=x_month.index, y=x_month.values, mode='lines+markers', name='Last Month'), row=1, col=1)
    fig.add_trace(go.Scatter(x=x_year.index, y=x_year.values, mode='lines+markers', name='This Year'), row=1, col=2)
    fig.update_layout(title_text=title)
    fig.update_yaxes(title_text=y_title)
    fig.show()

In [119]:
last_month = datetime.datetime.today() - relativedelta(months=1)

In [120]:
data['DT_NOTIFIC'].min()

Timestamp('2024-12-29 00:00:00')

This part is tricky because if we use by the first synmptons and not the DT_NOTIFIC
we will be missing an overlap, the pacient that had one new check but it is still sick

## 1. Here i will check the infection rate

In [121]:
infected_last_month = last_month_calc('DT_SIN_PRI', None)
infected_last_year = whole_year_calc('DT_SIN_PRI', None)

In [122]:
infected_last_month

DT_SIN_PRI
2025-09-08    732
2025-09-09    746
2025-09-10    901
2025-09-11    685
2025-09-12    767
2025-09-13    678
2025-09-14    753
2025-09-15    865
2025-09-16    691
2025-09-17    641
2025-09-18    682
2025-09-19    699
2025-09-20    810
2025-09-21    628
2025-09-22    642
2025-09-23    548
2025-09-24    478
2025-09-25    514
2025-09-26    530
2025-09-27    435
2025-09-28    496
2025-09-29    407
2025-09-30    261
2025-10-01    184
2025-10-02     65
2025-10-03     26
2025-10-04     13
2025-10-05      8
dtype: int64

In [123]:
infected_this_year

Unnamed: 0_level_0,EVOLUCAO,UTI,SG_UF_NOT,VACINA_COV,HOSPITAL,DT_SIN_PRI
DT_NOTIFIC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-12,201,201,201,201,201,201
2025-01,13005,13005,13005,13005,13005,13005
2025-02,14868,14868,14868,14868,14868,14868
2025-03,19775,19775,19775,19775,19775,19775
2025-04,29619,29619,29619,29619,29619,29619
2025-05,52434,52434,52434,52434,52434,52434
2025-06,52135,52135,52135,52135,52135,52135
2025-07,35367,35367,35367,35367,35367,35367
2025-08,25413,25413,25413,25413,25413,25413
2025-09,23320,23320,23320,23320,23320,23320


In [125]:
# fig = make_subplots(rows = 1, cols = 2, subplot_titles = ('Casos ultimo mes', 'Casos Mensais'))
# fig.add_trace(go.Line(x = infected_last_month.index, y = infected_last_month.values, name = 'Casos Ultimo Mes'), row = 1, col = 1)
# fig.add_trace(go.Line(x = infected_this_year.index, y = infected_this_year['EVOLUCAO'].values, name = 'Casos Mensais'), row = 1, col = 2)
# fig.update_layout(title_text = 'Casos de COVID-19 no Brasil')
# fig.show()

plot_sub(infected_last_month, infected_this_year['EVOLUCAO'], 'Infection Rate', 'Number of Infections')

## 2. Death Rate

In [140]:
deaths = data[(data['DT_SIN_PRI'] > last_month) & (data['EVOLUCAO'] == 2)]
deaths_grouped = deaths.set_index('DT_SIN_PRI').resample('D')
total_grouped = data[data['DT_SIN_PRI'] > last_month].set_index('DT_SIN_PRI').resample('D')
deaths_rate_month = (deaths_grouped.size() / total_grouped.size()) * 100
deaths_rate_month = deaths_rate.fillna(0)
deaths_rate_month 

DT_SIN_PRI
2025-09-08    3.825137
2025-09-09    4.289544
2025-09-10    4.439512
2025-09-11    3.357664
2025-09-12    3.650587
2025-09-13    4.719764
2025-09-14    2.523240
2025-09-15    3.236994
2025-09-16    3.473227
2025-09-17    2.652106
2025-09-18    1.906158
2025-09-19    1.573677
2025-09-20    2.839506
2025-09-21    1.751592
2025-09-22    1.557632
2025-09-23    1.824818
2025-09-24    1.046025
2025-09-25    0.972763
2025-09-26    1.698113
2025-09-27    1.609195
2025-09-28    0.806452
2025-09-29    0.491400
2025-09-30    0.383142
2025-10-01    1.630435
2025-10-02    0.000000
2025-10-03    0.000000
2025-10-04    0.000000
2025-10-05    0.000000
Freq: D, dtype: float64

In [141]:
deaths_year = data[(data['EVOLUCAO'] == 2)]
deaths_grouped_year = deaths_year.set_index('DT_SIN_PRI').resample('ME')
total_grouped_year = data.set_index('DT_SIN_PRI').resample('ME')

(deaths_grouped_year.size() / total_grouped_year.size()) * 100

DT_SIN_PRI
2024-12-31    11.442385
2025-01-31    11.073589
2025-02-28     8.453725
2025-03-31     6.206046
2025-04-30     5.765118
2025-05-31     7.072834
2025-06-30     6.969300
2025-07-31     6.264805
2025-08-31     5.278202
2025-09-30     2.954328
2025-10-31     1.013514
Freq: ME, dtype: float64

In [142]:
deaths_year = data[data['EVOLUCAO'] == 2].set_index('DT_NOTIFIC').resample('ME').count()
total_cases = data.set_index('DT_NOTIFIC').resample('ME').count()
perc_deaths = (deaths_year['EVOLUCAO'] / total_cases['EVOLUCAO']) * 100
perc_deaths

DT_NOTIFIC
2024-12-31    11.442786
2025-01-31    10.795848
2025-02-28     8.709981
2025-03-31     6.528445
2025-04-30     5.391809
2025-05-31     6.882939
2025-06-30     7.012564
2025-07-31     6.664405
2025-08-31     5.760831
2025-09-30     3.640652
2025-10-31     1.633803
Freq: ME, Name: EVOLUCAO, dtype: float64

In [145]:
plot_sub(deaths_rate_month, perc_deaths, 'Death Rate', 'Number of Deaths')

## 3. UTI ocuppation rate

In [146]:
uti_last_month = data[(data['DT_NOTIFIC'] > last_month) & (data['UTI'] == 1)].set_index('DT_NOTIFIC').resample('D').size()
all_occupation = data[data['DT_NOTIFIC'] > last_month].set_index('DT_NOTIFIC').resample('D').size()
uti_perc_month = (uti_last_month / all_occupation) * 100
uti_perc_month

DT_NOTIFIC
2025-09-08    27.454387
2025-09-09    25.108696
2025-09-10    24.893617
2025-09-11    26.832298
2025-09-12    24.763033
2025-09-13    20.052083
2025-09-14    24.012158
2025-09-15    28.200972
2025-09-16    27.854331
2025-09-17    24.215247
2025-09-18    23.931624
2025-09-19    24.721879
2025-09-20    21.428571
2025-09-21    21.533923
2025-09-22    27.679325
2025-09-23    26.940639
2025-09-24    21.814007
2025-09-25    24.776501
2025-09-26    20.972644
2025-09-27    25.899281
2025-09-28    24.100719
2025-09-29    25.140187
2025-09-30    25.454545
2025-10-01    23.574730
2025-10-02    26.256078
2025-10-03    22.955145
2025-10-04    23.076923
2025-10-05    25.000000
Freq: D, dtype: float64

In [147]:
uti_year = data[data['UTI'] == 1].set_index('DT_NOTIFIC').resample('ME').count()
total_year = data.set_index('DT_NOTIFIC').resample('ME').count()
uti_perc_year = (uti_year['UTI'] / total_year['UTI']) * 100
uti_perc_year

DT_NOTIFIC
2024-12-31    29.353234
2025-01-31    30.549789
2025-02-28    29.808986
2025-03-31    28.738306
2025-04-30    27.009690
2025-05-31    24.323912
2025-06-30    24.110482
2025-07-31    25.359799
2025-08-31    26.112620
2025-09-30    25.111492
2025-10-31    24.394366
Freq: ME, Name: UTI, dtype: float64

In [149]:
plot_sub(uti_perc_month, uti_perc_year, 'UTI Occupation Rate', 'Percentage of UTI Occupation')

## 4. Vaccination rate

In [161]:
vax_last_month = data[(data['DT_SIN_PRI'] > last_month) & (data['VACINA_COV'] == 1)].set_index('DT_NOTIFIC').resample('D').size()
total_month_vax = data[data['DT_SIN_PRI'] > last_month].set_index('DT_NOTIFIC').resample('D').size()
vax_perc_month = (vax_last_month / total_month_vax) * 100
vax_perc_month

DT_NOTIFIC
2025-09-08    61.538462
2025-09-09    50.609756
2025-09-10    52.684564
2025-09-11    46.310433
2025-09-12    43.762376
2025-09-13    45.847176
2025-09-14    46.494465
2025-09-15    45.910020
2025-09-16    46.028037
2025-09-17    45.865633
2025-09-18    46.095718
2025-09-19    45.968883
2025-09-20    41.379310
2025-09-21    44.859813
2025-09-22    42.663043
2025-09-23    48.959608
2025-09-24    43.935644
2025-09-25    45.616438
2025-09-26    43.954248
2025-09-27    42.592593
2025-09-28    42.909091
2025-09-29    47.696737
2025-09-30    45.760431
2025-10-01    45.110410
2025-10-02    42.978003
2025-10-03    43.126685
2025-10-04    43.243243
2025-10-05    32.692308
Freq: D, dtype: float64

In [158]:
vax_year = data[data['VACINA_COV'] == 1].set_index('DT_SIN_PRI').resample('ME').count()
total_year_vax = data.set_index('DT_SIN_PRI').resample('ME').count()
vax_perc_year = (vax_year['VACINA_COV'] / total_year_vax['VACINA_COV']) * 100
vax_perc_year

DT_SIN_PRI
2024-12-31    91.780822
2025-01-31    91.557619
2025-02-28    90.114188
2025-03-31    88.259202
2025-04-30    86.988191
2025-05-31    70.792044
2025-06-30    47.184636
2025-07-31    47.076424
2025-08-31    47.131838
2025-09-30    45.385118
2025-10-31    43.918919
Freq: ME, Name: VACINA_COV, dtype: float64

In [None]:
data.UTI = data.UTI.fillna(2)

In [30]:
data.VACINA_COV = data['VACINA_COV'].fillna(1)

In [32]:
data.HOSPITAL = data.HOSPITAL.fillna(1)
