## Import libraries

In [1]:
import sys
import os
# Make the parent of the working directory importable so that the `utils`
# package imported further down can be resolved.
current_dir = os.getcwd()
parent_path = os.path.join(current_dir, '..')
project_root = os.path.abspath(parent_path)
sys.path.append(project_root)

import pandas as pd
import utils.NN_preprocessing as NN_preprocessing
import matplotlib.pyplot as plt
import numpy as np
import utils.visualization as visualization
import utils.df_operations as df_operations


## Define Parameters

In [2]:
# Number of lag steps: selects the cached dataset file name and bounds the
# `lag{j}` index iterated in the days-difference analysis.
lags = 3
# Number of traps: selects the cached dataset file name and bounds the
# `days{i}_...` trap index iterated in the days-difference analysis.
ntraps = 3

## Create dataset

In [None]:
# Location of the cached feature matrix for this (lags, ntraps) configuration.
data_path = f'../results/final_dfs/final_df_lag{lags}_ntraps{ntraps}.parquet'

if os.path.exists(data_path):
    # Reuse the cached matrix instead of rebuilding it.
    data = pd.read_parquet(data_path, engine="pyarrow", use_threads=True)
else:
    # No cached file for this configuration: build the matrix from scratch.
    data = NN_preprocessing.create_final_matrix(ntraps, lags)

# Month ('mes') and year ('anoepid') columns used by the "By year" and
# "By month" sections, together with the 'nplaca' join key.
original_data = pd.read_csv('../data/final_data.csv')
info_df = original_data[['nplaca', 'mes', 'anoepid']]

# Every "By year" / "By month" cell filters `data_extended`, so it must be
# defined here; with the merge commented out those cells raised NameError.
# Exact duplicate metadata rows are dropped first so the join cannot multiply
# rows of `data`.
# NOTE(review): assumes `data` carries an 'nplaca' column - confirm against
# NN_preprocessing.create_final_matrix.
data_extended = pd.merge(
    data,
    info_df.drop_duplicates(),
    on='nplaca',
    how='inner',
)

## Analyze the day differences for each lag

In [None]:
# For each lag, pool the `days{i}_lag{j}` columns of all traps and count how
# often every value occurs.
diff_days_dict = {
    f'lag{lag}': pd.concat(
        [data[f'days{trap}_lag{lag}'] for trap in range(ntraps)],
        axis=0,
    ).value_counts().to_dict()
    for lag in range(1, lags + 1)
}

# One bar chart per lag: value on the x axis, number of occurrences on y.
for lag in range(1, lags + 1):
    day_counts = diff_days_dict[f'lag{lag}']
    plt.figure()
    plt.title(f'Distribution of days for lag{lag}')
    plt.bar(day_counts.keys(), day_counts.values(), label=f'lag{lag}')
    plt.xlim(0, 60)
    plt.xlabel('Days')
    plt.ylabel('Frequency')
    plt.show()


## Median of lags x novos

### All data

In [None]:
# Trap-oriented plot for the full dataset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data, lags, ntraps, 'median')

In [None]:
# Lag-oriented plot for the full dataset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data, lags, ntraps, 'median')

### By year

#### 2023_24 (epidemic year)


In [None]:
# Subset for year 2023_24 (presumably rows where 'anoepid' == '2023_24' - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_2023_24 =  df_operations.row_with_value(data_extended, 'anoepid', '2023_24')


In [None]:
# Lag-oriented plot for the 2023_24 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2023_24, lags, ntraps, 'median')


In [None]:
# Trap-oriented plot for the 2023_24 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2023_24, lags, ntraps, 'median')


#### 2012_13 (epidemic year)

In [None]:
# Subset for year 2012_13 (presumably rows where 'anoepid' == '2012_13' - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_2012_13 =  df_operations.row_with_value(data_extended, 'anoepid', '2012_13')


In [None]:
# Lag-oriented plot for the 2012_13 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2012_13, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the 2012_13 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2012_13, lags, ntraps, 'median')

#### 2016_17 (non-epidemic year)

In [None]:
# Subset for year 2016_17 (presumably rows where 'anoepid' == '2016_17' - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_2016_17 =  df_operations.row_with_value(data_extended, 'anoepid', '2016_17')


In [None]:
# Lag-oriented plot for the 2016_17 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2016_17, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the 2016_17 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2016_17, lags, ntraps, 'median')

#### 2021_22 (non-epidemic year)


In [None]:
# Subset for year 2021_22 (presumably rows where 'anoepid' == '2021_22' - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_2021_22 =  df_operations.row_with_value(data_extended, 'anoepid', '2021_22')


In [None]:
# Lag-oriented plot for the 2021_22 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2021_22, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the 2021_22 subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2021_22, lags, ntraps, 'median')

### By month

#### August

In [None]:
# Subset for August ('mes' value 8; presumably an equality filter - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_august =  df_operations.row_with_value(data_extended, 'mes', 8)


In [None]:
# Lag-oriented plot for the August subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_august, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the August subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_august, lags, ntraps, 'median')

#### October

In [None]:
# Subset for October ('mes' value 10; presumably an equality filter - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_october =  df_operations.row_with_value(data_extended, 'mes', 10)


In [None]:
# Lag-oriented plot for the October subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_october, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the October subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_october, lags, ntraps, 'median')

#### January (NOTE: data from this month were dropped!)

In [None]:
# Subset for January ('mes' value 1; presumably an equality filter - confirm row_with_value semantics).
# The section heading states January data were dropped, so this subset may be empty - TODO confirm.
# Requires `data_extended` to be defined earlier in the notebook.
data_january =  df_operations.row_with_value(data_extended, 'mes', 1)


In [None]:
# Lag-oriented plot for the January subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_january, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the January subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_january, lags, ntraps, 'median')

#### May

In [None]:
# Subset for May ('mes' value 5; presumably an equality filter - confirm row_with_value semantics).
# Requires `data_extended` to be defined earlier in the notebook.
data_may =  df_operations.row_with_value(data_extended, 'mes', 5)


In [None]:
# Lag-oriented plot for the May subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_may, lags, ntraps, 'median')

In [None]:
# Trap-oriented plot for the May subset with the 'median' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_may, lags, ntraps, 'median')

## Mean of lags x novos

### All data

In [None]:
# Trap-oriented plot for the full dataset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data, lags, ntraps, 'mean')

In [None]:
# Lag-oriented plot for the full dataset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data, lags, ntraps, 'mean')

### By year

#### 2023_24 (epidemic year)


In [None]:
# Same 2023_24 selection as in the median section, re-run here ('anoepid' matched against '2023_24').
# Requires `data_extended` to be defined earlier in the notebook.
data_2023_24 =  df_operations.row_with_value(data_extended, 'anoepid', '2023_24')


In [None]:
# Lag-oriented plot for the 2023_24 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2023_24, lags, ntraps, 'mean')


In [None]:
# Trap-oriented plot for the 2023_24 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2023_24, lags, ntraps, 'mean')


#### 2012_13 (epidemic year)

In [None]:
# Same 2012_13 selection as in the median section, re-run here ('anoepid' matched against '2012_13').
# Requires `data_extended` to be defined earlier in the notebook.
data_2012_13 =  df_operations.row_with_value(data_extended, 'anoepid', '2012_13')


In [None]:
# Lag-oriented plot for the 2012_13 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2012_13, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the 2012_13 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2012_13, lags, ntraps, 'mean')

#### 2016_17 (non-epidemic year)

In [None]:
# Same 2016_17 selection as in the median section, re-run here ('anoepid' matched against '2016_17').
# Requires `data_extended` to be defined earlier in the notebook.
data_2016_17 =  df_operations.row_with_value(data_extended, 'anoepid', '2016_17')


In [None]:
# Lag-oriented plot for the 2016_17 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2016_17, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the 2016_17 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2016_17, lags, ntraps, 'mean')

#### 2021_22 (non-epidemic year)


In [None]:
# Same 2021_22 selection as in the median section, re-run here ('anoepid' matched against '2021_22').
# Requires `data_extended` to be defined earlier in the notebook.
data_2021_22 =  df_operations.row_with_value(data_extended, 'anoepid', '2021_22')


In [None]:
# Lag-oriented plot for the 2021_22 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_2021_22, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the 2021_22 subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_2021_22, lags, ntraps, 'mean')

### By month

#### August

In [None]:
# Same August selection as in the median section, re-run here ('mes' matched against 8).
# Requires `data_extended` to be defined earlier in the notebook.
data_august =  df_operations.row_with_value(data_extended, 'mes', 8)


In [None]:
# Lag-oriented plot for the August subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_august, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the August subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_august, lags, ntraps, 'mean')

#### October

In [None]:
# Same October selection as in the median section, re-run here ('mes' matched against 10).
# Requires `data_extended` to be defined earlier in the notebook.
data_october =  df_operations.row_with_value(data_extended, 'mes', 10)


In [None]:
# Lag-oriented plot for the October subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_october, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the October subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_october, lags, ntraps, 'mean')

#### January (NOTE: data from this month were dropped!)

In [None]:
# Same January selection as in the median section, re-run here ('mes' matched against 1).
# The section heading states January data was dropped, so this subset may be empty - TODO confirm.
# Requires `data_extended` to be defined earlier in the notebook.
data_january =  df_operations.row_with_value(data_extended, 'mes', 1)


In [None]:
# Lag-oriented plot for the January subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_january, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the January subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_january, lags, ntraps, 'mean')

#### May

In [None]:
# Same May selection as in the median section, re-run here ('mes' matched against 5).
# Requires `data_extended` to be defined earlier in the notebook.
data_may =  df_operations.row_with_value(data_extended, 'mes', 5)


In [None]:
# Lag-oriented plot for the May subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_lags(data_may, lags, ntraps, 'mean')

In [None]:
# Trap-oriented plot for the May subset with the 'mean' statistic; helper lives in utils.visualization.
visualization.NovosxRelation_plot_traps(data_may, lags, ntraps, 'mean')