# Automated Post-integration Report - Signaux Faibles
Run this notebook after each new data integration performed by the [opensignauxfaibles](https://github.com/signaux-faibles/opensignauxfaibles) codebase.

In [None]:
# Fields requested from the dataset and inspected throughout this report.
VARIABLES = [
    "financier_court_terme",
    "interets",
    "ca",
    "equilibre_financier",
    "endettement",
    "degre_immo_corporelle",
    "liquidite_reduite",
    "poids_bfr_exploitation",
    "productivite_capital_investi",
    "rentabilite_economique",
    "rentabilite_nette",
    "cotisation",
    "cotisation_moy12m",
    # montant_part_ouvriere* then montant_part_patronale*: each base column
    # followed by its _past_N variants.
    *(
        f"montant_part_{part}{lag}"
        for part in ("ouvriere", "patronale")
        for lag in ("", "_past_1", "_past_12", "_past_2", "_past_3", "_past_6")
    ),
    "ratio_dette",
    "ratio_dette_moy12m",
    "effectif",
    "apart_heures_consommees_cumulees",
    "apart_heures_consommees",
    "paydex_nb_jours",
    "paydex_nb_jours_past_12",
]
# These fields are always queried.
VARIABLES.extend(
    ["outcome", "periode", "siret", "siren", "time_til_outcome", "code_naf"]
)

## Fetch a random sample of the data

In [None]:
%config Completer.use_jedi = False
import pandas as pd

In [None]:
from predictsignauxfaibles.data import SFDataset

In [None]:
# Ask for the VARIABLES fields on a sample of 10,000 rows, then load it.
dataset = SFDataset(fields=VARIABLES, sample_size=10_000)
# The trailing semicolon stops the notebook from echoing the return value.
dataset.fetch_data();

## Temporal Coverage and NA values

In [None]:
# Parse the "periode" column into pandas datetimes so it can be compared
# and grouped by date.
dataset.data["periode"] = pd.to_datetime(dataset.data["periode"])

In [None]:
# Earliest and latest period present in the fetched sample, as plain dates.
periods = dataset.data.periode
date_range = (periods.min().date(), periods.max().date())
print(f"Data goes from {date_range[0]} to {date_range[1]}")

In [None]:
# Percentage of missing values per column, most incomplete columns first.
missing_counts = dataset.data.isna().sum()
missing_pct = missing_counts / len(dataset) * 100
missing_pct.sort_values(ascending=False).to_frame()

## Coverage over time for selected variables

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def count_na_prop(series):
    """Return the percentage of entries in ``series`` that are not missing.

    Despite its name, the result is the *filled-in* share: 100 means no
    missing value, 0 means every value is missing.
    """
    missing_share = series.isna().sum() / len(series)
    return 100 * (1 - missing_share)


# One subplot per variable, stacked vertically; each shows the monthly share
# (in %) of rows where the variable is filled in, as computed by count_na_prop.
fig, axs = plt.subplots(len(VARIABLES), figsize=(10, 100))

# The monthly grouping is the same for every variable, so build it once.
# NOTE: pandas >= 2.2 deprecates the "M" alias in favour of "ME"; switch once
# older pandas versions no longer need to be supported.
monthly = dataset.data.groupby(pd.Grouper(key="periode", freq="M"))

for ax, variable in zip(axs, VARIABLES):
    grouped = monthly.agg({variable: count_na_prop})
    ax.set_title(variable)
    ax.set_ylim([0, 100])
    # Axes.plot handles datetime x values directly; plot_date is deprecated.
    ax.plot(grouped.index, grouped[variable], "-")
    ax.set(adjustable="box")

# Lay out the figure only once titles and tick labels exist, so the spacing
# accounts for them.
fig.tight_layout()