In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import os
from multiprocessing import Pool
from pathlib import Path
import sys

# Locate the repository root from the notebook's working directory and expose
# <repo>/src/utils on sys.path so the project helper modules can be imported.
notebook_dir = os.getcwd()
notebook_name = "check_generated_data.ipynb"

# Path of this notebook; assumes the kernel's working directory is the
# notebook's own folder.
PATH = Path(notebook_dir) / notebook_name

REPO_NAME = "VIEWS_FAO_index"
if REPO_NAME not in PATH.parts:
    # Fail with an actionable message instead of the bare
    # "tuple.index(x): x not in tuple" error.
    raise ValueError(
        f"Expected this notebook to be run from inside the '{REPO_NAME}' repository, "
        f"but the working directory is {notebook_dir!r}."
    )

# Keep every path component up to and including the first one named REPO_NAME.
utils_dir = str(Path(*PATH.parts[: PATH.parts.index(REPO_NAME) + 1]) / "src/utils")

# Avoid stacking duplicate entries when this cell is re-run in the same kernel.
if utils_dir not in sys.path:
    sys.path.insert(0, utils_dir)

from set_paths import setup_project_paths
setup_project_paths(PATH)

from utils_plotting import plot_time_series_data, plot_random_monthly_and_yearly_data
from utils_annual_aggregation import aggregate_monthly_to_yearly
from utils_feature_eng_per_100k import feature_eng_fat_per_100k
#from utils_cumulative_distribution import calculate_global_cumulative_distribution
from utils_global_probabilities import calculate_global_probabilities
from utils_country_probabilities import calculate_all_country_probabilities
from utils_return_periods import calculate_return_periods
from utils_check_expected_features import check_expected_features                                                                         

In [None]:
# Report the installed library versions, one per line. The notebook was
# developed with numpy 1.26.4, pandas 2.2.1, matplotlib 3.8.4, seaborn 0.13.2.
print(*(lib.__version__ for lib in (np, pd, matplotlib, sns)), sep="\n")

# Load data

In [None]:
# Resolve the raw data file relative to the repository root rather than a
# user-specific absolute path, so the notebook runs from any checkout.
# PATH is defined in the setup cell and points at this notebook inside the repo.
PROJECT_ROOT = Path(*PATH.parts[: PATH.parts.index("VIEWS_FAO_index") + 1])
PATH_df = PROJECT_ROOT / "data" / "raw_viewser" / "simon_full_base_01_viewser_df.pkl"

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
df_monthly = pd.read_pickle(PATH_df)

# Derive the yearly frame from the monthly one before any features are added.
df_yearly = aggregate_monthly_to_yearly(df_monthly)

# validate_dataframe(df)

# Add the per-100k features to both frames.
df_monthly = feature_eng_fat_per_100k(df_monthly)
df_yearly = feature_eng_fat_per_100k(df_yearly)

# Feature columns for which probabilities are computed.
columns = [
    'sb_best',
    'ns_best',
    'os_best',
    'total_best',
    'fatalities_per_100k',
    'sb_per_100k',
    'ns_per_100k',
    'os_per_100k',
]

# For each feature, apply the global helper and then the per-country helper;
# each helper is run on the monthly frame (with 'month_id') before the yearly
# frame (with 'year_id').
for col in columns:
    for add_probabilities in (calculate_global_probabilities, calculate_all_country_probabilities):
        df_monthly = add_probabilities(df_monthly, col, 'month_id')
        df_yearly = add_probabilities(df_yearly, col, 'year_id')

# Compute return periods for every feature: first on the monthly frame, then
# on the yearly frame, each time for the global likelihood columns followed by
# their '_country' counterparts.
for col in columns:
    likelihood_pairs = (
        (f'{col}_unit_likelihood', f'{col}_time_unit_likelihood'),
        (f'{col}_unit_likelihood_country', f'{col}_time_unit_likelihood_country'),
    )
    for unit_col, time_unit_col in likelihood_pairs:
        df_monthly = calculate_return_periods(df_monthly, unit_col, time_unit_col)
    for unit_col, time_unit_col in likelihood_pairs:
        df_yearly = calculate_return_periods(df_yearly, unit_col, time_unit_col)

# Run check_expected_features on the monthly and the yearly frame, after the
# per-100k features, probabilities and return periods have been computed.
# NOTE(review): what the check does on a missing feature (raise, warn, print)
# is defined in utils_check_expected_features — confirm there.
check_expected_features(df_monthly)
check_expected_features(df_yearly)

In [None]:
# Plot the 'sb_best' feature from the monthly and yearly frames.
# NOTE(review): the helper name suggests a random selection is plotted; no seed
# is set in this notebook, so output may differ between runs — confirm.
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_best')

In [None]:
# Plot the 'sb_per_100k' feature from the monthly and yearly frames
# (presumably added by feature_eng_fat_per_100k — verify).
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k')

In [None]:
# Plot the global likelihood columns for 'sb_per_100k' from both frames:
# first the time-unit likelihood, then the unit likelihood.
# (Presumably produced by calculate_global_probabilities — verify.)
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_time_unit_likelihood')
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_unit_likelihood')

In [None]:
# Plot the country-level likelihood columns for 'sb_per_100k' from both frames:
# first the time-unit likelihood, then the unit likelihood.
# (Presumably produced by calculate_all_country_probabilities — verify.)
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_time_unit_likelihood_country')
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_unit_likelihood_country')

In [None]:
# Plot the return-period columns for 'sb_per_100k' from both frames:
# first the time-unit return period, then the unit return period.
# (Presumably produced by calculate_return_periods from the global
# likelihood columns — verify.)
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_time_unit_return_period')
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_unit_return_period')

In [None]:
# Plot the country-level return-period columns for 'sb_per_100k' from both
# frames: first the time-unit return period, then the unit return period.
# (Presumably produced by calculate_return_periods from the '_country'
# likelihood columns — verify.)
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_time_unit_return_period_country')
plot_random_monthly_and_yearly_data(df_monthly, df_yearly, feature = 'sb_per_100k_unit_return_period_country')