# Import stuff

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import json

import seaborn as sns
import os
from multiprocessing import Pool
from pathlib import Path
import sys

from fuzzywuzzy import process


# Make the project's helper modules importable from this notebook.
notebook_dir = os.getcwd()
notebook_name = "experiments_new_plots_01.ipynb"

# Nominal location of this notebook, built from the current working directory.
PATH = Path(notebook_dir, notebook_name)

# Keep every path component up to and including the "VIEWS_FAO_index" folder
# and put its src/utils directory first on sys.path. `.index` raises
# ValueError when the working directory is not inside that folder.
root_depth = PATH.parts.index("VIEWS_FAO_index") + 1
utils_dir = Path(*PATH.parts[:root_depth]) / "src/utils"
sys.path.insert(0, str(utils_dir))

from set_paths import setup_project_paths, get_logo_path, get_data_paths, setup_root_paths
setup_project_paths(PATH)

from utils_plotting import plot_time_series, plot_random_monthly_and_yearly_data, plot_feature_histograms, plot_contry_period_map
from utils_annual_aggregation import aggregate_monthly_to_yearly
from utils_feature_eng_per_100k import feature_eng_fat_per_100k
#from utils_cumulative_distribution import calculate_global_cumulative_distribution
from utils_global_probabilities import calculate_global_probabilities
from utils_country_probabilities import calculate_all_country_probabilities
from utils_return_periods import calculate_return_periods
from utils_check_expected_features import check_expected_features    
from utils_date_index import calculate_date_from_index 
from utils_country_id_csv_to_json import country_id_csv_to_json
from utils_get_country_names_by_ids import get_country_names_by_ids
from utils_get_country_id_by_name import get_country_id_by_name

# Check versions

In [None]:
# Print the installed version of each core package; the trailing notes record
# the versions this notebook was last run with.
for package in (
    np,          # 1.26.4 used
    pd,          # 2.2.1 used
    matplotlib,  # 3.8.4 used
    sns,         # 0.13.2 used
):
    print(package.__version__)

# Load data

In [None]:
# Load the saved monthly and yearly dataframes.
# The files are resolved relative to the repository root derived from PATH
# (everything up to and including the "VIEWS_FAO_index" folder), so the
# notebook does not depend on one user's absolute home-directory layout.
# NOTE(review): `get_data_paths` is imported above and may be the intended
# way to obtain this directory - confirm its signature before switching.
repo_root = Path(*PATH.parts[:PATH.parts.index("VIEWS_FAO_index") + 1])
processed_dir = repo_root / "data" / "processed"

df_monthly = pd.read_pickle(processed_dir / "pilot_return_periods_monthly.pkl")
df_yearly = pd.read_pickle(processed_dir / "pilot_return_periods_yearly.pkl")

# Small utils:

In [None]:
# Look up country names for a list of country ids (here ids 42 and 192).
get_country_names_by_ids([42, 192, ])

In [None]:
# Look up a country id from a country name (fuzzy search).
get_country_id_by_name("Germany")

In [None]:
# Convert a month_id into an MM.YYYY date (here for month_id 0).
calculate_date_from_index(0)

In [None]:
# Every distinct country id found in the yearly data, as a plain Python list.
country_list = (
    df_yearly['c_id']
    .unique()
    .tolist()
)

# The plotting functions

In [None]:
# Monthly time series of fatalities per 100k for the country with id 42.
plot_time_series(
    df_monthly,
    country_ids=[42],
    feature='fatalities_per_100k',
    figsize=(16, 8),
)

In [None]:
# Yearly time series of fatalities per 100k for a single country:
# the slice [7:8] selects only the eighth entry of country_list.
plot_time_series(
    df_yearly,
    country_ids=country_list[7:8],
    feature='fatalities_per_100k',
    figsize=(16, 8),
)

In [None]:
# Yearly fatalities-per-100k histograms for the first twelve countries.
plot_feature_histograms(
    df_yearly,
    country_list[0:12],
    feature='fatalities_per_100k',
    figsize=(16, 16),
)


In [None]:
# Monthly fatalities-per-100k histograms for the first twelve countries.
plot_feature_histograms(
    df_monthly,
    country_list[0:12],
    feature='fatalities_per_100k',
    figsize=(16, 16),
)

In [None]:
# Look up the country id for the partial name "south".
get_country_id_by_name("south")

In [None]:
# Yearly fatalities-per-100k maps for country id 50, periods 2010 and 2011.
plot_contry_period_map(
    df_yearly,
    50,
    "fatalities_per_100k",
    [2010, 2011],
    figsize=(16, 12),
)

In [None]:
# Yearly fatalities-per-100k maps for country id 50, periods 1991, 2000 and 2022.
plot_contry_period_map(
    df_yearly,
    50,
    "fatalities_per_100k",
    [1991, 2000, 2022],
    figsize=(16, 7),
)

In [None]:
# Yearly return-period maps for country id 50, periods 2020 to 2022,
# drawn with a larger marker size.
plot_contry_period_map(
    df_yearly,
    50,
    "sb_best_time_unit_return_period",
    [2020, 2021, 2022],
    figsize=(16, 7),
    marker_size=64,
)

In [None]:
# List the columns available in the yearly dataframe.
df_yearly.columns

In [None]:
# Yearly histograms of the country-level return period for the first twelve countries.
plot_feature_histograms(
    df_yearly,
    country_list[0:12],
    feature='sb_best_time_unit_return_period_country',
    figsize=(16, 16),
)

In [None]:
# Monthly histograms of the country-level likelihood for the first twelve countries.
plot_feature_histograms(
    df_monthly,
    country_list[0:12],
    feature='sb_best_time_unit_likelihood_country',
    figsize=(16, 16),
)

In [None]:
# Display the yearly likelihood column.
df_yearly['sb_best_time_unit_likelihood']

In [None]:
# Histogram of the yearly likelihood over all rows (not the `_country`
# variant), keeping only values strictly between 0.01 and 0.99.
df_sub = df_yearly.loc[
    (df_yearly['sb_best_time_unit_likelihood'] > 0.01)
    & (df_yearly['sb_best_time_unit_likelihood'] < 0.99)
]

sns.histplot(
    df_sub['sb_best_time_unit_likelihood'],
    bins=100,
    kde=True,
)
plt.show()

In [None]:

# Histogram of the yearly country-level likelihood, keeping only values
# strictly between 0.01 and 1.
# Both bounds test the `_country` column that is plotted; the lower bound
# previously tested the global `sb_best_time_unit_likelihood` column, which
# filtered the plotted values by a different quantity.
df_sub = df_yearly[
    (df_yearly['sb_best_time_unit_likelihood_country'] < 1)
    & (df_yearly['sb_best_time_unit_likelihood_country'] > 0.01)
]

sns.histplot(df_sub['sb_best_time_unit_likelihood_country'], bins=100, kde=True)
plt.show()

In [None]:
# Histogram of the yearly return period over all rows (whole world),
# restricted to return periods greater than 3.
df_sub = df_yearly.loc[df_yearly['sb_best_time_unit_return_period'] > 3]

sns.histplot(
    df_sub['sb_best_time_unit_return_period'],
    bins=100,
    kde=True,
)
plt.show()

In [None]:
# Histogram of the yearly return periods for the FAO countries only.
FAO_countries = [47, 57, 237, 161, 50, 244, 162, 78, 120, 242, 235, 124, 158]

plt.hist(df_yearly[df_yearly['c_id'].isin(FAO_countries)]['sb_best_time_unit_return_period'], bins=100)
# Render the figure explicitly, like the other histogram cells, so the cell
# does not echo the raw (counts, bin edges, patches) tuple returned by plt.hist.
plt.show()



In [None]:
# Monthly return-period maps for every FAO country.
#
# Use only the month_ids present for all FAO countries, so every country is
# mapped over the same set of periods. A yearly `year_id` intersection used
# to be computed here as well, but it was overwritten before being used and
# has been dropped. To map years instead, build the intersection from
# df_yearly['year_id'] and pass df_yearly to plot_contry_period_map.
month_sets = [
    set(df_monthly[df_monthly['c_id'] == c_id]['month_id'].unique().tolist())
    for c_id in FAO_countries
]
unique_periods = sorted(set.intersection(*month_sets))

print(unique_periods)

for country in FAO_countries:
    plot_contry_period_map(df_monthly, country, "sb_best_time_unit_return_period_country", unique_periods, figsize=(16, 38), marker_size=36)

In [None]:
# Look up the country id for the partial name "Tan".
get_country_id_by_name("Tan")