# Download VIEWS predictions

**NOTE: Requires a certificate/access to the VIEWS database.**

This notebook allows you to fetch and download data from a given VIEWS run (data release), with or without filters for months and countries of interest.

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook

import warnings
warnings.filterwarnings('ignore')

# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_runs import storage, ModelMetadata
from views_runs.storage import store, retrieve, fetch_metadata
from views_forecasts.extensions import *

# Packages from this repository, Tools folder
import sys
sys.path.append('../../')
sys.path.append('../../Tools')
sys.path.append('../../Intermediates')
sys.path.append('../../SystemUpdates')
from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated
from FetchData import FetchData, RetrieveFromList
from ViewsEstimators import *

# VIEWS mapper2
from views_mapper2.label_writer import *


## Specify the output folder

In [None]:
import os

# Home directory of the current user, resolved by expanding "~".
home = os.path.expanduser('~')

#### Option 1: Save to your desktop

In [None]:
 # Option 1 target: the "Desktop" folder directly under the home directory.
 desktop = f"{home}/Desktop"  # (Toggle on to save to desktop)

#### Option 2: Save to the VIEWS Dropbox

In [None]:
# Mydropbox = home + '/Dropbox (ViEWS)/ViEWS/' # (Toggle on to save to Dropbox)
# Monthly_updates = Mydropbox + 'DataReleases/MonthlyUpdates/' # (Toggle on to save to Dropbox)

## Find month_id of interest

In [None]:
# Functions to easily obtain month_ids

def vid2date(i):
    """Translate a VIEWS month_id into a 'year/month' string.

    Args:
        i: month_id, passed unchanged to ``ViewsMonth``.

    Returns:
        str: the ``year`` and ``month`` attributes of ``ViewsMonth(i)``
        joined by '/'. The month is not zero-padded.
    """
    # Build the ViewsMonth once and read both attributes from it.
    views_month = ViewsMonth(i)
    # The original returned the undefined name `monthb`, which raised NameError.
    return str(views_month.year) + '/' + str(views_month.month)

In [None]:
# Look up and print the month_id for a calendar month of interest (here: July 2022)

print(ViewsMonth.from_year_month(month=7, year=2022))

## Specify run (data release)

In [None]:
# Model to read predictions from -- change to the model of interest
dev_id = 'fatalities001'
run_id = dev_id

# Last month of input data (of interest), given as a month_id
EndOfHistory = 511

# The run_id that matches the last month of input data above
prod_id = '2022_07_t01'

# Level of analysis: country-month ('cm') or PRIO-GRID-month ('pgm')
level = 'cm'

For more information on the naming conventions required for the cell above, please see the VIEWS changelog at https://github.com/prio-data/viewsforecasting/blob/main/CHANGELOG.md

## Fetch and download standard output data (monthly predictions)

**TODO:**

- Add columns with non-logged ensemble results
- Include columns that translate month_IDs and country_IDs to human-friendly names

In [None]:
def _index_by_step(forecasts):
    """Re-index `forecasts` by (step, country_id), where step = month_id - EndOfHistory."""
    flat = forecasts.reset_index()
    flat['step'] = flat['month_id'] - EndOfHistory
    # NOTE(review): the second index level is hard-coded to 'country_id';
    # confirm this also holds when level is set to 'pgm'.
    return flat.set_index(['step', 'country_id'], drop=True)


# Names of the stored ensemble predictions for the chosen level and end of history
predstore_future = f"{level}_genetic_ensemble_f{EndOfHistory}"
predstore_future_dich = f"{level}_genetic_ensemble_dich_f{EndOfHistory}"

# Read both prediction sets from the store before transforming either of them
predictions_df = pd.DataFrame.forecasts.read_store(predstore_future, run=dev_id)
predictions_dich_df = pd.DataFrame.forecasts.read_store(predstore_future_dich, run=dev_id)

# 'step_combined' is already a log-transformed variable based on state-based violence,
# so ln1_* is a plain copy of it and ln2_* applies log1p on top of that copy.
predictions_df = _index_by_step(predictions_df)
predictions_df['ln1_step_combined_sb'] = predictions_df['step_combined']
predictions_df['ln2_step_combined_sb'] = np.log1p(predictions_df['ln1_step_combined_sb'])

# predictions_dich_df gives state-based violence; it gets the same index and a renamed copy of the column
predictions_dich_df = _index_by_step(predictions_dich_df)
predictions_dich_df['step_combined_sb'] = predictions_dich_df['step_combined']

In [None]:
# Inspect the predictions frame before it is written to disk
display(predictions_df)

### Name the .csv-file

In [None]:
# Writes the frame as CSV under a relative file name, i.e. into the current working directory.
# NOTE(review): the output folder defined earlier (`desktop`) is not used here --
# confirm whether the file name should be prefixed with it.
predictions_df.to_csv('predictions.csv')

## Fetch and download aggregated predictions 

**TODO**

- Insert cell below to fetch the run specified at the top of the document, AND aggregate/sum up the ensemble results (logged AND non-logged) for: 
    - Rolling 3-month intervals (with months covered as the column name)
    - Rolling 6-month intervals (with months covered as the column name)
    - Rolling 12-month intervals (with months covered as the column name)
- Include columns that translate month_IDs and country_IDs to human-friendly names

### Name the .csv-file

In [None]:
# NOTE(review): no aggregation cell exists yet (see the TODO list in this section), so this
# writes the same monthly `predictions_df` under the "aggregated" file name -- confirm this is intended.
predictions_df.to_csv('aggregated_predictions.csv')

In [None]:
# Completion marker, printed once this final cell is reached
print("All done")