# Variance Inflation Factor

In [1]:
import pandas as pd
import numpy as np
from patsy import dmatrices
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pickle

In [2]:
# Country under analysis: it selects the input data folder, the lags file and the name of the exported image.
COUNTRY = "Nigeria"

In [3]:
# Folder that holds the time-series files of the selected country.
PATH_TO_DATA_FOLDER = f"../Dataset time-series/output_data/{COUNTRY}/"

## Time-series dataset

In [4]:
# Read the time-series data (daily interpolation) of the country: the CSV carries a two-row
# column header and the dates in its first column.
freq = "D"
df = pd.read_csv(f"{PATH_TO_DATA_FOLDER}{COUNTRY}.csv", header = [0, 1], index_col = 0)
df.index = pd.to_datetime(df.index)
df.index.name = "Datetime"
# Declare the daily frequency on the datetime index.
df.index.freq = freq

In [5]:
# List the distinct indicator names found on the second level of the column header.
df.columns.get_level_values(1).unique()

Index(['1 Month Anomaly Rainfalls (%)', '3 Months Anomaly Rainfalls (%)',
       'Area', 'Code', 'FCG', 'Fatalities', 'Lat', 'Lon', 'NDVI',
       'NDVI Anomaly', 'Population', 'Price cereals and tubers',
       'Rainfalls (mm)', 'Ramadan', 'Waterways', 'rCSI'],
      dtype='object', name='Indicator')

In [6]:
# Indicators retained for the variance inflation factor analysis
# ("Exchange rate" is currently not part of the list).
INDICATORS_TO_CONSIDER = [
    "FCG",
    "1 Month Anomaly Rainfalls (%)",
    "3 Months Anomaly Rainfalls (%)",
    "NDVI",
    "Price cereals and tubers",
    "Fatalities",
    "NDVI Anomaly",
    "rCSI",
    "Rainfalls (mm)",
]

# Keep only the columns whose second-level label is one of the retained indicators.
df = df.loc[:, lambda frame: frame.columns.get_level_values(1).isin(INDICATORS_TO_CONSIDER)]
df.head()

AdminStrata,Adamawa,Adamawa,Adamawa,Adamawa,Adamawa,Adamawa,Adamawa,Adamawa,Adamawa,Borno,Borno,Borno,Yobe,Yobe,Yobe,Yobe,Yobe,Yobe,Yobe,Yobe,Yobe
Indicator,1 Month Anomaly Rainfalls (%),3 Months Anomaly Rainfalls (%),FCG,Fatalities,NDVI,NDVI Anomaly,Price cereals and tubers,Rainfalls (mm),rCSI,1 Month Anomaly Rainfalls (%),...,rCSI,1 Month Anomaly Rainfalls (%),3 Months Anomaly Rainfalls (%),FCG,Fatalities,NDVI,NDVI Anomaly,Price cereals and tubers,Rainfalls (mm),rCSI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-01-01,,,,,,,,,,,...,,,,,,,,,,
2018-01-02,,,,,,,,,,,...,,,,,,,,,,
2018-01-03,,,,,,,,,,,...,,,,,,,,,,
2018-01-04,,,,,,,,,,,...,,,,,,,,,,
2018-01-05,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Pickled lags dictionary stored under the Time-series Forecasting results folder of the country.
keep_path = f"../Time-series Forecasting/RESULTS&ANALYSIS/RESULTS/Analysis 2022/{COUNTRY}/lags_dict"

# NOTE(review): pickle.load can execute arbitrary code, so only open files produced by this project.
with open(keep_path, "rb") as f:
    lags_dict = pickle.load(f)

# Retain the considered indicators that also appear among the keys of lags_dict.
keep = list(set(INDICATORS_TO_CONSIDER) & set(lags_dict.keys()))
df = df.loc[:, df.columns.get_level_values(1).isin(keep)]

In [8]:
# Short, identifier-like aliases for the indicator labels. The renamed columns are later joined
# with "+" into a regression formula, which presumably cannot take the raw labels (spaces,
# parentheses, leading digits) as plain terms.
# NOTE(review): "ndvianonaly" is a misspelling of "ndvianomaly"; it is kept unchanged because
# the label appears in the saved outputs of this notebook.
mapping = {
    # Rainfall indicators.
    "1 Month Anomaly Rainfalls (%)": "omonthrainfalls",
    "3 Months Anomaly Rainfalls (%)": "tmonthrainfalls",
    "Rainfalls (mm)": "rainfalls",
    # Vegetation indicators.
    "NDVI": "ndvi",
    "NDVI Anomaly": "ndvianonaly",
    # Economic indicators.
    "Exchange rate": "exchange",
    "Price cereals and tubers": "pricecereals",
    # Conflict indicator.
    "Fatalities": "fatalities",
    # Food security indicators.
    "FCG": "fcg",
    "rCSI": "rcsi",
}

In [9]:
# Replace the indicator labels (second column level) with their short aliases.
df = df.rename(columns = mapping, level = 1)

In [10]:
def vif_func(group):
    """Compute the variance inflation factor of the regressors of one province.

    Parameters
    ----------
    group : pd.DataFrame
        Indicators of a single province. Either a frame whose columns are the
        renamed indicator names, or a groupby group that still carries the
        province on the first level of a column MultiIndex and exposes it as
        ``group.name`` (the form produced by ``groupby(...).apply``).

    Returns
    -------
    pd.DataFrame
        A single "VIF Factor" column indexed by "features": one row per column
        of the design matrix built for the formula ``fcg ~ <other indicators>``
        (the saved output shows an "Intercept" row followed by the indicators).
    """
    # Backward compatibility with groupby(...).apply(vif_func): strip the province level.
    if isinstance(group.columns, pd.MultiIndex):
        group = group[group.name]

    # Every indicator except the target goes on the right-hand side of the formula.
    features = "+".join(group.columns[group.columns != "fcg"])

    # Only the design matrix X is needed; the response is discarded.
    # NOTE(review): patsy presumably drops the rows with missing values (including rows where
    # only fcg is missing) before building X - confirm against the patsy NA handling.
    _y, X = dmatrices('fcg ~' + features, group, return_type = "dataframe")

    # One VIF per design-matrix column.
    vif = pd.DataFrame({
        "VIF Factor": [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
        "features": X.columns,
    })

    return vif.set_index("features")

# DataFrame.groupby(axis = 1) is deprecated since pandas 2.1, so the provinces (first column
# level) are iterated explicitly and the per-province tables are placed side by side, keyed by
# province in sorted order as the groupby did.
vif_factor = pd.concat(
    {province: vif_func(df[province]) for province in sorted(df.columns.get_level_values(0).unique())},
    axis = 1,
    names = [df.columns.names[0]],
)
vif_factor.head()

AdminStrata,Adamawa,Borno,Yobe
Unnamed: 0_level_1,VIF Factor,VIF Factor,VIF Factor
features,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Intercept,1105.855366,408.472375,804.167547
omonthrainfalls,1.369193,1.23556,1.544279
tmonthrainfalls,1.810771,1.53328,1.400369
fatalities,1.066254,1.297765,1.247238
ndvianonaly,2.068475,1.304274,2.057392


In [11]:
# Average the VIF of each feature over the provinces and discard the intercept row.
vif = (
    vif_factor
    .mean(axis = 1)
    .to_frame(name = "VIF")
    .drop(index = "Intercept")
)
vif

Unnamed: 0_level_0,VIF
features,Unnamed: 1_level_1
omonthrainfalls,1.383011
tmonthrainfalls,1.581473
fatalities,1.203752
ndvianonaly,1.810047
pricecereals,2.857979
rainfalls,1.486895
rcsi,2.717909


In [12]:
import os

# NOTE(review): dfi is never referenced by name; the import is presumably what makes
# export_png available on the pandas Styler, so it must stay - confirm.
import dataframe_image as dfi

# Make sure the destination folder exists so the export also works on a fresh checkout.
os.makedirs("./output_images", exist_ok = True)

# Save the averaged VIF table as an image named after the country.
vif.style.export_png(f"./output_images/vif_{COUNTRY}.png")