In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the food-supply-quantity table of the COVID-19 healthy-diet dataset.
DATA_PATH = '../input/covid19-healthy-diet-dataset/Food_Supply_Quantity_kg_Data.csv'
data = pd.read_csv(DATA_PATH)
# Preview the first five rows (the default for `head`).
data.head()

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
data.info()

In [None]:
# Summary statistics for the columns of `data`.
data.describe()

In [None]:
# Install Skater from the conda-forge channel; `-y` answers the confirmation prompt.
# NOTE(review): no version is pinned here - consider pinning one for reproducibility.
!conda install -c conda-forge Skater -y

In [None]:
import skater
from skater.core.explanations import Interpretation
from skater.model import InMemoryModel
from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Discard, in place, every row whose 'Recovered' value is missing.
data.dropna(subset=['Recovered'], inplace=True)

In [None]:
# Columns used as model inputs, one per line.
features = [
    'Alcoholic Beverages',
    'Animal fats',
    'Animal Products',
    'Aquatic Products, Other',
    'Cereals - Excluding Beer',
    'Eggs',
    'Fish, Seafood',
    'Fruits - Excluding Wine',
    'Meat',
    'Milk - Excluding Butter',
    'Miscellaneous',
    'Offals',
    'Oilcrops',
    'Pulses',
    'Spices',
    'Starchy Roots',
    'Stimulants',
    'Sugar & Sweeteners',
    'Sugar Crops',
    'Treenuts',
    'Vegetable Oils',
    'Vegetables',
    'Vegetal Products',
]
# Cast the predictors and the target column to float in one assignment.
numeric_columns = features + ['Recovered']
data[numeric_columns] = data[numeric_columns].astype(float)

In [None]:
# Cut-off on 'Recovered' that separates class 0 from class 1.
RECOVERED_THRESHOLD = 0.047469

X = data[features]
# Binarise the target: 0 when 'Recovered' is at or below the cut-off, 1 otherwise.
y = (~data['Recovered'].le(RECOVERED_THRESHOLD)).astype('int64')

# Share of class-1 rows; a value close to 0.5 means the two classes are balanced.
y.mean()

In [None]:
# Hold out part of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, random_state=0)
X_train, X_test, y_train, y_test = split_parts
# Show the shapes of the two feature partitions.
(X_train.shape, X_test.shape)

In [None]:
from xgboost import XGBClassifier, plot_importance

In [None]:
# Binary-logistic XGBoost classifier, fitted on the training DataFrame.
xgb_params = dict(objective='binary:logistic', random_state=33, n_jobs=-1)
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

In [None]:
# Predictions of the fitted `model` for the held-out test features.
xgb_predictions = model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Function for plotting the confusion matrix of a binary classifier.
def plot_confusion_matrix(predict_y,test_y):
    """Draw the confusion matrix of binary predictions as an annotated heatmap.

    Parameters
    ----------
    predict_y : array-like
        Predicted class labels (0 or 1).
    test_y : array-like
        True class labels (0 or 1).

    Notes
    -----
    Rows are the true classes and columns the predicted classes, both in
    the order 0, 1. The order is pinned through ``labels=`` so that the tick
    labels always match the matrix cells, and so that the matrix stays 2x2
    even when one class is absent from the inputs.
    """
    class_order = [0, 1]
    C = confusion_matrix(test_y, predict_y, labels=class_order)
    # Tick labels are derived from the same list that orders the matrix.
    tick_labels = [str(label) for label in class_order]
    plt.figure(figsize=(10,7))
    # The cells hold integer counts, so annotate them as integers.
    sns.heatmap(C, annot=True, fmt="d", xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted Class')
    plt.ylabel('Original Class')
    plt.title("Confusion matrix")
    plt.show()

In [None]:
# Keyword arguments make the (predicted, true) parameter order explicit.
plot_confusion_matrix(predict_y=xgb_predictions, test_y=y_test)

In [None]:
# Plot XGBoost's built-in feature importances, one panel per importance type.
fig = plt.figure(figsize=(18, 10))
title = fig.suptitle("Native Feature Importances from XGBoost", fontsize=14)

importance_panels = [
    ('weight', "Feature Importance with Feature Weight"),
    ('cover', "Feature Importance with Sample Coverage"),
    ('gain', "Feature Importance with Split Mean Gain"),
]
# The panels fill the first three slots of a 2x2 grid; the fourth stays empty.
for slot, (importance_type, panel_title) in enumerate(importance_panels, start=1):
    ax = fig.add_subplot(2, 2, slot)
    plot_importance(model, importance_type=importance_type, ax=ax, color='red')
    ax.set_title(panel_title)


In [None]:
# Same settings as `model`, but fitted on the bare array `X_train.values`
# rather than on the DataFrame.
xgb_array = XGBClassifier(
    objective='binary:logistic',
    random_state=33,
    n_jobs=-1,
)
xgb_array.fit(X_train.values, y_train)

In [None]:
# Class probabilities from `xgb_array` for the test-set feature array.
predictions = xgb_array.predict_proba(X_test.values)

In [None]:
# LIME explainer built from the test-set feature array; continuous features
# are discretised and the two classes get readable names.
exp = LimeTabularExplainer(
    X_test.values,
    feature_names=features,
    class_names=['Less likely', 'More likely'],
    discretize_continuous=True,
)

In [None]:
# Explain the first test row: print its true label and predicted
# probabilities, then render the LIME explanation.
condition = 0
instance = X_test.iloc[condition].values
print('Reference:', y_test.iloc[condition])
print('Predicted:', predictions[condition])
local_explanation = exp.explain_instance(instance, xgb_array.predict_proba)
local_explanation.show_in_notebook()

In [None]:
# Second LIME explainer, left on its default discretisation settings.
# `class_names` must list the names of the two target classes in class order
# (index 0, then index 1); the raw 'Recovered' column is not a list of names.
explainer = LimeTabularExplainer(X_test.values, feature_names=features, class_names=['Less likely', 'More likely'])
condition = 0
# Keep the explanation under its own name so that `exp` (the explainer used
# by the earlier cell) is not overwritten with an Explanation object.
explanation = explainer.explain_instance(X_test.iloc[condition].values, xgb_array.predict_proba)
explanation.as_pyplot_figure()

In [None]:
# Explain the second test row: print its true label and predicted
# probabilities, then render the LIME explanation.
condition = 1
instance = X_test.iloc[condition].values
print('Reference:', y_test.iloc[condition])
print('Predicted:', predictions[condition])
local_explanation = explainer.explain_instance(instance, xgb_array.predict_proba)
local_explanation.show_in_notebook()