In [None]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import FloatSlider
import pandas as pd
import sklearn
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import OLSInfluence
import numpy as np
import seaborn as sns
import shap
import matplotlib.pyplot as plt
import sklearn.preprocessing as skp
import pingouin as pg

In [None]:
import numpy as np
from scipy import linalg
from sklearn.utils import check_array, as_float_array
from sklearn.base import TransformerMixin, BaseEstimator


class ZCA(BaseEstimator, TransformerMixin):
    """ZCA whitening transformer with a scikit-learn style fit/transform API.

    ``fit`` learns the per-feature mean and the symmetric whitening matrix
    ``U diag(1 / sqrt(S + regularization)) U.T`` from the SVD of the sample
    covariance. ``transform`` centres data with the learned mean and applies
    that matrix.

    Parameters
    ----------
    regularization : float, default=1e-5
        Added to every singular value of the covariance before the inverse
        square root is taken, so (near-)zero-variance directions do not
        cause a division by zero.
    copy : bool, default=False
        Forwarded to ``as_float_array`` in ``fit``. With ``False`` the array
        passed to ``fit`` may be centred in place.

    Attributes
    ----------
    mean_ : ndarray of shape (n_features,)
        Column means of the training data.
    components_ : ndarray of shape (n_features, n_features)
        The whitening matrix.
    """

    def __init__(self, regularization=10**-5, copy=False):
        self.regularization = regularization
        self.copy = copy

    def fit(self, X, y=None):
        """Learn the mean and whitening matrix from ``X``; returns ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        X = check_array(X)
        X = as_float_array(X, copy=self.copy)
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        # Normalise the scatter matrix by the number of samples (rows).
        # Dividing by the number of features, as before, rescales the
        # covariance by n_samples / n_features, so the output was not white.
        sigma = np.dot(X.T, X) / X.shape[0]
        U, S, _ = linalg.svd(sigma)
        inv_sqrt = 1.0 / np.sqrt(S + self.regularization)
        # U * inv_sqrt scales column j of U by inv_sqrt[j], i.e. U @ diag(inv_sqrt).
        self.components_ = np.dot(U * inv_sqrt, U.T)
        return self

    def transform(self, X):
        """Centre ``X`` with the fitted mean and apply the whitening matrix."""
        X = check_array(X)
        return np.dot(X - self.mean_, self.components_.T)

In [None]:
def whiten(X, method='zca', eps=1e-5):
    """
    Whitens the input matrix X using specified whitening method.

    Inputs:
        X:      Input data (array-like) with data examples along the first
                dimension. Trailing dimensions are flattened into a single
                feature axis; a 1-D input is treated as one feature column.
        method: Whitening method. Must be one of 'zca', 'zca_cor', 'pca',
                'pca_cor', or 'cholesky'.
        eps:    Constant added to the eigenvalues before they are inverted,
                so (near-)zero-variance directions do not divide by zero.
    Returns:
        2-D array (n_examples, n_features) with the centred, whitened data.
    Raises:
        ValueError: if `method` is not one of the supported names or X
                    contains no data.
    """
    # Validate the method up front, before any linear algebra is done.
    if method not in ('zca', 'pca', 'cholesky', 'zca_cor', 'pca_cor'):
        raise ValueError('Whitening method not found.')

    X = np.asarray(X)
    if X.size == 0:
        raise ValueError('Cannot whiten an empty data matrix.')

    # np.prod(()) is the float 1.0, which reshape rejects; the int() cast is
    # what lets 1-D input through as a single feature column.
    n_features = int(np.prod(X.shape[1:]))
    X = X.reshape((-1, n_features))
    X_centered = X - np.mean(X, axis=0)
    Sigma = np.dot(X_centered.T, X_centered) / X_centered.shape[0]

    if method in ('zca', 'pca', 'cholesky'):
        # Sigma is symmetric PSD, so its SVD yields eigenvectors/eigenvalues.
        U, Lambda, _ = np.linalg.svd(Sigma)
        if method == 'cholesky':
            # W is the transposed Cholesky factor of the regularised precision matrix.
            precision = U @ np.diag(1.0 / (Lambda + eps)) @ U.T
            W = np.linalg.cholesky(precision).T
        else:
            inv_sqrt = np.diag(1.0 / np.sqrt(Lambda + eps))
            if method == 'zca':
                W = U @ inv_sqrt @ U.T
            else:  # 'pca'
                W = inv_sqrt @ U.T
    else:
        # Correlation-based variants: rescale by the per-feature standard
        # deviation first. The inverse is computed once and reused.
        V_sqrt_inv = np.linalg.inv(np.diag(np.std(X, axis=0)))
        P = V_sqrt_inv @ Sigma @ V_sqrt_inv
        G, Theta, _ = np.linalg.svd(P)
        inv_sqrt = np.diag(1.0 / np.sqrt(Theta + eps))
        if method == 'zca_cor':
            W = G @ inv_sqrt @ G.T @ V_sqrt_inv
        else:  # 'pca_cor'
            W = inv_sqrt @ G.T @ V_sqrt_inv

    return np.dot(X_centered, W.T)

In [None]:
# Raw input: states.csv from the thistleknot/Python-Stock repository on GitHub.
STATES_CSV_URL = "https://raw.githubusercontent.com/thistleknot/Python-Stock/master/data/raw/states.csv"
df_ = pd.read_csv(STATES_CSV_URL)

In [None]:
# Add an intercept column named 'const', then re-select the columns so that
# the original columns keep their order and 'const' comes last.
column_order = [*df_.columns, 'const']
df = sm.add_constant(df_)[column_order]

In [None]:
# Display the working frame: the loaded columns followed by 'const'.
df

In [None]:
#df[['Poverty', 'White', 'Traf Deaths', 'University', 'Unemployed', 'Income', 'Population']]

In [206]:
def return_model(x,y):
    """Fit and visualise a linear model of column `y` on the columns in `x`.

    Reads the notebook-level DataFrame `df`. Rows are ordered by the absolute
    value of `y`, the predictors are whitened with `whiten` (default method),
    and the response is standardised with `sklearn.preprocessing.scale`. The
    same regression is then fitted twice: with scikit-learn's
    LinearRegression (used by the SHAP plots) and with statsmodels OLS (used
    for the prediction plot and the returned summary).

    Side effects: prints the selected column names and draws a SHAP summary
    plot, a SHAP heatmap, an actual-vs-predicted line plot, and heatmaps of
    the correlation and partial-correlation matrices.

    Parameters
    ----------
    x : sequence of str
        Predictor column names in `df`.
    y : str
        Response column name in `df`.

    Returns
    -------
    The statsmodels OLS summary, or None when `x` is empty.
    """
    # Imported here because a bare `import sklearn` does not guarantee that
    # the `sklearn.linear_model` submodule has been loaded.
    from sklearn.linear_model import LinearRegression

    # A multi-select widget can report an empty selection; whitening an
    # empty predictor matrix would fail, so bail out with a hint instead.
    if len(x) == 0:
        print("Select at least one predictor column.")
        return None

    names = [y,*x]
    print(names)
    shap.initjs()

    data = df[names].sort_values(kind="quicksort", by=names[0], ascending=True,key=abs)

    # Whitened predictors, keeping the original row labels and column names.
    predictors = data.iloc[:,1:]
    X = pd.DataFrame(whiten(np.array(predictors)), index=predictors.index, columns=predictors.columns)

    # Standardised response as a single-column frame aligned with X.
    Y = pd.DataFrame(
        skp.scale(data.iloc[:,0], with_mean=True, with_std=True),
        index=data.index,
        columns=[y],
    )

    model = LinearRegression()
    model.fit(X, Y)
    results = sm.OLS(Y,X).fit()

    # SHAP views of the scikit-learn model.
    e = shap.LinearExplainer(model, X)
    shap_values = e.shap_values(np.array(X))
    # NOTE(review): the feature matrix is passed negated, as in the original;
    # presumably to flip the colour scale -- confirm this is intended.
    shap.summary_plot(shap_values, -np.array(X), feature_names=list(X.columns))
    explainer = shap.Explainer(model, X)
    shap.plots.heatmap(explainer(X))

    # Actual vs. fitted response, in the sorted row order.
    predict = results.predict(X)
    plt.plot(np.array(Y), label="actual")
    plt.plot(np.array(predict), label="predicted")
    plt.legend()
    plt.show()

    combined = pd.concat([Y,X],axis=1)

    corrMatrix = combined.corr().sort_values(kind="quicksort", by=names[0], ascending=True,key=abs)
    sns.heatmap(corrMatrix, annot=True)
    plt.show()

    # `pcorr` is the partial-correlation method pingouin adds to DataFrame.
    corrMatrix = combined.pcorr().sort_values(kind="quicksort", by=names[0], ascending=True,key=abs)
    sns.heatmap(corrMatrix, annot=True)
    plt.show()

    return(results.summary())

def derive_xnames(y):
    """Build the predictor picker for response `y` and hook it to `return_model`.

    The candidate predictors are the columns of the notebook-level `df`
    other than `y`, with the first remaining column left out. The first
    candidate is pre-selected. Returns whatever `interact` returns.
    """
    # Columns of df other than the chosen response, computed once.
    remaining = df.columns[~df.columns.isin([y])]
    picker = widgets.SelectMultiple(
        options=remaining[1:],
        value=[remaining[1]],
        disabled=False,
    )
    return interact(return_model, x=picker, y=y)

# Response picker: every column of df except the first.
y = widgets.Select(options=df.columns[1:], disabled=False)

# Choosing a response rebuilds the predictor picker through derive_xnames.
a = interact(derive_xnames, y=y)



interactive(children=(Select(description='y', options=('Poverty', 'Infant Mort', 'White', 'Crime', 'Doctors', …