In [3]:
import plotly.graph_objs as go
import dask_ml.datasets as dask_datasets
import sklearn.datasets as sk_datasets
import dask.dataframe as dd

In [6]:
def make_dataset_and_compute_correlation(func, **kwargs):
    """Generate a dataset with ``func`` and return its feature correlations.

    Parameters
    ----------
    func : callable
        Dataset generator, invoked as ``func(**kwargs)``. It must return a
        two-item result whose first item is the feature array.
    **kwargs
        Forwarded unchanged to ``func``.

    Returns
    -------
    The materialised result of calling ``corr()`` on a dask dataframe built
    from the feature array.
    """
    # Only the features are needed; the second item returned by ``func`` is discarded.
    features, _unused_target = func(**kwargs)
    return dd.from_array(features).corr().compute()

In [8]:
# Correlation matrix of a synthetic classification dataset generated by dask-ml.
# random_state is fixed so the generated data (and therefore the heatmap built
# from it) is reproducible after a kernel restart.
dask_corr = make_dataset_and_compute_correlation(
    dask_datasets.make_classification,
    n_samples=10000,
    n_informative=12,
    n_redundant=18,
    n_features=30,
    chunks=100,
    random_state=0,
)

In [9]:
# Correlation matrix of a synthetic classification dataset generated by
# scikit-learn. random_state is fixed so the generated data (and therefore the
# heatmap built from it) is reproducible after a kernel restart.
sk_corr = make_dataset_and_compute_correlation(
    sk_datasets.make_classification,
    n_samples=10000,
    n_informative=12,
    n_redundant=18,
    n_features=30,
    random_state=0,
)

In [16]:
# Heatmap of the dask-ml feature correlation matrix.
fig = go.FigureWidget()
heat_map = fig.add_heatmap(z=dask_corr.values)
# Apply every layout setting in a single update rather than one assignment each.
fig.layout.update(
    title='Correlation Using dask-ml',
    xaxis={'title': 'Features'},
    yaxis={'title': 'Features'},
    width=500,
    height=500,
)
fig

FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': '8f7d3e4c-93ef-11e8-ad04-acde48001122',
 …

In [17]:
# Heatmap of the scikit-learn feature correlation matrix.
fig = go.FigureWidget()
heat_map = fig.add_heatmap(z=sk_corr.values)
# Apply every layout setting in a single update rather than one assignment each.
fig.layout.update(
    title='Correlation Using scikit-learn',
    xaxis={'title': 'Features'},
    yaxis={'title': 'Features'},
    width=500,
    height=500,
)
fig

FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': '9654237a-93ef-11e8-a68d-acde48001122',
 …

In [13]:
# Shell escape: print the packages (name, version, build, channel) installed in
# the conda environment, so the versions behind the results above are on record.
!conda list

# packages in environment at /Users/solinjp1/anaconda3/envs/correlation_exploration:
#
# Name                    Version                   Build  Channel
appdirs                   1.4.3            py36h28b3542_0  
appnope                   0.1.0            py36hf537a9a_0  
asn1crypto                0.24.0                   py36_0  
attrs                     18.1.0                   py36_0  
automat                   0.7.0                    py36_0  
backcall                  0.1.0                    py36_0  
blas                      1.0                         mkl  
bleach                    2.1.3                    py36_0  
bokeh                     0.13.0                   py36_0  
ca-certificates           2018.03.07                    0  
cairo                     1.14.12              hc4e6be7_4  
certifi                   2018.4.16                py36_0  
cffi                      1.11.5           py36h342bebf_0  
chardet                   3.0.4                   