In [2]:
from scipy.spatial.distance import pdist, squareform
import numpy as np
import pandas as pd

def distcorr(X, Y):
    """Compute the sample distance correlation between X and Y.

    Both inputs hold the same number of observations along axis 0. A 1-D
    input is treated as n scalar observations; a 2-D input of shape (n, p)
    is treated as n observations of a p-dimensional variable.

    Parameters
    ----------
    X, Y : array_like
        Samples to compare; ``X.shape[0]`` must equal ``Y.shape[0]``.

    Returns
    -------
    float
        Distance correlation. Returns 0.0 when either sample has zero
        distance variance (e.g. a constant input), where the ratio would
        otherwise be 0/0.

    Raises
    ------
    ValueError
        If X and Y do not contain the same number of observations.

    >>> a = [1, 2, 3, 4, 5]
    >>> b = np.array([1, 2, 9, 4, 4])
    >>> round(float(distcorr(a, b)), 6)
    0.762676
    """
    X = np.atleast_1d(X)
    Y = np.atleast_1d(Y)
    # Only genuinely 1-D inputs need a feature axis. Testing ndim (rather than
    # comparing prod(shape) with len) keeps an (n, 1) column vector 2-D instead
    # of turning it into (n, 1, 1), which pdist rejects.
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]
    n = X.shape[0]
    if Y.shape[0] != n:
        raise ValueError('Number of samples must match')

    # Pairwise Euclidean distance matrices, then double-centering
    # (subtract row and column means, add back the grand mean).
    a = squareform(pdist(X))
    b = squareform(pdist(Y))
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    dcov2_xy = (A * B).sum() / float(n * n)
    dcov2_xx = (A * A).sum() / float(n * n)
    dcov2_yy = (B * B).sum() / float(n * n)

    # Product of the two distance standard deviations.
    denom = np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy)
    if denom == 0:
        # Degenerate sample: define the correlation as 0 instead of nan.
        return np.float64(0.0)

    # Clamp at zero so that floating-point round-off cannot put a slightly
    # negative value under the square root.
    dcor = np.sqrt(np.maximum(dcov2_xy, 0.0)) / np.sqrt(denom)
    return dcor

In [3]:
DATA_PATH = "附件一：325个样本数据.xlsx"

# Read the spreadsheet and drop the '辛烷值RON' column; only the remaining
# columns are compared with each other.
df = pd.read_excel(DATA_PATH)
X = df.drop(labels=['辛烷值RON'], axis=1)

# Work on the first 36 remaining columns as a plain array.
data = X.values[:, :36]
m, n = data.shape

# Distance correlation for every column pair (i, j) with j >= i.
# Only the diagonal and upper triangle are written; entries below the
# diagonal keep their initial value of 0.
corrs = np.zeros((n, n))
for i, j in zip(*np.triu_indices(n)):
    corrs[i, j] = distcorr(data[:, i], data[:, j])

frame = pd.DataFrame(corrs)


In [4]:
# Print the index range, per-column non-null counts and dtypes of the table.
frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 36 columns):
0     36 non-null float64
1     36 non-null float64
2     36 non-null float64
3     36 non-null float64
4     36 non-null float64
5     36 non-null float64
6     36 non-null float64
7     36 non-null float64
8     36 non-null float64
9     36 non-null float64
10    36 non-null float64
11    36 non-null float64
12    36 non-null float64
13    36 non-null float64
14    36 non-null float64
15    36 non-null float64
16    36 non-null float64
17    36 non-null float64
18    36 non-null float64
19    36 non-null float64
20    36 non-null float64
21    36 non-null float64
22    36 non-null float64
23    36 non-null float64
24    36 non-null float64
25    36 non-null float64
26    36 non-null float64
27    36 non-null float64
28    36 non-null float64
29    36 non-null float64
30    36 non-null float64
31    36 non-null float64
32    36 non-null float64
33    36 non-null float64
34    36 non-

In [6]:
# Display the pairwise distance-correlation table (bare expression -> rich output).
frame

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26,27,28,29,30,31,32,33,34,35
0,1.0,0.254417,0.229559,0.361293,0.386621,0.185194,0.122462,0.072439,0.219227,0.151579,...,0.339294,0.079515,0.389837,0.354765,0.139421,0.077378,0.205718,0.60052,0.156012,0.147475
1,0.0,1.0,0.498817,0.444487,0.396567,0.192186,0.245134,0.243485,0.213614,0.243206,...,0.141519,0.241684,0.333712,0.368637,0.1721,0.226117,0.450746,0.409982,0.102702,0.30293
2,0.0,0.0,1.0,0.461427,0.40322,0.172562,0.176567,0.246976,0.134392,0.187822,...,0.206263,0.236052,0.299761,0.29106,0.195306,0.183816,0.294851,0.342979,0.149084,0.325737
3,0.0,0.0,0.0,1.0,0.911649,0.141064,0.159876,0.191151,0.237503,0.345421,...,0.580813,0.38214,0.596139,0.537225,0.238622,0.174281,0.456989,0.47406,0.111746,0.20476
4,0.0,0.0,0.0,0.0,1.0,0.363996,0.172981,0.131442,0.232806,0.35637,...,0.623952,0.34322,0.577507,0.496774,0.237734,0.190163,0.43784,0.503662,0.098012,0.184889
5,0.0,0.0,0.0,0.0,0.0,1.0,0.324338,0.383702,0.134298,0.121589,...,0.233939,0.140703,0.185505,0.130848,0.183513,0.132179,0.217558,0.221412,0.179101,0.151475
6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.286457,0.14218,0.281717,...,0.173644,0.196745,0.234928,0.275106,0.210714,0.220743,0.371389,0.201612,0.136833,0.168798
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.185074,0.142593,...,0.173381,0.163545,0.169806,0.151995,0.170773,0.105894,0.235635,0.154924,0.225321,0.268333
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.519517,...,0.159873,0.116091,0.492978,0.548878,0.126101,0.102783,0.231455,0.421443,0.170435,0.163407
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.377587,0.14262,0.360756,0.353386,0.14176,0.25688,0.324734,0.297606,0.176602,0.150658


In [8]:
# Write the table to corr.xlsx (relative path, so it lands in the current
# working directory).
# NOTE(review): the loop that builds `corrs` fills only j >= i, so unless
# `frame` was changed in a cell not shown here, the saved sheet has zeros
# below the diagonal.
frame.to_excel('corr.xlsx')