In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import KFold

In [3]:
file_path_1 = "data/pima_indian_diabetes/diabetes_cleaned_normalized.csv"
file_path_2 = "data/pima_indian_diabetes/diabetes_discretized.csv"
file_path_3 = "data/pima_indian_diabetes/rules_3.txt"

In [4]:
df_origin_1 = pd.read_csv(file_path_1, index_col=0)
df_origin_2 = pd.read_csv(file_path_2, index_col=0)

display(df_origin_1.head())
display(df_origin_2.head())

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.461538,0.675325,0.5,0.583333,0.484277,0.493261,0.644444,1
1,0.076923,0.266234,0.411765,0.483333,0.264151,0.245283,0.222222,0
2,0.615385,0.902597,0.382353,0.0,0.160377,0.533693,0.244444,1
3,0.076923,0.292208,0.411765,0.383333,0.311321,0.079964,0.0,0
5,0.384615,0.467532,0.529412,0.0,0.232704,0.110512,0.2,0


Unnamed: 0,Outcome,Pregnancies_Low,Pregnancies_Medium,Pregnancies_High,Glucose_Low,Glucose_Medium,Glucose_High,BloodPressure_Low,BloodPressure_Medium,BloodPressure_High,...,SkinThickness_High,BMI_Low,BMI_Medium,BMI_High,DiabetesPedigreeFunction_Low,DiabetesPedigreeFunction_Medium,DiabetesPedigreeFunction_High,Age_Low,Age_Medium,Age_High
0,1,0,1,0,0,0,1,0,1,0,...,0,0,1,0,0,1,0,0,1,0
1,0,1,0,0,1,0,0,0,1,0,...,0,1,0,0,1,0,0,1,0,0
2,1,0,1,0,0,0,1,0,1,0,...,0,1,0,0,0,1,0,1,0,0
3,0,1,0,0,1,0,0,0,1,0,...,0,1,0,0,1,0,0,1,0,0
5,0,0,1,0,0,1,0,0,1,0,...,0,1,0,0,1,0,0,1,0,0


In [5]:
settings = {
    'seed': 42,
    'n_splits': 5,
    'n_unsupervised': 15,
    'c1': 10,
    'c2': 10,
}

In [6]:
kf = KFold(n_splits=settings['n_splits'])

idx_split = {}

for i, (train_idx, test_idx) in enumerate(kf.split(df_origin_1)):
    idx_split[i] = train_idx, test_idx

In [7]:
X_origin_1 = df_origin_1.drop(["Outcome"], axis=1)
y_origin_1 = df_origin_1["Outcome"]

In [8]:
train_index, test_index = idx_split[0]

In [9]:
# 訓練データ

L = {}
for col_name in df_origin_2.columns:
    df_new = X_origin_1.copy().iloc[train_index, :]
    df_new['target'] = df_origin_2[col_name].replace(0, -1)
    L[col_name] = df_new

np.random.seed(seed=settings['seed'])
arr_u = np.random.rand(settings['n_unsupervised'], X_origin_1.shape[1])
U = {key: arr_u for key in L.keys()}

S = {key: np.vstack([df.drop(['target'], axis=1).values, arr_u]) for key, df in L.items()}


In [10]:
# ルール

from src.misc import is_symbol

KB_origin =  []

with open(file_path_3, 'r') as file:
    for line in file:
        formula = line.split()
        KB_origin.append(formula)

rules_tmp = []
for rule in KB_origin:
    if "Outcome" in rule:
        tmp = {}
        for idx, item in enumerate(rule):
            if not is_symbol(item):
                if idx == 0 or rule[idx - 1] != '¬':
                    tmp[item] = 1
                elif item != "Outcome":
                    tmp[item] = 0
                else:
                    tmp[item] = -1

        rules_tmp.append(tmp)

rules_tmp

[{'Pregnancies_Low': 1, 'Outcome': -1},
 {'Pregnancies_High': 1, 'Outcome': 1},
 {'Glucose_Low': 1, 'Outcome': -1},
 {'Glucose_High': 1, 'Outcome': 1},
 {'BMI_Low': 1, 'Outcome': -1},
 {'BMI_Medium': 1, 'Outcome': 1},
 {'DiabetesPedigreeFunction_Low': 1, 'Outcome': -1},
 {'Age_Low': 1, 'Outcome': -1},
 {'Age_Medium': 1, 'Outcome': 1},
 {'Pregnancies_Medium': 0,
  'Glucose_High': 1,
  'BMI_Low': 0,
  'DiabetesPedigreeFunction_Low': 0,
  'BMI_Medium': 1,
  'Outcome': 1},
 {'Glucose_Low': 0,
  'DiabetesPedigreeFunction_Low': 0,
  'Age_Medium': 1,
  'BloodPressure_Medium': 1,
  'BMI_Low': 0,
  'Glucose_Medium': 1,
  'Outcome': 1},
 {'Glucose_High': 0,
  'Glucose_Low': 0,
  'Pregnancies_High': 0,
  'DiabetesPedigreeFunction_Low': 1,
  'BloodPressure_Medium': 0,
  'BloodPressure_Low': 0,
  'Age_Low': 0,
  'SkinThickness_Medium': 0,
  'Outcome': -1},
 {'BMI_Low': 0,
  'Glucose_Medium': 1,
  'Pregnancies_High': 0,
  'SkinThickness_Low': 0,
  'DiabetesPedigreeFunction_Medium': 0,
  'BloodPressu

In [11]:
len_j = len(L)
len_l = len(train_index)
len_u = settings['n_unsupervised']
len_s = len_l + len_u

len_h = len(KB_origin)
len_i = len_u * 2

In [12]:
# テストデータ

df_new = df_origin_1.copy().iloc[test_index, :]
df_new = df_new.rename(columns={'Outcome': 'target'})
df_new['target'] = df_new['target'].replace(0, -1)

test_data = {
    'Outcome': df_new
}




In [13]:
from src.misc import linear_kernel

inputs_luka = {
    'L': L,
    'U': U,
    'S': S,
    'len_j': len_j,
    'len_l': len_l,
    'len_u': len_u,
    'len_s': len_s,
    'len_h': len_h,
    'len_i': len_i,
    'c1': settings['c1'],
    'c2': settings['c2'],
    'KB_origin': KB_origin,
    'target_predicate': 'Outcome',
    'kernel_function': linear_kernel
}

In [14]:
from src.setup_problem_dual_single_task import Setup
from src.objective_function_single_task import ObjectiveFunction
from src.predicate_single_task import Predicate_dual

import cvxpy as cp

problem_instance = Setup(inputs_luka, ObjectiveFunction)
objective_function, constraints = problem_instance.main()
problem = cp.Problem(objective_function, constraints)
result = problem.solve(verbose=True)

load_rules took 0.034693241119384766 seconds!
define_cvxpy_variables took 0.00012612342834472656 seconds!
shape of P: (1807, 1807)
finish l
finish h
finish s
finish l h
finish l s
finish h s
_construct_P_j took 1.314634084701538 seconds!
construct_constraints took 0.18778395652770996 seconds!
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Feb 03 03:42:42 PM: Your problem has 36756 variables, 1257 constraints, and 0 parameters.




(CVXPY) Feb 03 03:42:43 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Feb 03 03:42:43 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Feb 03 03:42:43 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Feb 03 03:42:43 PM: Compiling problem (target solver=OSQP).
(CVXPY) Feb 03 03:42:43 PM: Reduction chain: FlipObjective -> CvxAttr2Constr -> Qp2SymbolicQp -> QpMatrixStuffing -> OSQP
(CVXPY) Feb 03 03:42:43 PM: Applying reduction FlipObjective
(CVXPY) Feb 03 03:42:43 PM: Applying reduction CvxAttr2Constr
(CVXPY) Feb 03 03:42:43 PM: Applying reduction Qp2SymbolicQp
(CVXPY) Feb 03 03:42:43 PM: Applying reduc



(CVXPY) Feb 03 03:42:48 PM: Applying reduction OSQP
(CVXPY) Feb 03 03:42:48 PM: Finished problem compilation (took 5.307e+00 seconds).
-------------------------------------------------------------------------------
                                Numerical solver                               
-------------------------------------------------------------------------------
(CVXPY) Feb 03 03:42:48 PM: Invoking solver OSQP  to obtain a solution.
-----------------------------------------------------------------
           OSQP v0.6.3  -  Operator Splitting QP Solver
              (c) Bartolomeo Stellato,  Goran Banjac
        University of Oxford  -  Stanford University 2021
-----------------------------------------------------------------
problem:  variables n = 38563, constraints m = 39820
          nnz(P) + nnz(A) = 1092163
settings: linear system solver = qdldl,
          eps_abs = 1.0e-05, eps_rel = 1.0e-05,
          eps_prim_inf = 1.0e-04, eps_dual_inf = 1.0e-04,
          rho = 1.0

In [17]:
from src.predicate_single_task import Predicate_dual

p_trained = Predicate_dual(problem_instance.problem_info, metrics='f1')

[I 2024-02-03 15:47:07,940] A new study created in memory with name: no-name-05db2c78-c737-446a-8b23-0b8b6c90b58b


[I 2024-02-03 15:47:07,998] Trial 0 finished with value: 0.3982683982683983 and parameters: {'b': -0.7846573120597049}. Best is trial 0 with value: 0.3982683982683983.
[I 2024-02-03 15:47:08,058] Trial 1 finished with value: 0.49014084507042255 and parameters: {'b': 0.41793724376386665}. Best is trial 1 with value: 0.49014084507042255.
[I 2024-02-03 15:47:08,073] Trial 2 finished with value: 0.0 and parameters: {'b': -1.3539311351002827}. Best is trial 1 with value: 0.49014084507042255.
[I 2024-02-03 15:47:08,098] Trial 3 finished with value: 0.49014084507042255 and parameters: {'b': 1.1058270110118236}. Best is trial 1 with value: 0.49014084507042255.
[I 2024-02-03 15:47:08,107] Trial 4 finished with value: 0.0 and parameters: {'b': -2.7278285382240806}. Best is trial 1 with value: 0.49014084507042255.
[I 2024-02-03 15:47:08,123] Trial 5 finished with value: 0.652873563218391 and parameters: {'b': -0.40131702038596595}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:4


min_bound: -3.779336212229724
max_bound: 1.2206637877702762



[I 2024-02-03 15:47:08,147] Trial 7 finished with value: 0.0335195530726257 and parameters: {'b': -1.0718969580597602}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,161] Trial 8 finished with value: 0.0 and parameters: {'b': -1.5606130509894807}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,190] Trial 9 finished with value: 0.5233082706766917 and parameters: {'b': -0.08758670721142314}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,209] Trial 10 finished with value: 0.0 and parameters: {'b': -2.7747969028826835}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,221] Trial 11 finished with value: 0.5946902654867258 and parameters: {'b': -0.2589083833913048}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,233] Trial 12 finished with value: 0.5949820788530467 and parameters: {'b': -0.26770981600643995}. Best is trial 5 with value: 0.652873563218391.
[I 2024-02-03 15:47:08,

In [22]:
inputs_luka.keys()

dict_keys(['L', 'U', 'S', 'len_j', 'len_l', 'len_u', 'len_s', 'len_h', 'len_i', 'c1', 'c2', 'KB_origin', 'target_predicate', 'kernel_function', 'KB', 'lambda_jl', 'lambda_hi', 'eta_js', 'eta_hat_js', 'M', 'q', 'target_p_idx', 'predicates_dict'])

In [16]:
X_test = test_data['Outcome'].drop(['target'], axis=1)
y_test = test_data['Outcome']['target']

In [17]:
y_pred = p_trained(X_test)
y_pred

array([ 0.91658784,  0.24287098,  0.70199162,  0.12682547,  0.27065501,
        0.19340925,  0.93530853,  0.3918625 ,  0.82005737,  0.83188338,
        0.81083968,  0.7441675 ,  0.33207916,  0.51398586,  0.78517418,
        0.56069824,  0.96282072,  0.61510045,  0.99251962,  0.65186397,
        0.66369045,  0.17311648,  0.83270457,  0.5015721 ,  0.75644688,
        0.82859727,  0.07853962,  0.19074502,  0.77689667,  0.54707424,
        0.65447452,  0.79542907,  0.48227473,  0.65902619,  0.73253885,
        0.52402166,  0.5572686 ,  0.38177903,  0.2357504 ,  0.56630266,
        0.19414038,  0.1770098 ,  0.26844876,  1.13532806,  0.94903559,
       -0.08617708,  0.97833493,  0.88113526,  0.38573067,  0.53504463,
        0.07466718,  0.5401907 ,  0.42841703,  0.38625383,  0.64483253,
        0.72593637,  0.09640416,  0.56267187,  0.46481111,  0.57808827,
        0.87621141,  0.50175106,  0.26155793,  0.28785552,  0.48918271,
        0.26857474,  0.08424127,  0.56159949,  0.17375559,  0.52

In [18]:
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)
y_pred_interpreted

array([ 1, -1,  1, -1, -1, -1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1, -1,  1,
        1,  1,  1, -1, -1,  1, -1, -1, -1,  1,  1, -1,  1,  1, -1,  1, -1,
        1, -1, -1,  1,  1, -1,  1, -1,  1,  1,  1, -1, -1, -1, -1, -1,  1,
       -1,  1,  1, -1, -1, -1,  1,  1,  1,  1,  1, -1, -1, -1,  1, -1, -1,
       -1, -1, -1,  1, -1, -1,  1,  1, -1, -1,  1,  1, -1, -1, -1, -1, -1,
       -1, -1,  1, -1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1, -1, -1,
        1, -1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1])

In [19]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

from sklearn.metrics import confusion_matrix


acc = accuracy_score(y_test, y_pred_interpreted)
pre = precision_score(y_test, y_pred_interpreted)
rec = recall_score(y_test, y_pred_interpreted)
f1  = f1_score(y_test, y_pred_interpreted)
auc = roc_auc_score(y_test, y_pred_interpreted)

print(f'accuracy: {acc}')
print(f'precision: {pre}')
print(f'recall: {rec}')
print(f'f1: {f1}')
print(f'auc: {auc}')

accuracy: 0.6791044776119403
precision: 0.5072463768115942
recall: 0.7954545454545454
f1: 0.6194690265486726
auc: 0.7088383838383838


In [25]:
confusion_matrix(y_test, y_pred_interpreted)

array([[56, 34],
       [ 9, 35]])

In [18]:
df_origin_1

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.461538,0.675325,0.500000,0.583333,0.484277,0.493261,0.644444,1
1,0.076923,0.266234,0.411765,0.483333,0.264151,0.245283,0.222222,0
2,0.615385,0.902597,0.382353,0.000000,0.160377,0.533693,0.244444,1
3,0.076923,0.292208,0.411765,0.383333,0.311321,0.079964,0.000000,0
5,0.384615,0.467532,0.529412,0.000000,0.232704,0.110512,0.200000,0
...,...,...,...,...,...,...,...,...
763,0.769231,0.370130,0.558824,0.800000,0.462264,0.083558,0.933333,0
764,0.153846,0.506494,0.470588,0.450000,0.584906,0.235400,0.133333,0
765,0.384615,0.500000,0.500000,0.383333,0.251572,0.150045,0.200000,0
766,0.076923,0.532468,0.323529,0.000000,0.374214,0.243486,0.577778,1


In [20]:
print(df_origin_1)

     Pregnancies   Glucose  BloodPressure  SkinThickness       BMI  \
0       0.461538  0.675325       0.500000       0.583333  0.484277   
1       0.076923  0.266234       0.411765       0.483333  0.264151   
2       0.615385  0.902597       0.382353       0.000000  0.160377   
3       0.076923  0.292208       0.411765       0.383333  0.311321   
5       0.384615  0.467532       0.529412       0.000000  0.232704   
..           ...       ...            ...            ...       ...   
763     0.769231  0.370130       0.558824       0.800000  0.462264   
764     0.153846  0.506494       0.470588       0.450000  0.584906   
765     0.384615  0.500000       0.500000       0.383333  0.251572   
766     0.076923  0.532468       0.323529       0.000000  0.374214   
767     0.076923  0.318182       0.470588       0.516667  0.383648   

     DiabetesPedigreeFunction       Age  Outcome  
0                    0.493261  0.644444        1  
1                    0.245283  0.222222        0  
2     

In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# データを読み込む
data = df_origin_1  # データが保存されているファイル名に適宜置き換える

# 特徴量とターゲット変数に分割
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# トレーニングセットとテストセットにデータを分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# SVMモデルの定義
svm = SVC(kernel='rbf')

# グリッドサーチのパラメータグリッド
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.1, 0.01, 0.001, 0.0001]}

# グリッドサーチを使用してハイパーパラメータの最適化
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)

# 最適なハイパーパラメータの表示
print("Best Parameters:", grid_search.best_params_)

# 最適なモデルの取得
best_svm = grid_search.best_estimator_

# テストセットでの性能評価
y_pred = best_svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on test set:", accuracy)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best Parameters: {'C': 1, 'gamma': 0.1}
Accuracy on test set: 0.8283582089552238


In [27]:
import numpy as np

import numpy as np

def rbf_kernel(x1: np.ndarray, x2: np.ndarray, gamma: float = 0.1) -> np.ndarray:
    """
    RBFカーネルを計算する関数
    
    Parameters:
    x1 : np.ndarray, shape (n, m)
        特徴量行列1
    x2 : np.ndarray, shape (l, m)
        特徴量行列2
    gamma : float
        RBFカーネルのパラメータ
    
    Returns:
    np.ndarray, shape (n, l)
        RBFカーネル行列
    """
    n, m = x1.shape
    l, _ = x2.shape
    
    # Calculate squared norms
    norms_x1 = np.sum(x1**2, axis=1, keepdims=True)
    norms_x2 = np.sum(x2**2, axis=1, keepdims=True)
    
    # Compute the dot product between x1 and x2
    dot_product = np.dot(x1, x2.T)
    
    # Compute the squared Euclidean distance
    distance_squared = norms_x1 - 2 * dot_product + norms_x2.T
    
    # Compute the RBF kernel matrix
    rbf_matrix = np.exp(-gamma * distance_squared)
    
    return rbf_matrix



# 最適なgammaの値を指定
optimal_gamma = 0.1

# 例として2つの特徴量ベクトルを作成してRBFカーネルを計算してみる
x1 = np.array([1, 2, 3])
x2 = np.array([4, 5, 6])

x1 = np.random.rand(5, 3)
x2 = np.random.rand(3, 3)

rbf_value = rbf_kernel(x1, x1, optimal_gamma)
print("RBFカーネルの値:", rbf_value)


RBFカーネルの値: [[1.         0.91815575 0.91655851 0.94909308 0.94689566]
 [0.91815575 1.         0.97201749 0.99303498 0.94591771]
 [0.91655851 0.97201749 1.         0.98318474 0.97286195]
 [0.94909308 0.99303498 0.98318474 1.         0.96092707]
 [0.94689566 0.94591771 0.97286195 0.96092707 1.        ]]


In [28]:
rbf_value

array([[1.        , 0.91815575, 0.91655851, 0.94909308, 0.94689566],
       [0.91815575, 1.        , 0.97201749, 0.99303498, 0.94591771],
       [0.91655851, 0.97201749, 1.        , 0.98318474, 0.97286195],
       [0.94909308, 0.99303498, 0.98318474, 1.        , 0.96092707],
       [0.94689566, 0.94591771, 0.97286195, 0.96092707, 1.        ]])

In [31]:
def rbf_kernel(x1: np.ndarray, x2: np.ndarray, gamma: float = 0.1) -> np.ndarray:
    """
    RBFカーネルを計算する関数
    
    Parameters:
    x1 : np.ndarray, shape (n, m)
        特徴量行列1
    x2 : np.ndarray, shape (l, m)
        特徴量行列2
    gamma : float
        RBFカーネルのパラメータ
    
    Returns:
    np.ndarray, shape (n, l)
        RBFカーネル行列
    """

    if x1.ndim == 1:
        x1 = x1.reshape(1, -1)
        x2 = x2.reshape(1, -1)

    n, m = x1.shape
    l, _ = x2.shape
    
    # Calculate squared norms
    norms_x1 = np.sum(x1**2, axis=1, keepdims=True)
    norms_x2 = np.sum(x2**2, axis=1, keepdims=True)
    
    # Compute the dot product between x1 and x2
    dot_product = np.dot(x1, x2.T)
    
    # Compute the squared Euclidean distance
    distance_squared = norms_x1 - 2 * dot_product + norms_x2.T
    
    # Compute the RBF kernel matrix
    rbf_matrix = np.exp(-gamma * distance_squared)

    if rbf_matrix.shape == (1, 1):
        return rbf_matrix[0]
    else:
        return rbf_matrix

In [32]:
x1 = np.random.rand(5)
x2 = np.random.rand(5)

res = rbf_kernel(x1, x2)

res, res.shape, type(res)

(array([0.98963127]), (1,), numpy.ndarray)

In [33]:
res.ndim

1

In [35]:
import pandas as pd

In [38]:
mat = pd.read_csv("P_mat.csv", index_col=0)
mat

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1797,1798,1799,1800,1801,1802,1803,1804,1805,1806
0,4.000001,3.825003,3.764450,3.856101,3.980560,-3.799732,-3.812146,3.923047,3.830738,3.905930,...,-1.812522,-1.806124,-1.597316,-1.824554,-1.770200,-1.643823,-1.883234,-1.885056,-1.794556,-1.752355
1,3.825003,4.000001,3.876688,3.828044,3.821097,-3.734154,-3.826458,3.888186,3.734308,3.747958,...,-1.830052,-1.896415,-1.813626,-1.780680,-1.791496,-1.692641,-1.845962,-1.895402,-1.928603,-1.739979
2,3.764450,3.876688,4.000001,3.796040,3.762622,-3.561464,-3.836348,3.787645,3.677031,3.719897,...,-1.942630,-1.835465,-1.794221,-1.856987,-1.889591,-1.840575,-1.865839,-1.906396,-1.925488,-1.816773
3,3.856101,3.828044,3.796040,4.000001,3.851155,-3.698265,-3.676331,3.873641,3.648211,3.735161,...,-1.767580,-1.815751,-1.745815,-1.780097,-1.796493,-1.647498,-1.910091,-1.942853,-1.803375,-1.720429
4,3.980560,3.821097,3.762622,3.851155,4.000001,-3.800148,-3.834462,3.950937,3.855370,3.912707,...,-1.832610,-1.851041,-1.624467,-1.839405,-1.750034,-1.692089,-1.924455,-1.893592,-1.823860,-1.781571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1802,-1.643823,-1.692641,-1.840575,-1.647498,-1.692089,1.588374,1.824676,-1.671295,-1.714921,-1.692672,...,0.946106,0.870477,0.850258,0.902730,0.868764,1.000001,0.887472,0.877406,0.919768,0.921276
1803,-1.883234,-1.845962,-1.865839,-1.910091,-1.924455,1.752453,1.825875,-1.907621,-1.791631,-1.820093,...,0.919619,0.929417,0.842996,0.908020,0.840304,0.887472,1.000001,0.943881,0.908158,0.908188
1804,-1.885056,-1.895402,-1.906396,-1.942853,-1.893592,1.844576,1.889689,-1.887813,-1.839746,-1.856587,...,0.894678,0.890837,0.888349,0.867290,0.886637,0.877406,0.943881,1.000001,0.937081,0.854995
1805,-1.794556,-1.928603,-1.925488,-1.803375,-1.823860,1.770987,1.913295,-1.867433,-1.830751,-1.812082,...,0.941048,0.938611,0.922399,0.868540,0.872538,0.919768,0.908158,0.937081,1.000001,0.864068


In [40]:
arr = mat.values
arr

array([[ 4.000001  ,  3.82500325,  3.76445028, ..., -1.88505583,
        -1.79455622, -1.75235507],
       [ 3.82500325,  4.000001  ,  3.8766879 , ..., -1.89540211,
        -1.92860293, -1.73997928],
       [ 3.76445028,  3.8766879 ,  4.000001  , ..., -1.90639588,
        -1.92548794, -1.81677259],
       ...,
       [-1.88505583, -1.89540211, -1.90639588, ...,  1.000001  ,
         0.93708086,  0.85499459],
       [-1.79455622, -1.92860293, -1.92548794, ...,  0.93708086,
         1.000001  ,  0.86406764],
       [-1.75235507, -1.73997928, -1.81677259, ...,  0.85499459,
         0.86406764,  1.000001  ]])

In [43]:
np.all(np.linalg.eigvals(arr) >= 0)

True

In [44]:
arr.max()

4.000001

In [45]:
arr.min()

-3.996868612305697