In [1]:
from itertools import combinations

import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

# Завдання:
## 1. Використати наведені в таблиці експериментальні дані для розрахунку парних та часткових коефіцієнтів кореляції між всіма можливими парами змінних $X_1, X_2$ та $Y$
|   $N$ п.п. |   Витрата 1-го реагента ($X_1$) |   Витрата 2-го реагента ($X_2$) |   Концентрація продукту на виході ($Y$) |
|:----------:|:-------------------------------:|:-------------------------------:|:---------------------------------------:|
|          1 |                            85.5 |                             255 |                                    17.3 |
|          2 |                            81.7 |                             341 |                                    19.8 |
|          3 |                            71.7 |                             373 |                                    30.1 |
|          4 |                            62.7 |                             447 |                                    31.9 |
|          5 |                            66.4 |                             446 |                                    38.3 |
|          6 |                            70.6 |                             410 |                                    26.5 |
|          7 |                            65   |                             495 |                                    36.2 |
|          8 |                            72.8 |                             451 |                                    21   |
|          9 |                            67.6 |                             565 |                                    29.5 |
|         10 |                            90.2 |                             354 |                                    24.9 |
|         11 |                            60.2 |                             549 |                                    25   |
|         12 |                            74.8 |                             328 |                                    28   |
|         13 |                            63.4 |                             555 |                                    33.9 |
|         14 |                            74.2 |                             415 |                                    16.1 |
|         15 |                            71.6 |                             415 |                                    21   |
|         16 |                            60.9 |                             527 |                                    28.7 |
|         17 |                            81.1 |                             379 |                                    30.3 |
|         18 |                            71.5 |                             439 |                                    19.9 |
|         19 |                            77.2 |                             350 |                                    22.6 |
|         20 |                            91.2 |                             278 |                                    20.1 |        

In [2]:
def corr_moment(x, y):
    assert len(x) == len(y)
    return np.sum((x - x.mean()) * (y - y.mean())) / (len(x) - 1)


def corr_coef(x, y):
    return corr_moment(x, y) / (x.std(ddof=1) * y.std(ddof=1))

def partial_corr_coef(x1, y, x2):
    r_x1y = corr_coef(x1, y)
    r_x2y = corr_coef(x2, y)
    r_x1x2 = corr_coef(x1, x2)
    return (r_x1y - r_x2y * r_x1x2) / np.sqrt((1 - r_x2y ** 2) * (1 - r_x1x2 ** 2))


df = pd.DataFrame(
    {
        'Витрата 1-го реагента ($X_1$)': [85.5, 81.7, 71.7, 62.7, 66.4, 70.6, 65.0, 72.8, 67.6, 90.2, 60.2, 74.8, 63.4, 74.2, 71.6, 60.9, 81.1, 71.5, 77.2, 91.2],
        'Витрата 2-го реагента ($X_2$)': [255, 341, 373, 447, 446, 410, 495, 451, 565, 354, 549, 328, 555, 415, 415, 527, 379, 439, 350, 278],
        'Концентрація продукту на виході ($Y$)': [17.3, 19.8, 30.1, 31.9, 38.3, 26.5, 36.2, 21.0, 29.5, 24.9, 25.0, 28.0, 33.9, 16.1, 21.0, 28.7, 30.3, 19.9, 22.6, 20.1]
    },
    pd.RangeIndex(1, 21, name='$N$ п.п.')
)

for col1, col2 in combinations(df.columns, 2):
    col1_name = col1.split('$')[1]
    col2_name = col2.split('$')[1]
    col3 = df.columns.difference([col1, col2])[0]
    col3_name = col3.split('$')[1]

    col1 = df[col1]
    col2 = df[col2]
    col3 = df[col3]
    
    print(f'Correlation Coefficient between {col1_name} and {col2_name}:', corr_coef(col1, col2))
    print(f'Partial Correlation Coefficient between {col1_name} and {col2_name}, excluding {col3_name}:', partial_corr_coef(col1, col2, col3), end='\n\n')

Correlation Coefficient between X_1 and X_2: -0.8534753001415539
Partial Correlation Coefficient between X_1 and X_2, excluding Y: -0.8011451034897367

Correlation Coefficient between X_1 and Y: -0.5475012651746156
Partial Correlation Coefficient between X_1 and Y, excluding X_2: -0.27659083124076356

Correlation Coefficient between X_2 and Y: 0.4947253756165764
Partial Correlation Coefficient between X_2 and Y, excluding X_1: 0.06293830602466889



## 2. Розрахувати ті ж коефіцієнти при:
- ### Зміні знаку $X_1$

In [3]:
for col1, col2 in combinations(df.columns, 2):
    col1_name = col1.split('$')[1]
    col2_name = col2.split('$')[1]
    col3 = df.columns.difference([col1, col2])[0]
    col3_name = col3.split('$')[1]

    col1 = -df[col1]
    col2 = df[col2]
    col3 = df[col3]
    
    print(f'Correlation Coefficient between {col1_name} and {col2_name}:', corr_coef(col1, col2))
    print(f'Partial Correlation Coefficient between {col1_name} and {col2_name}, excluding {col3_name}:', partial_corr_coef(col1, col2, col3), end='\n\n')

Correlation Coefficient between X_1 and X_2: 0.8534753001415539
Partial Correlation Coefficient between X_1 and X_2, excluding Y: 0.8011451034897367

Correlation Coefficient between X_1 and Y: 0.5475012651746156
Partial Correlation Coefficient between X_1 and Y, excluding X_2: 0.27659083124076356

Correlation Coefficient between X_2 and Y: -0.4947253756165764
Partial Correlation Coefficient between X_2 and Y, excluding X_1: -0.06293830602466889



- ### При одночасній зміні знаків $X_1, X_2$

In [4]:
for col1, col2 in combinations(df.columns, 2):
    col1_name = col1.split('$')[1]
    col2_name = col2.split('$')[1]
    col3 = df.columns.difference([col1, col2])[0]
    col3_name = col3.split('$')[1]

    col1 = -df[col1]
    col2 = -df[col2]
    col3 = df[col3]
    
    print(f'Correlation Coefficient between {col1_name} and {col2_name}:', corr_coef(col1, col2))
    print(f'Partial Correlation Coefficient between {col1_name} and {col2_name}, excluding {col3_name}:', partial_corr_coef(col1, col2, col3), end='\n\n')

Correlation Coefficient between X_1 and X_2: -0.8534753001415539
Partial Correlation Coefficient between X_1 and X_2, excluding Y: -0.8011451034897367

Correlation Coefficient between X_1 and Y: -0.5475012651746156
Partial Correlation Coefficient between X_1 and Y, excluding X_2: -0.27659083124076356

Correlation Coefficient between X_2 and Y: 0.4947253756165764
Partial Correlation Coefficient between X_2 and Y, excluding X_1: 0.06293830602466889



- ### При одночасній зміні знаків $X_1, X_2$ та $Y$

In [5]:
for col1, col2 in combinations(df.columns, 2):
    col1_name = col1.split('$')[1]
    col2_name = col2.split('$')[1]
    col3 = df.columns.difference([col1, col2])[0]
    col3_name = col3.split('$')[1]

    col1 = -df[col1]
    col2 = -df[col2]
    col3 = -df[col3]
    
    print(f'Correlation Coefficient between {col1_name} and {col2_name}:', corr_coef(col1, col2))
    print(f'Partial Correlation Coefficient between {col1_name} and {col2_name}, excluding {col3_name}:', partial_corr_coef(col1, col2, col3), end='\n\n')

Correlation Coefficient between X_1 and X_2: -0.8534753001415539
Partial Correlation Coefficient between X_1 and X_2, excluding Y: -0.8011451034897367

Correlation Coefficient between X_1 and Y: -0.5475012651746156
Partial Correlation Coefficient between X_1 and Y, excluding X_2: -0.27659083124076356

Correlation Coefficient between X_2 and Y: 0.4947253756165764
Partial Correlation Coefficient between X_2 and Y, excluding X_1: 0.06293830602466889

