-
Notifications
You must be signed in to change notification settings - Fork 11
/
ISP_bivariate.py
82 lines (61 loc) · 2.28 KB
/
ISP_bivariate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
""" Analysis of multivariate data
- Regression line
- Correlation (Pearson-rho, Spearman-rho, and Kendall-tau)
"""
# author: Thomas Haslwanter, date: June-2022
# Import standard packages
import numpy as np
from scipy import stats
import pandas as pd
import statsmodels.formula.api as smf
def regression_line(inFile: str = 'altman_11_6.txt') -> float:
    """Fit a regression line, using the "ordinary least squares" (OLS) model
    of statsmodels, and print the summary of the fit.

    The default data set is from 24 type 1 diabetic patients, relating fasting
    blood glucose (mmol/l) to mean circumferential shortening velocity (%/sec),
    derived from echocardiography.

    Parameters
    ----------
    inFile : name of a comma-separated text file with two numeric columns.
        The first column is labelled 'glucose', the second one 'Vcf'.

    Returns
    -------
    f : F-value of the fitted model (`fvalue` of the statsmodels results)
    """

    # Get the data
    data = np.genfromtxt(inFile, delimiter=',')

    # Convert them into a pandas DataFrame, so that the columns can be
    # addressed by name in the model formula
    df = pd.DataFrame(data, columns=['glucose', 'Vcf'])

    # --- >>> START stats <<< ---
    # Fit a regression line to the data, and display the model results
    results = smf.ols('Vcf ~ glucose', data=df).fit()
    print(results.summary())
    # --- >>> STOP stats <<< ---

    return results.fvalue  # should be 4.414018433146266 for the default data
def correlation(inFile: str = 'altman_11_1.txt') -> float:
    """Pearson correlation, and two types of rank correlation (Spearman,
    Kendall) between the first two columns of a data file.

    The default data set compares age and %fat (measured by dual-photon
    absorptiometry) for 18 normal adults.

    All three coefficients are printed; only Pearson's is returned.

    Parameters
    ----------
    inFile : name of a comma-separated text file; the first column is taken
        as x, the second column as y.

    Returns
    -------
    corr : Pearson's correlation coefficient

    Raises
    ------
    AssertionError : if Spearman's rho does not match the Pearson correlation
        of the rank-transformed data (internal consistency check).
    """

    # Get the data
    data = np.genfromtxt(inFile, delimiter=',')
    x = data[:, 0]
    y = data[:, 1]

    # --- >>> START stats <<< ---
    # Calculate correlations
    # Resulting correlation values are stored in a dictionary, so that it is
    # obvious which value belongs to which correlation coefficient.
    # Each scipy function returns (coefficient, p-value); the p-value is
    # discarded here.
    corr = {}
    corr['pearson'], _ = stats.pearsonr(x, y)
    corr['spearman'], _ = stats.spearmanr(x, y)
    corr['kendall'], _ = stats.kendalltau(x, y)
    # --- >>> STOP stats <<< ---

    print(corr)

    # Assert that Spearman's rho is just the correlation of the ranksorted data
    rank_pearson = stats.pearsonr(stats.rankdata(x), stats.rankdata(y))[0]
    np.testing.assert_almost_equal(corr['spearman'], rank_pearson)

    return corr['pearson']  # should be 0.79208623217849117 for the default data
if __name__ == '__main__':
    # When run as a script, execute both demonstrations in sequence:
    # first the regression fit, then the correlation analysis.
    for demo in (regression_line, correlation):
        demo()