/
YeoJohnson-male.py
75 lines (63 loc) · 3.23 KB
/
YeoJohnson-male.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# install modules
# Matplotlib https://matplotlib.org
# To install,
# pip install matplotlib
# or
# conda install -c conda-forge matplotlib
# scikit-learn https://scikit-learn.org/stable/
# To install,
# pip install -U scikit-learn
# or
# conda install -c conda-forge scikit-learn
SavedFileName = "male_reference_range.csv"
# open file, write Header
f = open(SavedFileName, 'w')
print('Item,lower,estimated,upper', file=f)
import numpy as np
import pandas as pd
import statistics
from sklearn.preprocessing import PowerTransformer
import matplotlib.pyplot as plt
# List of Mostgraph measured item
MeasuredItemList = ("R5", "R5in", "R5ex", "R5delta", "R20", "R20in", "R20ex", "R20delta", "R5-R20", "R5-R20in", "R5-R20ex", "R5-R20delta", "X5", "X5in", "X5ex", "X5delta", "Fres", "Fresin", "Fresex", "Fresdelta", "ALX", "ALXin", "ALXex", "ALXdelta")
MostgraphData_pandas = pd.read_csv("male_control.csv", encoding="utf-8")
for MeasuredItem in MeasuredItemList:
# Extract colume of each measured item example R5
MeasuredValues_pandas = MostgraphData_pandas.loc[:,[MeasuredItem]]
# convert to numpy from pandas dataframe
MeasuredValues_numpy = MeasuredValues_pandas.values
# Transformation
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PowerTransformer.html
# Box-Cox tranformation
# pt = PowerTransformer(method='box-cox')
# Yeo-Johnson transformation
pt = PowerTransformer(method='yeo-johnson')
pt.fit(MeasuredValues_numpy)
TransformedValues_numpy = pt.transform(MeasuredValues_numpy) # [[]]
# Statistics after transformation
SimpleMean = statistics.mean(TransformedValues_numpy.squeeze()) # [[]] -> []
SimpleStd = statistics.stdev(TransformedValues_numpy.squeeze()) # [[]] -> []
# Inverse transformation of mean±2SD
UpperLimit = pt.inverse_transform(np.array([[SimpleMean + 2 * SimpleStd]])).squeeze() # -> [[]] -> [[]] ->
EstimatedValue = pt.inverse_transform(np.array([[SimpleMean]])).squeeze() # -> [[]] -> [[]] ->
LowerLimit = pt.inverse_transform(np.array([[SimpleMean - 2 * SimpleStd]])).squeeze() # -> [[]] -> [[]] ->
print(f'{MeasuredItem}',f'{LowerLimit:.2f} - {UpperLimit:.2f}', sep='\t')
print(f'{MeasuredItem},{LowerLimit},{EstimatedValue},{UpperLimit}', file=f)
# Histogram https://seaborn.pydata.org/generated/seaborn.histplot.html
fig, axs = plt.subplots(1, 2, figsize=(14, 7), facecolor='white')
fig.suptitle("Male: " + MeasuredItem + " Histogram", size="xx-large")
axs[0].hist(MeasuredValues_numpy, facecolor="blue")
axs[0].set_xlabel(MeasuredItem)
axs[0].set_ylabel("count")
axs[0].set_title("a Measured values", color="black")
axs[1].hist(TransformedValues_numpy, facecolor="green")
axs[1].set_xlabel("Yeo-Johnson transformed " + MeasuredItem)
axs[1].set_ylabel("count")
axs[1].set_title("b Yeo-Johnson transformed λ=" + str(pt.lambdas_), color="black")
axs[1].vlines(SimpleMean,0,30,color="red", linewidth=10, linestyle='dotted')
axs[1].axvspan(xmin=SimpleMean-2*SimpleStd, xmax=SimpleMean+2*SimpleStd, ymin=0, ymax=1, color="red", alpha=0.1)
plt.savefig("MaleHistogram " + MeasuredItem + ".jpg", dpi=1200)
plt.show()
f.close()