In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

from Laboration import LinearRegression

Data = pd.read_csv("Small-diameter-flow.csv", index_col=0)

Y = Data["Flow"]
X = Data[["Kinematic", "Geometric", "Inertial","Observer"]]

LR = LinearRegression(Y,X)

In [2]:
print(f"Number of features:",LR.d)
print(f"Sample size:",LR.n)
print(f"The variance is:",LR.calc_variance())
print(f"The standard deviation is:",LR.calc_std())
print(f"The significance is:",LR.significance_regression())
print(f"The relevance is:",LR.relevance_regression())
print(f"The individual significance is:",LR.individual_significance())



Number of features: 4
Sample size: 198
The variance is: 0.006272292538356666
The standard deviation is: 0.07919780639864128
The significance is: 2.197604317692611e-244
The relevance is: 0.9971526073276518
The individual significance is: [5.730580151465605e-236, 0.0, 1.1628066959544189e-241, 2.342241110726247e-44]


In [3]:
for name, low, high in LR.confidence_intervals():
    print(f"Confidence interval for {name}: [{low:.5f} +- {high:.5f}]")

Confidence interval for Kinematic: [0.88157 +- 0.85844]
Confidence interval for Geometric: [3.61104 +- 3.59526]
Confidence interval for Inertial: [-0.74255 +- -0.76122]
Confidence interval for Observer: [0.01964 +- 0.01412]


In [4]:
print("Pearson Correlation Coefficients between pairs:\n")
for i, j, x in LR.Pearson():
    print(f"{LR.column_names[i-1]} and {LR.column_names[j-1]}: {x}")

Pearson Correlation Coefficients between pairs:

Kinematic and Geometric: 0.8631350761065918
Kinematic and Inertial: 0.9686707504997814
Kinematic and Observer: 0.10322658943843983
Geometric and Inertial: 0.9183300308547001
Geometric and Observer: 0.17519913369993184
Inertial and Observer: 0.12198107336291035


In [5]:
print(LR.observer_bias("Observer"))

('Observer', 2.342241110726247e-44)


### Is there an observer bias in the data collected for the small-diameter flow measurements?

Yes. The p-value is < 0.05 and that means that our result is statistically significant. It indicates that there is some kind of observer-bias. However, this does not fully confirm that there is an observer-bias. This is the case because, for example, there is no way of telling how the tests where done by just reading the data. 