/
F6_16_ttestStability.py
71 lines (55 loc) · 2.28 KB
/
F6_16_ttestStability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
''' Stability of the T-distribution against outliers, compared to the normal distribution. '''
# Copyright(c) 2015, Thomas Haslwanter. All rights reserved, under the CC BY-SA 4.0 International License
# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import os
# additional packages
# Import formatting commands if directory "Utilities" is available
import os
import sys
sys.path.append(os.path.join('..', '..', 'Utilities'))
try:
from ISP_mystyle import showData
except ImportError:
# Ensure correct performance otherwise
def showData(*options):
plt.show()
return
# Generate the data
np.random.seed(12345)
ndata = 100
data = stats.norm.rvs(size=(ndata,))
data2 = stats.norm.rvs(5, size=(20,))
dataWOutlier = np.hstack((data, data2))
# Calculate the fits
fit_gauss_wo = stats.norm.fit(data)
fit_gauss_w = stats.norm.fit(dataWOutlier)
fit_t_wo = stats.t.fit(data)
fit_t_w = stats.t.fit(dataWOutlier)
# Display the fit-results
print('Gaussian distribution: {0} -> {1}'.format(fit_gauss_wo, fit_gauss_w))
print('T-distribution: {0} -> {1}'.format(fit_t_wo[1:], fit_t_w[1:]))
# Fit of "df" is not shown, as it is pretty unstable for large df
# Plot the fitted curves, to both data sets
fitted_x = np.linspace(-7, 7, 200) # x-values for fitted curves
# Fitted curves to the original data set
fitted_gauss_wo = stats.norm.pdf(fitted_x, loc=fit_gauss_wo[0], scale=fit_gauss_wo[1])
fitted_t_wo = stats.t.pdf(fitted_x, df=ndata-1, loc=fit_t_wo[1], scale=fit_t_wo[2])
# Fitted curves to the data set with outliers
fitted_gauss_w = stats.norm.pdf(fitted_x, loc=fit_gauss_w[0], scale=fit_gauss_w[1])
fitted_t_w = stats.t.pdf(fitted_x, df=ndata-1, loc=fit_t_w[1], scale=fit_t_w[2])
# Show the data
fig, axs = plt.subplots(2,1, sharex=True)
axs[0].hist(data, normed=True, bins=25, color='#CCCCCC')
axs[0].plot(fitted_x, fitted_gauss_wo, label='normal')
axs[0].plot(fitted_x, fitted_t_wo, ls='--', lw=2, label='t-dist')
axs[0].set_title('Without outliers')
axs[0].legend()
axs[1].hist(dataWOutlier, normed=True, bins=25, color='#CCCCCC')
axs[1].plot(fitted_x, fitted_gauss_w, label='norm')
axs[1].plot(fitted_x, fitted_t_w, ls='--', lw=2, label='t-dist')
axs[1].set_title('With outliers')
outFile = 'CentralLimitTheorem.png'
showData(outFile)