In [1]:
# One-sample t-test: load the practice data set

import pandas as pd
from scipy import stats

# The file is read as CP949 so that the Korean text in it decodes correctly.
ONE_SAMPLE_CSV = '단일표본 t 검정 실습.csv'
df = pd.read_csv(ONE_SAMPLE_CSV, encoding='CP949')

df

Unnamed: 0,번호,용량
0,1,252
1,2,271
2,3,282
3,4,257
4,5,240
...,...,...
295,296,222
296,297,263
297,298,262
298,299,242


**scipy.stats** is a subpackage of the SciPy library in Python that provides a wide range of statistical functions, probability distributions, hypothesis tests, and random variable tools.

# Top 10 Statistical Functions 실습 정리

# 1. One-sample t-test: compare a sample mean with a hypothesized population mean (170 here)

from scipy.stats import ttest_1samp

sample = [170, 172, 168, 169, 171]

one_sample_demo = ttest_1samp(sample, popmean=170)

t_stat, p_value = one_sample_demo

print("1. One-sample t-test:", t_stat, p_value)

# 2. Independent two-sample t-test: compare the means of two unrelated groups

from scipy.stats import ttest_ind

group1, group2 = [85, 90, 88, 92], [78, 80, 74, 79]

independent_demo = ttest_ind(group1, group2)

t_stat, p_value = independent_demo

print("2. Independent t-test:", t_stat, p_value)

# 3. Paired t-test: compare two measurements taken on the same subjects

from scipy.stats import ttest_rel

before, after = [80, 75, 90, 85], [82, 78, 88, 84]

paired_demo = ttest_rel(before, after)

t_stat, p_value = paired_demo

print("3. Paired t-test:", t_stat, p_value)

# 4. Shapiro-Wilk test: check whether a sample is consistent with a normal distribution

from scipy.stats import shapiro

data = [85, 88, 90, 86, 84]

normality_demo = shapiro(data)

stat, p = normality_demo

print("4. Shapiro-Wilk test:", stat, p)

# 5. Levene's test: check whether two groups have equal variances
# (reuses group1 and group2 from the independent t-test demo, section 2)

from scipy.stats import levene

equal_variance_demo = levene(group1, group2)

stat, p = equal_variance_demo

print("5. Levene’s test:", stat, p)

# 6. Pearson correlation: strength of the linear relationship between x and y

from scipy.stats import pearsonr

x = list(range(1, 6))

y = [2 * value for value in x]

pearson_demo = pearsonr(x, y)

r, p = pearson_demo

print("6. Pearson correlation:", r, p)

# 7. Spearman correlation: rank-based correlation, computed on the same x and y

from scipy.stats import spearmanr

spearman_demo = spearmanr(x, y)

r, p = spearman_demo

print("7. Spearman correlation:", r, p)

# 8. Chi-square goodness of fit: do the observed counts match a uniform expectation?

from scipy.stats import chisquare

f_obs = [30, 50, 20]

# Expected counts under a uniform distribution over the categories.
# chisquare() raises ValueError unless sum(f_exp) agrees with sum(f_obs)
# (relative tolerance 1e-8). The earlier hard-coded [33, 33, 33] summed to 99
# instead of 100, so f_exp is now derived from f_obs to keep the totals equal.
f_exp = [sum(f_obs) / len(f_obs)] * len(f_obs)

stat, p = chisquare(f_obs, f_exp)

print("8. Chi-square goodness of fit:", stat, p)

# 9. Chi-square test of independence on a 2x2 contingency table

from scipy.stats import chi2_contingency

import numpy as np

observed_rows = ([10, 20], [20, 30])

table = np.array(observed_rows)

independence_demo = chi2_contingency(table)

chi2, p, dof, expected = independence_demo

print("9. Chi-square test of independence:", chi2, p)

# 10. Normal distribution tools: random sampling, density (pdf) and cumulative probability (cdf)

from scipy.stats import norm

# A fixed random_state makes the random draw identical on every run, so the
# notebook output is reproducible after Restart & Run All.
samples = norm.rvs(size=5, random_state=42)

print("10. norm.rvs (샘플링):", samples)

print("    norm.pdf(0):", norm.pdf(0))

print("    norm.cdf(1.96):", norm.cdf(1.96))

In [2]:
from scipy import stats

# One-sample t-test of the '용량' column against a hypothesized mean of 250.
HYPOTHESIZED_MEAN = 250
one_sample_result = stats.ttest_1samp(df['용량'], HYPOTHESIZED_MEAN)
print(one_sample_result)

TtestResult(statistic=np.float64(-4.673894108057419), pvalue=np.float64(4.4766279943188894e-06), df=np.int64(299))


통곗값 -4.67

pvalue=np.float64(4.4766279943188894e**-06**)에서 e-06은 "10의 -6제곱을 곱한다"는 뜻이다. 즉 p값은 약 0.00000448로 굉장히 낮다. 유의수준 0.05보다 훨씬 작으므로 귀무가설(모평균 = 250)을 기각한다. 다시 말해 평균 용량이 250과 차이가 확실히(통계적으로 유의하게) 있다는 뜻이다.

p값을 과학적 표기법으로 쓰면 $4.48 \times 10^{-6}$ 이다 (4.4766…을 소수 둘째 자리까지 반올림).

In [6]:
# Paired (two dependent samples) t-test: load the practice data set

import pandas as pd

PAIRED_CSV = '두_종속표본_t검정_실습.csv'
df = pd.read_csv(PAIRED_CSV, sep=',', encoding='CP949')

df

Unnamed: 0,복용 전,복용 후
0,37.5,36.3
1,36.5,36.5
2,38.0,36.7
3,37.9,36.1
4,36.9,36.4
5,36.3,36.5
6,38.2,36.8
7,36.9,36.2
8,36.5,36.6
9,36.7,36.5


In [5]:
from scipy import stats
import numpy as np

# Paired t-test between the '복용 전' and '복용 후' columns of the loaded frame.
before_col, after_col = df['복용 전'], df['복용 후']
paired_result = stats.ttest_rel(before_col, after_col)
print(paired_result)

TtestResult(statistic=np.float64(4.483040960107324), pvalue=np.float64(0.0005156690572260739), df=np.int64(14))


In [17]:
# Two independent samples t-test: load the practice data set

INDEPENDENT_CSV = '두_독립표본_t검정_실습.csv'
df = pd.read_csv(INDEPENDENT_CSV, sep=',', encoding='CP949')

df

Unnamed: 0,회사,작동시간
0,1,18
1,1,16
2,1,17
3,1,15
4,1,14
5,1,19
6,1,16
7,1,15
8,1,18
9,1,15


In [18]:
from scipy import stats
import numpy as np

# Split the rows by the '회사' code (1 or 2), then pull each group's
# '작동시간' values out as a NumPy array.
is_company_1 = df['회사'] == 1
is_company_2 = df['회사'] == 2
class_A, class_B = df[is_company_1], df[is_company_2]
company1 = np.array(class_A['작동시간'])
company2 = np.array(class_B['작동시간'])

# Bartlett's test for equal variances between the two groups.
stats.bartlett(company1, company2)

BartlettResult(statistic=np.float64(0.7538799514181551), pvalue=np.float64(0.3852505886267754))

선생님꺼와 다름
The p-value in the output above is 0.385 (0.747 in the teacher's version), which is greater than 0.05.
Therefore, we fail to reject the null hypothesis of equal variances, and we can assume that the two groups have equal variances.

위 출력의 p값은 0.385(선생님 결과는 0.747)로 0.05보다 크기 때문에 귀무가설(두 집단의 분산이 같다)을 기각하지 못한다. 즉 두 집단의 분산에 유의한 차이가 없다는 것이며, 등분산을 이루고 있다고 볼 수 있다.

In [12]:
# Two independent samples t-test

# company1 / company2 are the per-company '작동시간' arrays built in the
# Bartlett cell above. The names used here before (회사1, 회사2) are not defined
# anywhere in this notebook, so the cell failed with NameError on a fresh
# Restart & Run All.
# equal_var=True runs the pooled-variance t-test, in line with the notes above,
# which conclude that Bartlett's test did not reject equal variances.
print(stats.ttest_ind(company1, company2, equal_var=True))

TtestResult(statistic=np.float64(1.6360078664634983), pvalue=np.float64(0.11009523966224795), df=np.float64(38.0))
