# Sử dụng thư viện Scipy

## Bước 1. Tính trung bình mẫu và phương sai mẫu hiệu chỉnh

In [6]:
import pandas as pd

CSV_PATH = "smartphone_usage_time.csv"
TIME_COL = "Thoi gian trung binh su dung"

df = pd.read_csv(CSV_PATH, encoding="utf-8")

td = pd.to_timedelta(df[TIME_COL], errors="coerce")

hours = td.dt.total_seconds() / 3600
hours = hours.dropna()

sample_mean = hours.mean()
sample_variance_unbiased = hours.var(ddof=1)
n = hours.shape[0]
print(f"n = {n}")
print(f"Trung binh mau X = {sample_mean:.2f} gio")
print(f"Phuong sai mau hieu chinh Y = {sample_variance_unbiased:.2f} gio^2")


n = 265
Trung binh mau X = 6.39 gio
Phuong sai mau hieu chinh Y = 7.25 gio^2


## Bước 2. Chuẩn hoá dữ liệu bằng z - score

In [10]:
def z_score(series: pd.Series):
    mu = series.mean()
    sigma = series.std(ddof=1)
    return (series - mu) / sigma
df['gio'] = td.dt.total_seconds() / 3600
valid = df['gio'].notna()
df.loc[valid, "gio_zscore"] = z_score(df.loc[valid, 'gio'])
df_fmt = df.copy()
df_out = df_fmt.head(5)
df

Unnamed: 0,Timestamp,STT,Ho va ten,Tuoi,Thoi gian trung binh su dung,gio,gio_zscore
0,9/8/2025 14:21:05,1,Nguyễn Mai Đức Trọng,21,2:54:00,2.900000,-1.296602
1,9/8/2025 14:26:10,2,Phạm Minh Hằng,19,3:40:00,3.666667,-1.011891
2,9/8/2025 14:36:56,3,Trần Anh Dũng,21,7:17:00,7.283333,0.331200
3,9/8/2025 14:37:55,4,Phạm Nguyễn Mai Thương,20,5:41:00,5.683333,-0.262979
4,9/8/2025 14:40:00,5,Nguyễn Thế Chiến,20,12:38:00,12.633333,2.317985
...,...,...,...,...,...,...,...
260,9/14/2025 16:53:11,261,Nguyễn Vũ Quang Anh,21,6:15:00,6.250000,-0.052540
261,9/14/2025 16:53:39,262,Phạm Quang Trưởng Anh,21,8:15:00,8.250000,0.690183
262,9/14/2025 16:53:58,263,Phạm Viết Quốc Anh,21,7:35:00,7.583333,0.442609
263,9/14/2025 16:54:18,264,Phạm Tiến Vượng,21,9:10:00,9.166667,1.030598


## Bước 3. Ước lượng kỳ vọng bằng khoảng tin cậy 95%

In [15]:
pip install scipy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
import scipy.stats as st
import numpy as np
sample_std_unbiased = np.sqrt(sample_variance_unbiased)
standard_error_mean = sample_std_unbiased / np.sqrt(n)
ci_low, ci_high = st.t.interval(confidence=0.95, df=num_sample-1, loc=sample_mean, scale=standard_error_mean)
print(f"Khoang tin cay 95% cho ky vong: [{ci_low:.6f}, {ci_high:.6f}] gio")

Khoang tin cay 95% cho ky vong: [6.065776, 6.717184] gio


## Bước 4. Kiểm định giả thuyết thống kê cho kỳ vọng với mức ý nghĩa 5\%

In [9]:
mu0 = 7.3
alpha = 0.05

z_qs = ((sample_mean - mu0) / sample_std_unbiased) * np.sqrt(n)
p_value_z = st.norm.cdf(z_qs)
reject_z = p_value_z < alpha

print(f"z_qs = {z_qs:.6f}")
print(f"p-value (one-sided, H1: mu < {mu0}) = {p_value_z:.6e}")
print(f"Ket luan o muc y nghia {alpha*100:.0f}%: " + ("Bac bo H0" if reject_z else "Khong bac bo H0"))

res = st.ttest_1samp(hours, popmean=mu0, alternative="less")  # SciPy >= 1.9
t_qs = float(res.statistic)
p_value_t = float(res.pvalue)
reject_t = p_value_t < alpha

print(f"t_qs = {t_qs:.6f}")
print(f"p-value (one-sided, H1: mu < {mu0}) = {p_value_t:.6e}")
print(f"Ket luan o muc y nghia {alpha*100:.0f}%: " + ("Bac bo H0" if reject_t else "Khong bac bo H0"))


z_qs = -5.492302
p-value (one-sided, H1: mu < 7.3) = 1.983637e-08
Ket luan o muc y nghia 5%: Bac bo H0
t_qs = -5.492302
p-value (one-sided, H1: mu < 7.3) = 4.662221e-08
Ket luan o muc y nghia 5%: Bac bo H0


# Sử dụng thư viện Statsmodels

## Bước 1. Tính trung bình mẫu và phương sai mẫu hiệu chỉnh

In [11]:
import pandas as pd
import numpy as np

CSV_PATH = "smartphone_usage_time.csv"

df = pd.read_csv(CSV_PATH, encoding="utf-8")

td = pd.to_timedelta(df["Thoi gian trung binh su dung"], errors="coerce")
hours = td.dt.total_seconds() / 3600
hours = hours.dropna()

n = hours.shape[0]
sample_mean = hours.mean()
sample_var = hours.var(ddof=1)
sample_std = np.sqrt(sample_var)

print(f"n = {n}")
print(f"Trung binh mau = {sample_mean:.2f} gio")
print(f"Phuong sai mau hieu chinh = {sample_var:.2f} gio^2")

n = 265
Trung binh mau = 6.39 gio
Phuong sai mau hieu chinh = 7.25 gio^2


## Bước 2. Chuẩn hoá dữ liệu bằng z - score

In [12]:
df["gio"] = td.dt.total_seconds() / 3600
df["gio_zscore"] = (df["gio"] - sample_mean) / sample_std
print(df.head())

           Timestamp  STT               Ho va ten  Tuoi  \
0  9/8/2025 14:21:05    1    Nguyễn Mai Đức Trọng    21   
1  9/8/2025 14:26:10    2          Phạm Minh Hằng    19   
2  9/8/2025 14:36:56    3           Trần Anh Dũng    21   
3  9/8/2025 14:37:55    4  Phạm Nguyễn Mai Thương    20   
4  9/8/2025 14:40:00    5        Nguyễn Thế Chiến    20   

  Thoi gian trung binh su dung        gio  gio_zscore  
0                      2:54:00   2.900000   -1.296602  
1                      3:40:00   3.666667   -1.011891  
2                      7:17:00   7.283333    0.331200  
3                      5:41:00   5.683333   -0.262979  
4                     12:38:00  12.633333    2.317985  


## Bước 3. Ước lượng kỳ vọng bằng khoảng tin cậy 95%

In [None]:
pip install statsmodels

Defaulting to user installation because normal site-packages is not writeable
Collecting statsmodels
  Downloading statsmodels-0.14.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (9.5 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading statsmodels-0.14.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (10.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hDownloading patsy-1.0.1-py2.py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.9/232.9 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: patsy, statsmodels
Successfully installed patsy-1.0.1 statsmodels-0.14.5
Note: you may need to restart the kernel to use updated packages.


In [16]:
import statsmodels.api as sm

desc = sm.stats.DescrStatsW(hours)

ci_low, ci_high = desc.tconfint_mean(alpha=0.05)
print(f"Khoang tin cay 95% cho ky vong: [{ci_low:.6f}, {ci_high:.6f}] gio")

Khoang tin cay 95% cho ky vong: [6.065776, 6.717184] gio


## Bước 4. Kiểm định giả thuyết thống kê cho kỳ vọng với mức ý nghĩa 5\%

In [17]:
import statsmodels.api as sm
from statsmodels.stats.weightstats import ztest,ttest_ind

desc = sm.stats.DescrStatsW(hours)

mu0 = 7.3
alpha = 0.05

z_stat, p_value_z = desc.ztest_mean(value=mu0, alternative="smaller")
print(f"Z_qs = {z_stat:.6f}")
print(f"p-value (one-sided, H1: mu < {mu0}) = {p_value_z:.6e}")
print("Ket luan o muc y nghia 5%(z-test):", "Bac bo H0" if p_value_z < alpha else "Khong bac bo H0")

t_stat, p_value_t, dfree = desc.ttest_mean(value=mu0, alternative="smaller")
print(f"T_qs = {t_stat:.6f}, df = {dfree}")
print(f"p-value (one-sided, H1: mu < {mu0}) = {p_value_t:.6e}")
print("Ket luan o muc y nghia 5%(t-test):", "Bac bo H0" if p_value_t < alpha else "Khong bac bo H0")

Z_qs = -5.492302
p-value (one-sided, H1: mu < 7.3) = 1.983637e-08
Ket luan o muc y nghia 5%(z-test): Bac bo H0
T_qs = -5.492302, df = 264.0
p-value (one-sided, H1: mu < 7.3) = 4.662221e-08
Ket luan o muc y nghia 5%(t-test): Bac bo H0
