# Estatística inferencial

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.datasets import get_rdataset

penguins: pd.DataFrame = get_rdataset("penguins", "palmerpenguins").data

penguins: pd.DataFrame = penguins.dropna().reset_index(drop=True)

penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
3,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007
4,Adelie,Torgersen,39.3,20.6,190.0,3650.0,male,2007


## Amostragem

In [2]:
# Amostragem aleatória simples

penguins.sample(5)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
89,Adelie,Dream,40.8,18.9,208.0,4300.0,male,2008
67,Adelie,Torgersen,45.8,18.9,197.0,4150.0,male,2008
249,Gentoo,Biscoe,41.7,14.7,210.0,4700.0,female,2009
109,Adelie,Biscoe,42.7,18.3,196.0,4075.0,male,2009
285,Chinstrap,Dream,42.4,17.3,181.0,3600.0,female,2007


In [3]:
# Amostragem aleatória estratificada

penguins.groupby("species").apply(lambda df: df.sample(2))

Unnamed: 0_level_0,Unnamed: 1_level_0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Adelie,91,Adelie,Dream,40.3,18.5,196.0,4350.0,male,2008
Adelie,22,Adelie,Biscoe,40.5,17.9,187.0,3200.0,female,2007
Chinstrap,288,Chinstrap,Dream,50.6,19.4,193.0,3800.0,male,2007
Chinstrap,330,Chinstrap,Dream,49.6,18.2,193.0,3775.0,male,2009
Gentoo,170,Gentoo,Biscoe,42.9,13.1,215.0,5000.0,female,2007
Gentoo,259,Gentoo,Biscoe,48.8,16.2,222.0,6000.0,male,2009


## Testes de hipóteses


In [4]:
# Teste t de Student com uma amostra

sample = stats.norm.rvs(loc=16.808, scale=2.733, size=25)

stats.ttest_1samp(sample, popmean=18, alternative="less")

Ttest_1sampResult(statistic=-2.155241025579814, pvalue=0.02069428365963002)

In [5]:
# Teste t de Student com duas amostras independentes

sample1 = [22.8, 23.4, 26.2, 24.3, 22.0, 24.8, 26.7, 25.1, 23.1, 22.8, 25.6, 25.1, 24.3, 24.2, 22.8, 23.2, 24.7, 26.5, 24.5, 23.6, 23.9, 22.8, 25.4, 26.7, 22.9, 23.5, 23.8, 24.6, 26.3, 22.7]

sample2 = [26.8, 29.3, 28.4, 25.6, 29.4, 27.2, 27.6, 26.8, 25.4, 28.6, 29.7, 27.2, 27.9, 28.4, 26.0, 26.8, 27.5, 28.5, 27.3, 29.1, 29.2, 25.7, 28.4, 28.6, 27.9, 27.4, 26.7, 26.8, 25.6, 26.1]

stats.ttest_ind(sample1, sample2, alternative="two-sided", equal_var=True)

Ttest_indResult(statistic=-9.70841463275532, pvalue=9.185777525386829e-14)

In [6]:
# Teste t de Welch

sample1 = [22.8, 23.4, 26.2, 24.3, 22.0, 24.8, 26.7, 25.1, 23.1, 22.8, 25.6, 25.1, 24.3, 24.2, 22.8, 23.2, 24.7, 26.5, 24.5, 23.6, 23.9, 22.8, 25.4, 26.7, 22.9, 23.5, 23.8, 24.6, 26.3, 22.7]

sample2 = [26.8, 29.3, 28.4, 25.6, 29.4, 27.2, 27.6, 26.8, 25.4, 28.6, 29.7, 27.2, 27.9, 28.4, 26.0, 26.8, 27.5, 28.5, 27.3, 29.1, 29.2, 25.7, 28.4, 28.6, 27.9, 27.4, 26.7, 26.8, 25.6, 26.1]

stats.ttest_ind(sample1, sample2, alternative="two-sided", equal_var=False)

Ttest_indResult(statistic=-9.70841463275532, pvalue=9.728921191025935e-14)

In [7]:
# Teste de independência qui-quadrado

contingency_table = pd.DataFrame(
    {
        "avaliação baixa": [40, 16, 12],
        "avaliação média": [32, 24, 16],
        "avaliação alta": [24, 32, 4],
    },
    index=["Plano 1", "Plano 2", "Plano 3"],
)

test_statistic_value, p_value, degrees_of_freedom, expected_frequencies = stats.chi2_contingency(contingency_table, correction=False)

print(f"Valor da estatística de teste: {test_statistic_value}")

print(f"p-value: {p_value}")

print(f"Graus de liberdade da distribuição qui-quadrado: {degrees_of_freedom}")

print(f"Frequências esperadas:\n{expected_frequencies}")

Valor da estatística de teste: 15.860566448801741
p-value: 0.0032120846981537215
Graus de liberdade da distribuição qui-quadrado: 4
Frequências esperadas:
[[32.64 34.56 28.8 ]
 [24.48 25.92 21.6 ]
 [10.88 11.52  9.6 ]]
