Ονοματεπώνυμο: Ζαμάγιας Μιχαήλ Ανάργυρος

ΑΜ: ΤΠ5000

# Import modules

In [1]:
import pandas as pd
from scipy.stats import chisquare, chi2

# Hypothesis test

Researcher A claims that the probability of a patient belonging to one of the five categories is equal for all categories.
Researcher B, however, claims that the probability of a patient belonging to one of the five categories is not equal for all categories.

$H_0$: Researcher A is right.

$H_1$: Researcher B is right.

In [3]:
# H0 (Researcher A): every category is equally likely.
null_hypothesis = (
    "The probability of a patient belonging to one of the five categories "
    "is equal for all categories."
)
# H1 (Researcher B): the categories are not all equally likely.
alternative_hypothesis = (
    "The probability of a patient belonging to one of the five categories "
    "is not equal for all categories."
)

## SPSS method

#### Declare initial values

In [14]:
# Test configuration: alpha and the matching confidence level (1 - alpha).
significance_level = 0.05
confidence_level = 1 - significance_level

# Observed counts for categories 1-5; the index is named at construction
# instead of being mutated afterwards.
disease_data = pd.DataFrame(
    {'Observed Data': [15, 12, 9, 9, 15]},
    index=pd.RangeIndex(1, 6, name='Category'),
)

# Derive the total from the data so it cannot drift from the counts above
# (previously hard-coded as 60).
population_size = int(disease_data['Observed Data'].sum())
print(disease_data)


          Observed Data
Category               
1                    15
2                    12
3                     9
4                     9
5                    15


#### Calculate degrees of freedom, $c$

In [5]:
# One row per category; the degrees of freedom are the category count minus one.
category_count = disease_data.shape[0]
degrees_of_freedom = category_count - 1
print(f'{degrees_of_freedom = }')

degrees_of_freedom = 4


#### Calculate chi-square, $\chi^2$

In [6]:
# Chi-square goodness-of-fit statistic for the observed counts.
# The counts are passed positionally: scipy's `chisquare` names this
# parameter `f_obs` and has no `observed` keyword, so the old call raised
# TypeError. With no expected frequencies given, scipy assumes all
# categories are equally likely, which matches H0.
observed_chi2 = chisquare(disease_data['Observed Data']).statistic
print(f'{observed_chi2 = }')

observed_chi2 = 3.0


#### Calculate critical value, $\chi_c^2$

In [7]:
# Critical value: the chi-square quantile at the confidence level
# (confidence_level was declared earlier as 1 - significance_level).
chi2_critical_value = chi2.ppf(confidence_level, df=degrees_of_freedom)
print(f'{chi2_critical_value = }')

chi2_critical_value = 9.487729036781154


#### Hypothesis test conclusion

In [8]:
# H0 is kept only while the statistic stays strictly below the critical
# value; otherwise the alternative hypothesis is reported.
if not observed_chi2 < chi2_critical_value:
    print(alternative_hypothesis)
    print('Researcher B is right.')
else:
    print(null_hypothesis)
    print('Researcher A is right.')

The probability of a patient belonging to one of the five categories is equal for all categories.
Researcher A is right.


## Alternative method (running the test with both observed and expected data)

#### Declare expected data

In [9]:
# Attach the expected counts as a second column, building a new frame
# rather than mutating the existing one in place.
disease_data = disease_data.assign(**{'Expected Data': [20, 14, 12, 10, 16]})
print(disease_data)

          Observed Data  Expected Data
Category                              
1                    15             20
2                    12             14
3                     9             12
4                     9             10
5                    15             16


In [12]:
# p-value of the goodness-of-fit test against the supplied expected counts.
observed_counts = disease_data['Observed Data']
expected_counts = disease_data['Expected Data']
# The observed counts total 60 and the expected counts total 72. scipy's
# chisquare requires both totals to agree (it raises ValueError otherwise),
# so rescale the expected counts to the observed total, keeping their proportions.
expected_scaled = expected_counts * observed_counts.sum() / expected_counts.sum()
# NOTE(review): the expected counts are not uniform, so this tests fit to the
# given expected distribution rather than equal probabilities — confirm intent.
p_value = chisquare(observed_counts, f_exp=expected_scaled).pvalue

In [13]:
# H0 is kept when the p-value is at least the significance level.
h0_holds = p_value >= significance_level
print(null_hypothesis if h0_holds else alternative_hypothesis)
print('Researcher A is right.' if h0_holds else 'Researcher B is right.')

The probability of a patient belonging to one of the five categories is equal for all categories.
Researcher A is right.
