### ANOVA

Scenario: One-way ANOVA is used to test whether the means of three or more independent groups differ significantly.
    
Example: Testing if mean monthly sales differ across three store locations.

Dataset: store_sales.csv with columns Store, MonthlySales

In [None]:
# pandas is imported in this cell so that it runs on a fresh kernel; the only
# other pandas import in the notebook lives in a later cell.
import pandas as pd
from scipy.stats import f_oneway

# Example - Store sales data
# One-way ANOVA on MonthlySales across stores 'A', 'B' and 'C'.
sales_data = pd.read_csv('store_sales.csv')

# .loc with a boolean mask selects the rows and the column in one step,
# avoiding chained indexing.
store_a = sales_data.loc[sales_data['Store'] == 'A', 'MonthlySales']
store_b = sales_data.loc[sales_data['Store'] == 'B', 'MonthlySales']
store_c = sales_data.loc[sales_data['Store'] == 'C', 'MonthlySales']

# f_oneway returns the F statistic and the p-value; only the p-value is reported.
f_stat, p_value = f_oneway(store_a, store_b, store_c)
print("ANOVA for store sales:", p_value)

### ANOVA on Study Methods

In [1]:
import pandas as pd
from scipy.stats import f_oneway

# Build the example frame: three scores for each of the three study methods
data = pd.DataFrame({
    'StudyMethod': ['Method A'] * 3 + ['Method B'] * 3 + ['Method C'] * 3,
    'Score': [85, 88, 90, 78, 82, 79, 92, 95, 94],
})

# Pull out one Series of scores per study method
study_method = data['StudyMethod']
method_a_scores = data.loc[study_method == 'Method A', 'Score']
method_b_scores = data.loc[study_method == 'Method B', 'Score']
method_c_scores = data.loc[study_method == 'Method C', 'Score']

# One-way ANOVA across the three groups
f_stat, p_value = f_oneway(method_a_scores, method_b_scores, method_c_scores)

print("F-statistic:", f_stat)
print("p-value:", p_value)

# Compare the p-value with the significance level and report the decision
alpha = 0.05
verdict = (
    "Reject the null hypothesis: At least one study method has a different mean score."
    if p_value < alpha
    else "Fail to reject the null hypothesis: No significant difference between study methods."
)
print(verdict)


F-statistic: 34.15384615384615
p-value: 0.0005264443012584103
Reject the null hypothesis: At least one study method has a different mean score.


### Implementing ANOVA on Heart Dataset

In [7]:
import pandas as pd
from scipy.stats import f_oneway

# Read the heart dataset.
# The columns used below are 'ChestPain' (categorical label) and 'Chol';
# the file also carries 'Age', 'Sex' and other clinical columns.
heart_data = pd.read_csv('heart.csv')

# Split cholesterol values into one Series per chest pain label
chest_pain = heart_data['ChestPain']
chol_cp_0 = heart_data.loc[chest_pain == 'typical', 'Chol']       # typical angina
chol_cp_1 = heart_data.loc[chest_pain == 'asymptomatic', 'Chol']  # asymptomatic
chol_cp_2 = heart_data.loc[chest_pain == 'nonanginal', 'Chol']    # non-anginal pain
chol_cp_3 = heart_data.loc[chest_pain == 'nontypical', 'Chol']    # non-typical (atypical) angina



In [8]:
# Preview the first rows to check the real column names (e.g. 'ChestPain', 'Chol')
# and the chest pain labels that the group selection relies on.
heart_data.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,1,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,fixed,No
1,2,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,normal,Yes
2,3,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,reversable,Yes
3,4,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,normal,No
4,5,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,normal,No


In [9]:
# One-way ANOVA of cholesterol across the four chest pain groups
cholesterol_groups = [chol_cp_0, chol_cp_1, chol_cp_2, chol_cp_3]
f_stat, p_value = f_oneway(*cholesterol_groups)

print("F-statistic:", f_stat)
print("p-value:", p_value)

# Compare the p-value with the significance level and report the decision
alpha = 0.05
verdict = (
    "Reject the null hypothesis: There is a significant difference in average cholesterol levels between chest pain types."
    if p_value < alpha
    else "Fail to reject the null hypothesis: No significant difference in cholesterol levels between chest pain types."
)
print(verdict)


F-statistic: 0.6228824563331001
p-value: 0.600691502205988
Fail to reject the null hypothesis: No significant difference in cholesterol levels between chest pain types.
