In [3]:
import pandas as pd
from scipy.stats import chi2_contingency
# Data dictionary: Survived is coded 1 = yes, 0 = no.
# Setup: upload titanic.txt into the "content" folder using the file panel on
# the left, so that the path used below resolves.
# NOTE: despite its name, `file_path` holds the loaded DataFrame, not a path
# string; the name is kept because the rest of the script refers to it.
# The file is tab-separated text, hence sep='\t'.
file_path = pd.read_csv('/content/titanic.txt', sep='\t')

# Preview the first five rows to see which columns are available.
print(file_path.head(5))

# --- Survival vs. passenger class -------------------------------------------
# Hypotheses (as stated in the lab file):
#   H0: survival is independent of passenger class.
#   H1: survival depends on passenger class.

# Cross-tabulate the two categorical columns: one row per Survived value,
# one column per PClass value, each cell holding the passenger count.
contingency_table_pc = pd.crosstab(
    index=file_path['Survived'],
    columns=file_path['PClass'],
)
print("Contingency Table for Survival and Passenger Class:\n", contingency_table_pc)

# Chi-square test of independence: checks whether the observed counts differ
# from the counts expected if survival and class were unrelated.
(
    chi2_stat_passclass,
    p_value_passclass,
    dof_passclass,
    expected_passclass,
) = chi2_contingency(contingency_table_pc)
print(
    f"Chi-square Statistic: {chi2_stat_passclass}",
    f"p-value: {p_value_passclass}",
    f"Degrees of Freedom: {dof_passclass}",
    f"Expected Frequencies:\n{expected_passclass}",
    sep="\n",
)

# Decision rule: reject H0 when the p-value falls below the significance level.
# `alpha` is also used by the gender test further down in this script.
alpha = 0.05
print(
    "Reject the null hypothesis: Evidence that survival depends on passenger class."
    if p_value_passclass < alpha
    else "Fail to reject the null hypothesis: No evidence that survival depends on passenger class."
)

# --- Survival vs. gender ------------------------------------------------------
# Hypotheses:
#   H0: survival is independent of gender.
#   H1: survival depends on gender.

# Cross-tabulate Survived (rows) against Sex (columns) to get passenger counts.
contingency_table_gender = pd.crosstab(
    index=file_path['Survived'],
    columns=file_path['Sex'],
)
print("Contingency Table for Survival and Gender :\n", contingency_table_gender)

# Chi-square test of independence for gender and survival.
# correction=True is SciPy's default, spelled out here: when the table has one
# degree of freedom (a 2x2 table, which a two-level Sex column would give --
# TODO confirm), Yates' continuity correction is applied to the statistic.
(
    chi2_stat_gender,
    p_value_gender,
    dof_gender,
    expected_gender,
) = chi2_contingency(contingency_table_gender, correction=True)
print(
    f"Chi-square Statistic: {chi2_stat_gender}",
    f"p-value: {p_value_gender}",
    f"Degrees of Freedom: {dof_gender}",
    f"Expected Frequencies:\n{expected_gender}",
    sep="\n",
)

# Decision rule: reuse the significance level `alpha` set in the passenger-class
# section earlier in this script.
print(
    "Reject the null hypothesis (H0): Evidence that survival depends on gender."
    if p_value_gender < alpha
    else "Fail to reject the null hypothesis (H0): No evidence that survival depends on gender."
)

#Observations
# If the p-value is less than the chosen significance level (commonly 0.05), we reject H0 in favor of H1. If it is greater than or equal to that level, we fail to reject H0.
# The t-test and z-test compare means of continuous data, so they are not appropriate here: survival, gender and class are all categorical. The chi-square test of independence is the suitable choice because it compares observed and expected counts across categories.
# A significant association exists between passenger class and survival, with a chi-square statistic of 172.30 and a p-value < 0.001, indicating survival is dependent on passenger class.
# The first class had a higher survival rate compared to second and third classes
# A strong association is evident between gender and survival, with a chi-square statistic of 329.84 and a p-value < 0.001, showing that survival is dependent on gender
# Females had a notably higher survival rate compared to males

                                            Name PClass    Age     Sex  \
0                   Allen, Miss Elisabeth Walton    1st  29.00  female   
1                    Allison, Miss Helen Loraine    1st   2.00  female   
2            Allison, Mr Hudson Joshua Creighton    1st  30.00    male   
3  Allison, Mrs Hudson JC (Bessie Waldo Daniels)    1st  25.00  female   
4                  Allison, Master Hudson Trevor    1st   0.92    male   

   Survived  
0         1  
1         0  
2         0  
3         0  
4         1  
Contingency Table for Survival and Passenger Class:
 PClass    1st  2nd  3rd
Survived               
0         129  161  573
1         193  119  138
Chi-square Statistic: 172.29911827411786
p-value: 3.852315502424536e-38
Degrees of Freedom: 2
Expected Frequencies:
[[211.64204113 184.0365575  467.32140137]
 [110.35795887  95.9634425  243.67859863]]
Reject the null hypothesis: Evidence that survival depends on passenger class.
Contingency Table for Survival and Gender 