In [None]:
from scipy.stats import chi2_contingency, fisher_exact

# Motif enrichment in promoters vs. the rest of the genome, framed as a
# 2x2 contingency test. Every base pair is counted as one observation that
# either is ("present") or is not ("absent") a motif occurrence.

# Observed motif counts
promoter_present = 26_765    # motif occurrences inside promoter regions
genome_present = 217_343     # motif occurrences genome-wide

# Region sizes, in bp
genome_size = 1_333_398_936  # whole genome
promoter_size = 151_034_000  # estimated total promoter sequence
rest_size = genome_size - promoter_size  # everything outside promoters

# Derive the remaining three cells of the table from the totals
rest_present = genome_present - promoter_present
promoter_absent, rest_absent = (
    promoter_size - promoter_present,
    rest_size - rest_present,
)

# Rows: promoter / rest of genome. Columns: motif present / absent.
contingency_table = [
    [promoter_present, promoter_absent],
    [rest_present, rest_absent],
]

# Chi-square test of independence, called with scipy's defaults; only the
# statistic and p-value are kept, the remaining result fields are discarded.
chi2, p_value_chi2, *_unused = chi2_contingency(contingency_table)

# Fisher's exact test, called with scipy's defaults; the first result
# field is not needed here.
_first_field, p_value_fisher = fisher_exact(contingency_table)

# Report: the table, a blank separator line, then both p-values
report_lines = [
    "Contingency Table:",
    f"Promoter: {promoter_present} (Present), {promoter_absent} (Absent)",
    f"Rest: {rest_present} (Present), {rest_absent} (Absent)\n",
    f"Chi-square test p-value: {p_value_chi2}",
    f"Fisher's exact test p-value: {p_value_fisher}",
]
print(*report_lines, sep="\n")



Contingency Table:
Promoter: 26765 (Present), 151007235 (Absent)
Rest: 190578 (Present), 1182174358 (Absent)

Chi-square test p-value: 8.258999689155496e-48
Fisher's exact test p-value: 1.0340401262636922e-46
