# A Probabilistic Approach to Flu and COVID Diagnosis
## Using a Bayesian network, what is the probability that a patient has flu or COVID-19, given a set of observed symptoms and patient conditions?
Assumed prior probabilities for each diagnosis:

p(covid) = 0.4

p(flu) = 0.4

p(none) = 0.2

In [1]:
import numpy as np
import pandas as pd

import os
# Walk the Kaggle input directory and echo the full path of every file found,
# so the datasets attached to the notebook are visible in the cell output.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Fix the global NumPy RNG state so every run draws the same synthetic patients.
np.random.seed(42)

# Number of synthetic patients to generate.
n = 1000

# Probability of each diagnosis; the three values sum to 1.
p_covid, p_flu, p_none = 0.4, 0.4, 0.2

# Draw one diagnosis label per patient according to the probabilities above.
disease_labels = ['covid', 'flu', 'none']
disease_priors = [p_covid, p_flu, p_none]
diseases = np.random.choice(disease_labels, size=n, p=disease_priors)

# Probability that a symptom is present given the diagnosis. Each row lists
# the values in the order (covid, flu, none).
diagnosis_order = ('covid', 'flu', 'none')
symptom_rows = [
    ('fever', (0.85, 0.75, 0.1)),
    ('cough', (0.80, 0.70, 0.1)),
    ('fatigue', (0.70, 0.65, 0.2)),
    ('loss_of_smell', (0.60, 0.05, 0.01)),
    ('sore_throat', (0.45, 0.60, 0.05)),
    ('shortness_of_breath', (0.55, 0.15, 0.01)),
    ('contact_with_infected', (0.75, 0.40, 0.05)),
]
symptom_probs = {
    symptom_name: dict(zip(diagnosis_order, row))
    for symptom_name, row in symptom_rows
}

# Column store for the dataset, starting with the sampled diagnosis labels.
data = {'disease': diseases}

# For each symptom, flag it as present when a uniform draw falls below the
# probability that matches the patient's diagnosis.
for symptom, probs in symptom_probs.items():
    # Per-patient probability of this symptom, looked up by diagnosis.
    thresholds = np.array([probs[d] for d in diseases])
    # One uniform draw per patient, in patient order; stored as a plain list
    # of Python bools.
    draws = np.random.rand(len(diseases))
    data[symptom] = (draws < thresholds).tolist()

# Age brackets paired with their sampling weights (the weights sum to 1).
age_distribution = [
    ('<18', 0.1),
    ('18-30', 0.35),
    ('31-50', 0.30),
    ('51-70', 0.15),
    ('70+', 0.10),
]
age_groups = [bracket for bracket, _weight in age_distribution]
age_probs = [weight for _bracket, weight in age_distribution]

# Assign every patient an age bracket, drawn independently of the diagnosis.
data['age_group'] = np.random.choice(age_groups, size=n, p=age_probs)

# Vaccination flag: True when a uniform draw falls below the assumed 60% rate.
vaccination_rate = 0.6
data['vaccinated'] = np.random.rand(n) < vaccination_rate

# Assemble all generated columns into a single table.
df = pd.DataFrame(data)

# Destination inside the Kaggle working directory.
output_path = '/kaggle/working/synthetic_covid_flu_dataset.csv'

# Write the table as CSV without the row index column.
df.to_csv(path_or_buf=output_path, index=False)
print(f"✅ Dataset saved to: {output_path}")

# Show the first five rows as the cell's displayed result.
df.head(5)

✅ Dataset saved to: /kaggle/working/synthetic_covid_flu_dataset.csv


Unnamed: 0,disease,fever,cough,fatigue,loss_of_smell,sore_throat,shortness_of_breath,contact_with_infected,age_group,vaccinated
0,covid,True,True,True,True,True,False,True,31-50,False
1,none,False,False,False,False,False,False,False,31-50,True
2,flu,False,False,True,False,False,False,False,<18,False
3,flu,True,True,True,False,True,False,False,70+,True
4,covid,True,True,True,True,False,True,True,31-50,False
