<a href="https://colab.research.google.com/github/mr-cri-spy/machine-_-learning-/blob/main/breast_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination
from pgmpy.factors.discrete import TabularCPD

# Network structure: D is the common parent of F and C (F <- D -> C).
edges = [('D', 'F'), ('D', 'C')]
model = BayesianNetwork(edges)

# Prior over the root node D: P(D=0) = 0.1, P(D=1) = 0.9.
cpd_d = TabularCPD(
    variable='D',
    variable_card=2,
    values=[[0.1], [0.9]],
)

# Conditional tables for the two children of D. Each table has one column
# per state of D, and every column sums to 1.
cpd_f = TabularCPD(
    variable='F',
    variable_card=2,
    values=[[0.8, 0.2], [0.2, 0.8]],
    evidence=['D'],
    evidence_card=[2],
)
cpd_c = TabularCPD(
    variable='C',
    variable_card=2,
    values=[[0.7, 0.3], [0.3, 0.7]],
    evidence=['D'],
    evidence_card=[2],
)

# Attach every table to the network, then validate the structure/CPD pairing.
for cpd in (cpd_d, cpd_f, cpd_c):
    model.add_cpds(cpd)
model.check_model()

# Exact inference by variable elimination: posterior P(D | F=1).
inference = VariableElimination(model)
observed = {'F': 1}
result = inference.query(variables=['D'], evidence=observed)
print(result)


+------+----------+
| D    |   phi(D) |
+======+==========+
| D(0) |   0.0270 |
+------+----------+
| D(1) |   0.9730 |
+------+----------+


  cpd_f = TabularCPD(variable='F', variable_card=2,
  cpd_c = TabularCPD(variable='C', variable_card=2,


In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Toy heart-disease sample (10 patients) used to demonstrate fitting and
# querying a discrete Bayesian network.
# NOTE(review): 'chol' and 'trestbps' are raw measurements used as discrete
# states, so almost every value is its own state; consider binning them the
# same way as 'age' before using this on real data.
sample_data = {
    'age': [63, 67, 67, 37, 41, 56, 62, 57, 63, 53],
    'sex': [1, 1, 1, 1, 0, 1, 0, 0, 1, 1],
    'chol': [233, 286, 229, 250, 204, 236, 268, 354, 254, 203],
    'trestbps': [145, 160, 120, 130, 130, 120, 140, 140, 135, 140],
    'fbs': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'heart_disease': [1, 1, 1, 0, 0, 1, 0, 1, 1, 0]
}

# Convert the sample data into a DataFrame
heart_data = pd.DataFrame(sample_data)

# Discretize the 'age' variable into categories.
# right=False makes the bins left-closed ([20, 40), [40, 60), [60, 80)) so
# they agree with the labels; with pandas' default right-closed bins an age
# of exactly 40 would have been labelled '20-39'.
age_bins = [20, 40, 60, 80]
age_labels = ['20-39', '40-59', '60-79']
heart_data['age'] = pd.cut(heart_data['age'], bins=age_bins,
                           labels=age_labels, right=False)

# Convert columns to categorical types
for col in heart_data.columns:
    heart_data[col] = heart_data[col].astype('category')

# Display the first few rows of the dataset
print(heart_data.head())

# Split the data into training and testing sets
# (test_data is held out but not used further in this cell)
train_data, test_data = train_test_split(heart_data, test_size=0.2, random_state=42)

# Define the structure of the Bayesian Network
model = BayesianNetwork([('age', 'trestbps'),
                         ('age', 'fbs'),
                         ('sex', 'trestbps'),
                         ('trestbps', 'heart_disease'),
                         ('chol', 'heart_disease'),
                         ('fbs', 'heart_disease')])

# Fit the model with a Bayesian (BDeu-smoothed) estimator instead of plain
# maximum likelihood. With so few rows, maximum likelihood assigns probability
# 0 to state combinations that never occur in the data (no row has
# age '40-59', sex 1 and trestbps 130), so the evidence queried below had
# probability 0 and the normalised posterior came out as nan. The BDeu prior
# adds pseudo-counts to every cell, keeping all probabilities strictly positive.
from pgmpy.estimators import BayesianEstimator  # imported here so the cell is self-contained

model.fit(train_data, estimator=BayesianEstimator,
          prior_type='BDeu', equivalent_sample_size=5)

# Perform inference
infer = VariableElimination(model)

# Query the model to calculate the probability of heart disease given new data.
# Every evidence value must be a state that was seen in the training data.
query_result = infer.query(variables=['heart_disease'], evidence={
    'age': '40-59',  # Use discrete age category
    'sex': 1,
    'chol': 250,
    'trestbps': 130,
    'fbs': 0
})

print(query_result)

     age sex chol trestbps fbs heart_disease
0  60-79   1  233      145   1             1
1  60-79   1  286      160   0             1
2  60-79   1  229      120   0             1
3  20-39   1  250      130   0             0
4  40-59   0  204      130   0             0
+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |                  nan |
+------------------+----------------------+
| heart_disease(1) |                  nan |
+------------------+----------------------+


  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  phi.values = phi.values / phi.values.sum()


In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Toy heart-disease sample (10 patients) used to demonstrate fitting and
# querying a discrete Bayesian network.
# NOTE(review): 'chol' and 'trestbps' are raw measurements used as discrete
# states, so almost every value is its own state; consider binning them the
# same way as 'age' before using this on real data.
sample_data = {
    'age': [63, 67, 67, 37, 41, 56, 62, 57, 63, 53],
    'sex': [1, 1, 1, 1, 0, 1, 0, 0, 1, 1],
    'chol': [233, 286, 229, 250, 204, 236, 268, 354, 254, 203],
    'trestbps': [145, 160, 120, 130, 130, 120, 140, 140, 135, 140],
    'fbs': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'heart_disease': [1, 1, 1, 0, 0, 1, 0, 1, 1, 0]
}

# Convert the sample data into a DataFrame
heart_data = pd.DataFrame(sample_data)

# Discretize the 'age' variable into categories.
# right=False makes the bins left-closed ([20, 40), [40, 60), [60, 80)) so
# they agree with the labels; with pandas' default right-closed bins an age
# of exactly 40 would have been labelled '20-39'.
age_bins = [20, 40, 60, 80]
age_labels = ['20-39', '40-59', '60-79']
heart_data['age'] = pd.cut(heart_data['age'], bins=age_bins,
                           labels=age_labels, right=False)

# Convert columns to categorical types
for col in heart_data.columns:
    heart_data[col] = heart_data[col].astype('category')

# Display the first few rows of the dataset
print(heart_data.head())

# Split the data into training and testing sets
# (test_data is held out but not used further in this cell)
train_data, test_data = train_test_split(heart_data, test_size=0.2, random_state=42)

# Define the structure of the Bayesian Network
model = BayesianNetwork([('age', 'trestbps'),
                         ('age', 'fbs'),
                         ('sex', 'trestbps'),
                         ('trestbps', 'heart_disease'),
                         ('chol', 'heart_disease'),
                         ('fbs', 'heart_disease')])

# Fit the model with a Bayesian (BDeu-smoothed) estimator instead of plain
# maximum likelihood. With so few rows, maximum likelihood assigns probability
# 0 to state combinations that never occur in the data (no row has
# age '40-59', sex 1 and trestbps 130), so the evidence queried below had
# probability 0 and the normalised posterior came out as nan. The BDeu prior
# adds pseudo-counts to every cell, keeping all probabilities strictly positive.
from pgmpy.estimators import BayesianEstimator  # imported here so the cell is self-contained

model.fit(train_data, estimator=BayesianEstimator,
          prior_type='BDeu', equivalent_sample_size=5)

# Perform inference
infer = VariableElimination(model)

# Query the model to calculate the probability of heart disease given new data.
# Every evidence value must be a state that was seen in the training data.
query_result = infer.query(variables=['heart_disease'], evidence={
    'age': '40-59',  # Use discrete age category
    'sex': 1,
    'chol': 250,
    'trestbps': 130,
    'fbs': 0
})

print(query_result)

     age sex chol trestbps fbs heart_disease
0  60-79   1  233      145   1             1
1  60-79   1  286      160   0             1
2  60-79   1  229      120   0             1
3  20-39   1  250      130   0             0
4  40-59   0  204      130   0             0
+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |                  nan |
+------------------+----------------------+
| heart_disease(1) |                  nan |
+------------------+----------------------+


  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  phi.values = phi.values / phi.values.sum()


In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Toy heart-disease sample (10 patients) used to demonstrate fitting and
# querying a discrete Bayesian network.
# NOTE(review): 'chol' and 'trestbps' are raw measurements used as discrete
# states, so almost every value is its own state; consider binning them the
# same way as 'age' before using this on real data.
sample_data = {
    'age': [63, 67, 67, 37, 41, 56, 62, 57, 63, 53],
    'sex': [1, 1, 1, 1, 0, 1, 0, 0, 1, 1],
    'chol': [233, 286, 229, 250, 204, 236, 268, 354, 254, 203],
    'trestbps': [145, 160, 120, 130, 130, 120, 140, 140, 135, 140],
    'fbs': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'heart_disease': [1, 1, 1, 0, 0, 1, 0, 1, 1, 0]
}

# Convert the sample data into a DataFrame
heart_data = pd.DataFrame(sample_data)

# Discretize the 'age' variable into categories.
# right=False makes the bins left-closed ([20, 40), [40, 60), [60, 80)) so
# they agree with the labels; with pandas' default right-closed bins an age
# of exactly 40 would have been labelled '20-39'.
age_bins = [20, 40, 60, 80]
age_labels = ['20-39', '40-59', '60-79']
heart_data['age'] = pd.cut(heart_data['age'], bins=age_bins,
                           labels=age_labels, right=False)

# Convert columns to categorical types
for col in heart_data.columns:
    heart_data[col] = heart_data[col].astype('category')

# Display the first few rows of the dataset
print(heart_data.head())

# Split the data into training and testing sets
# (test_data is held out but not used further in this cell)
train_data, test_data = train_test_split(heart_data, test_size=0.2, random_state=42)

# Define the structure of the Bayesian Network
model = BayesianNetwork([('age', 'trestbps'),
                         ('age', 'fbs'),
                         ('sex', 'trestbps'),
                         ('trestbps', 'heart_disease'),
                         ('chol', 'heart_disease'),
                         ('fbs', 'heart_disease')])

# Fit the model with a Bayesian (BDeu-smoothed) estimator instead of plain
# maximum likelihood. With so few rows, maximum likelihood assigns probability
# 0 to state combinations that never occur in the data (no row has
# age '40-59', sex 1 and trestbps 130), so the evidence queried below had
# probability 0 and the normalised posterior came out as nan. The BDeu prior
# adds pseudo-counts to every cell, keeping all probabilities strictly positive.
from pgmpy.estimators import BayesianEstimator  # imported here so the cell is self-contained

model.fit(train_data, estimator=BayesianEstimator,
          prior_type='BDeu', equivalent_sample_size=5)

# Perform inference
infer = VariableElimination(model)

# Query the model to calculate the probability of heart disease given new data.
# Every evidence value must be a state that was seen in the training data.
query_result = infer.query(variables=['heart_disease'], evidence={
    'age': '40-59',  # Use discrete age category
    'sex': 1,
    'chol': 250,
    'trestbps': 130,
    'fbs': 0
})

print(query_result)

     age sex chol trestbps fbs heart_disease
0  60-79   1  233      145   1             1
1  60-79   1  286      160   0             1
2  60-79   1  229      120   0             1
3  20-39   1  250      130   0             0
4  40-59   0  204      130   0             0
+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |                  nan |
+------------------+----------------------+
| heart_disease(1) |                  nan |
+------------------+----------------------+


  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  phi.values = phi.values / phi.values.sum()


In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Toy heart-disease sample (10 patients) used to demonstrate fitting and
# querying a discrete Bayesian network.
# NOTE(review): 'chol' and 'trestbps' are raw measurements used as discrete
# states, so almost every value is its own state; consider binning them the
# same way as 'age' before using this on real data.
sample_data = {
    'age': [63, 67, 67, 37, 41, 56, 62, 57, 63, 53],
    'sex': [1, 1, 1, 1, 0, 1, 0, 0, 1, 1],
    'chol': [233, 286, 229, 250, 204, 236, 268, 354, 254, 203],
    'trestbps': [145, 160, 120, 130, 130, 120, 140, 140, 135, 140],
    'fbs': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'heart_disease': [1, 1, 1, 0, 0, 1, 0, 1, 1, 0]
}

# Convert the sample data into a DataFrame
heart_data = pd.DataFrame(sample_data)

# Discretize the 'age' variable into categories.
# right=False makes the bins left-closed ([20, 40), [40, 60), [60, 80)) so
# they agree with the labels; with pandas' default right-closed bins an age
# of exactly 40 would have been labelled '20-39'.
age_bins = [20, 40, 60, 80]
age_labels = ['20-39', '40-59', '60-79']
heart_data['age'] = pd.cut(heart_data['age'], bins=age_bins,
                           labels=age_labels, right=False)

# Convert columns to categorical types
for col in heart_data.columns:
    heart_data[col] = heart_data[col].astype('category')

# Display the first few rows of the dataset
print(heart_data.head())

# Split the data into training and testing sets
# (test_data is held out but not used further in this cell)
train_data, test_data = train_test_split(heart_data, test_size=0.2, random_state=42)

# Define the structure of the Bayesian Network
model = BayesianNetwork([('age', 'trestbps'),
                         ('age', 'fbs'),
                         ('sex', 'trestbps'),
                         ('trestbps', 'heart_disease'),
                         ('chol', 'heart_disease'),
                         ('fbs', 'heart_disease')])

# Fit the model with a Bayesian (BDeu-smoothed) estimator instead of plain
# maximum likelihood. With so few rows, maximum likelihood assigns probability
# 0 to state combinations that never occur in the data (no row has
# age '40-59', sex 1 and trestbps 130), so the evidence queried below had
# probability 0 and the normalised posterior came out as nan. The BDeu prior
# adds pseudo-counts to every cell, keeping all probabilities strictly positive.
from pgmpy.estimators import BayesianEstimator  # imported here so the cell is self-contained

model.fit(train_data, estimator=BayesianEstimator,
          prior_type='BDeu', equivalent_sample_size=5)

# Perform inference
infer = VariableElimination(model)

# Query the model to calculate the probability of heart disease given new data.
# Every evidence value must be a state that was seen in the training data.
query_result = infer.query(variables=['heart_disease'], evidence={
    'age': '40-59',  # Use discrete age category
    'sex': 1,
    'chol': 250,
    'trestbps': 130,
    'fbs': 0
})

print(query_result)

     age sex chol trestbps fbs heart_disease
0  60-79   1  233      145   1             1
1  60-79   1  286      160   0             1
2  60-79   1  229      120   0             1
3  20-39   1  250      130   0             0
4  40-59   0  204      130   0             0
+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
+==================+======================+
| heart_disease(0) |                  nan |
+------------------+----------------------+
| heart_disease(1) |                  nan |
+------------------+----------------------+


  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  data.groupby([variable] + parents).size().unstack(parents)
  phi.values = phi.values / phi.values.sum()
