In [2]:
# Packages this analysis depends on:
#   bnlearn - Bayesian network structure learning and fitting (hc, bn.fit)
#   dplyr   - grouping and summarising the sampled data
#   ggplot2 - bar charts
#   e1071   - naive Bayes classifier
#   rpart   - classification trees
required_packages <- c("bnlearn", "dplyr", "ggplot2", "e1071", "rpart")

# Install only what is missing, so re-running this cell never re-downloads a
# package that is already available.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach the packages. The order is the same as before so that any function
# masking between packages is unchanged.
library(rpart)
library(bnlearn)
library(dplyr)
library(ggplot2)
library(e1071)

# Read the course grade table. The first line of the file holds the column
# names; the path is resolved relative to the current working directory.
course.grades <- read.table("2020_bn_nb_data.txt", header = TRUE)

# Treat every column as categorical. Assigning into `course.grades[]` replaces
# the columns while keeping the object a data frame, so no separate
# data.frame() rebuild is needed.
course.grades[] <- lapply(course.grades, as.factor)


SyntaxError: invalid syntax (1916366535.py, line 2)

In [None]:
# Data used for the network: every column except the 9th.
# NOTE(review): column 9 is presumably QP (the label predicted by the
# classifiers further down) - confirm against the data file.
course.grades.bn.data <- course.grades[, -9]

# Learn the network structure with hc() using the K2 score, then draw it
course.grades.net <- hc(course.grades.bn.data, score = "k2")
plot(course.grades.net)

# Estimate the parameters of the learned structure from the same data
course.grades.fit <- bn.fit(course.grades.net, course.grades.bn.data)

# Print the fitted distribution of every course node. print() is explicit
# because values are not auto-printed inside a loop.
course.nodes <- c(
  "EC100", "EC160", "IT101", "IT161",
  "MA101", "PH100", "PH160", "HS101"
)
for (node in course.nodes) {
  print(course.grades.fit[[node]])
}

# Plot the conditional probability distribution of every course node.
# HS101 is included so the charts cover the same nodes as the tables above.
bn.fit.barchart(course.grades.fit$EC100)
bn.fit.barchart(course.grades.fit$EC160)
bn.fit.barchart(course.grades.fit$IT101)
bn.fit.barchart(course.grades.fit$IT161)
bn.fit.barchart(course.grades.fit$MA101)
bn.fit.barchart(course.grades.fit$PH100)
bn.fit.barchart(course.grades.fit$PH160)
bn.fit.barchart(course.grades.fit$HS101)

In [None]:
# Query the fitted network for PH100 given the evidence
# EC100 = "DD", IT101 = "CC" and MA101 = "CD"; keep the result as a data frame
course.grades.PH100 <- data.frame(
  cpdist(
    course.grades.fit,
    nodes = c("PH100"),
    evidence = ((EC100 == "DD") & (IT101 == "CC") & (MA101 == "CD"))
  )
)

# Number of returned rows per PH100 grade
df <- tally(group_by(course.grades.PH100, PH100), name = "counts")

# Bar chart of those counts, with each count written just above its bar
ggplot(df, aes(x = PH100, y = counts)) +
  geom_col(fill = "#0073C2FF") +
  geom_text(aes(label = counts), vjust = -0.3)

In [None]:
# Repeated hold-out evaluation of a naive Bayes classifier that predicts QP
# from all remaining columns of course.grades.
n_runs <- 20           # number of random train/test splits
train_fraction <- 0.7  # share of rows used for training in each split

n_rows <- nrow(course.grades)
# Whole number of training rows, made explicit instead of relying on sample()
# truncating a fractional size
n_train <- floor(train_fraction * n_rows)

# One accuracy value per split
accuracies <- numeric(n_runs)

for (i in seq_len(n_runs)) {
  # Seeding with the run index makes every split reproducible
  set.seed(i)

  # Random split: n_train rows for training, the rest for testing
  train_indices <- sample(seq_len(n_rows), n_train)
  train_data <- course.grades[train_indices, ]
  test_data <- course.grades[-train_indices, ]

  # Fit naive Bayes on the training rows
  model <- naiveBayes(QP ~ ., data = train_data)

  # Predict QP for the held-out rows
  predictions <- predict(model, test_data)

  # Confusion matrix: predicted class (rows) against actual class (columns)
  conf_matrix <- table(predictions, test_data$QP)

  # Accuracy = share of test rows on the diagonal (prediction equals truth)
  accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
  accuracies[i] <- accuracy

  cat("Accuracy for iteration", i, ":", round(accuracy * 100, 2), "%\n")
}

# conf_matrix is overwritten on every run, so this shows the last split only
print(conf_matrix)

# Mean accuracy over all splits
average_accuracy <- mean(accuracies)
cat("Average Accuracy:", round(average_accuracy * 100, 2), "%\n")

In [None]:

# Repeated hold-out evaluation of an rpart classification tree that predicts
# QP from all remaining columns of course.grades.
n_runs <- 20           # number of random train/test splits
train_fraction <- 0.7  # share of rows used for training in each split

n_rows <- nrow(course.grades)
# Whole number of training rows, made explicit instead of relying on sample()
# truncating a fractional size
n_train <- floor(train_fraction * n_rows)

# One accuracy value per split
accuracies <- numeric(n_runs)

for (i in seq_len(n_runs)) {
  # Seeding with the run index makes every split reproducible
  set.seed(i)

  # Random split: n_train rows for training, the rest for testing
  train_indices <- sample(seq_len(n_rows), n_train)
  train_data <- course.grades[train_indices, ]
  test_data <- course.grades[-train_indices, ]

  # Fit a classification tree on the training rows
  model <- rpart(QP ~ ., data = train_data, method = "class")

  # Predict the QP class for the held-out rows
  predictions <- predict(model, test_data, type = "class")

  # Confusion matrix: predicted class (rows) against actual class (columns)
  conf_matrix <- table(predictions, test_data$QP)

  # Accuracy = share of test rows on the diagonal (prediction equals truth)
  accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
  accuracies[i] <- accuracy

  cat("Accuracy for iteration", i, ":", round(accuracy * 100, 2), "%\n")
}

# Mean accuracy over all splits
average_accuracy <- mean(accuracies)

# conf_matrix is overwritten on every run, so this shows the last split only
print(conf_matrix)

# Report the computed average rather than a fixed number
cat("Average Accuracy:", round(average_accuracy * 100, 2), "%\n")
