# In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def naive_bayes(x_train, y_train):
  """
  Fit a categorical Naive Bayes classifier from scratch.

  Every feature is treated as a discrete (categorical) variable. The model
  stores log P(class) and, for each class and feature, log P(value | class)
  estimated with add-one (Laplace) smoothing, so a value that never occurred
  together with a class does not force that class's probability to zero.

  Args:
    x_train: 2-D array-like of shape (n_samples, n_features) holding the
      features of the training data.
    y_train: 1-D array-like of length n_samples holding the labels of the
      training data.

  Returns:
    A function predict(x_new). Given a single data point (1-D, n_features
    values) it returns the predicted class label; given a 2-D batch of data
    points it returns a NumPy array with one label per row. Ties go to the
    class that sorts first.

  Raises:
    ValueError: If x_train is not 2-D, y_train does not hold exactly one
      label per row of x_train, or the training data is empty.
  """
  x_train = np.asarray(x_train)
  y_train = np.asarray(y_train)
  if x_train.ndim != 2:
    raise ValueError("x_train must be 2-D: (n_samples, n_features)")
  if y_train.ndim != 1 or len(x_train) != len(y_train):
    raise ValueError("y_train must be 1-D with one label per row of x_train")
  if len(y_train) == 0:
    raise ValueError("training data must not be empty")

  n_features = x_train.shape[1]

  # Prior of each class, kept in log space so the per-feature terms can be
  # summed instead of multiplied (avoids underflow with many features).
  classes, class_counts = np.unique(y_train, return_counts=True)
  log_priors = np.log(class_counts / len(y_train))

  # Number of distinct values per feature over the whole training set; this
  # is the smoothing term added to every class's denominator.
  n_values = [len(np.unique(x_train[:, i])) for i in range(n_features)]

  # seen_log_probs[c][i] maps a feature value to log P(x_i = value | class c);
  # unseen_log_probs[c][i] is the smoothed fallback for any other value.
  seen_log_probs = []
  unseen_log_probs = []
  for label, class_count in zip(classes, class_counts):
    class_rows = x_train[y_train == label]
    seen_for_class = []
    unseen_for_class = []
    for i in range(n_features):
      denominator = class_count + n_values[i]
      values, counts = np.unique(class_rows[:, i], return_counts=True)
      # tolist() yields plain Python scalars, which make reliable dict keys.
      seen_for_class.append({
        value: np.log((count + 1) / denominator)
        for value, count in zip(values.tolist(), counts.tolist())
      })
      unseen_for_class.append(np.log(1 / denominator))
    seen_log_probs.append(seen_for_class)
    unseen_log_probs.append(unseen_for_class)

  def _log_posterior(point, class_index):
    # Unnormalised log P(class | point) = log prior + sum of log likelihoods.
    total = log_priors[class_index]
    lookups = zip(point, seen_log_probs[class_index], unseen_log_probs[class_index])
    for value, seen, unseen in lookups:
      total += seen.get(value, unseen)
    return total

  # Define a function that predicts the label of a new data point.
  def predict(x_new):
    x_new = np.asarray(x_new)
    is_single = x_new.ndim <= 1
    batch = x_new.reshape(1, -1) if is_single else x_new
    if batch.ndim != 2 or batch.shape[1] != n_features:
      raise ValueError("x_new must provide %d feature value(s) per data point" % n_features)

    best = np.array(
      [
        int(np.argmax([_log_posterior(point, c) for c in range(len(classes))]))
        for point in batch.tolist()
      ],
      dtype=int,
    )
    # Map argmax positions back to the actual class labels.
    labels = classes[best]
    return labels[0] if is_single else labels

  return predict

def k_nearest_neighbors(x_train, y_train, k):
  """
  Implement the K-Nearest Neighbours algorithm from scratch.

  Args:
    x_train: 2-D array-like of shape (n_samples, n_features) holding the
      numeric features of the training data.
    y_train: 1-D array-like of length n_samples holding the labels of the
      training data.
    k: The number of nearest neighbors to consider. If k exceeds the number
      of training points, all of them vote.

  Returns:
    A function predict(x_new). Given a single data point (1-D, n_features
    values) it returns the majority label among the k training points
    closest to it in Euclidean distance; given a 2-D batch of data points it
    returns a NumPy array with one label per row. Vote ties go to the label
    that sorts first.

  Raises:
    ValueError: If k is smaller than 1, x_train is not 2-D, y_train does not
      hold exactly one label per row of x_train, or the training data is
      empty.
  """
  x_train = np.asarray(x_train, dtype=float)
  y_train = np.asarray(y_train)
  if k < 1:
    raise ValueError("k must be at least 1")
  if x_train.ndim != 2:
    raise ValueError("x_train must be 2-D: (n_samples, n_features)")
  if y_train.ndim != 1 or len(x_train) != len(y_train):
    raise ValueError("y_train must be 1-D with one label per row of x_train")
  if len(y_train) == 0:
    raise ValueError("training data must not be empty")

  n_features = x_train.shape[1]

  def _predict_one(point):
    # Calculate the distances between the new data point and all the
    # training data points.
    distances = np.linalg.norm(x_train - point, axis=1)

    # Find the k nearest neighbors; a stable sort keeps equidistant points
    # in training order so results are reproducible.
    nearest_neighbors = np.argsort(distances, kind="stable")[:k]

    # Majority vote. np.unique returns labels sorted, so argmax picks the
    # smallest label when several share the highest count.
    labels, votes = np.unique(y_train[nearest_neighbors], return_counts=True)
    return labels[np.argmax(votes)]

  def predict(x_new):
    x_new = np.asarray(x_new, dtype=float)
    is_single = x_new.ndim <= 1
    batch = x_new.reshape(1, -1) if is_single else x_new
    if batch.ndim != 2 or batch.shape[1] != n_features:
      raise ValueError("x_new must provide %d feature value(s) per data point" % n_features)

    # Pre-allocate with the label dtype so an empty batch still returns an
    # array of the right type.
    predicted = np.empty(len(batch), dtype=y_train.dtype)
    for row, point in enumerate(batch):
      predicted[row] = _predict_one(point)
    return predicted[0] if is_single else predicted

  return predict

def main():
  """
  Train and evaluate the two from-scratch classifiers on CSV data.

  Reads 'gender_submission.csv' and 'train.csv' from the current working
  directory, prints the 'Survived' column of the former, splits the latter
  75/25 at random into training and test sets, fits the Naive Bayes and
  K-Nearest Neighbours classifiers, prints their accuracies and shows a
  scatter plot of the first two numeric features.

  NOTE(review): the file names and the 'Survived' column look like the
  Kaggle Titanic data set rather than Iris - confirm.
  """
  df = pd.read_csv('gender_submission.csv')
  predictions = df['Survived']

  print(predictions)
  df_train = pd.read_csv('train.csv')

  # The classifiers index with x[:, i] and compute Euclidean distances, so
  # they need a plain numeric NumPy matrix: keep numeric columns only and
  # fill missing values with the column median.
  features = df_train.drop('Survived', axis=1).select_dtypes(include='number')
  features = features.dropna(axis=1, how='all')
  features = features.fillna(features.median())
  x = features.to_numpy(dtype=float)
  y = df_train['Survived'].to_numpy()

  # Split the data into training and test sets (25% test, shuffled).
  order = np.random.default_rng().permutation(len(x))
  n_test = int(np.ceil(0.25 * len(x)))
  test_idx, train_idx = order[:n_test], order[n_test:]
  x_train, x_test = x[train_idx], x[test_idx]
  y_train, y_test = y[train_idx], y[test_idx]

  # Train the Naive Bayes classifier.
  nb_clf = naive_bayes(x_train, y_train)

  # Train the K-Nearest Neighbours classifier.
  knn_clf = k_nearest_neighbors(x_train, y_train, 5)

  # Make predictions on the test set, one data point at a time (the
  # documented contract of both classifiers).
  nb_predictions = np.array([nb_clf(row) for row in x_test])
  knn_predictions = np.array([knn_clf(row) for row in x_test])

  # Evaluate the accuracy of the classifiers.
  nb_accuracy = np.mean(nb_predictions == y_test)
  knn_accuracy = np.mean(knn_predictions == y_test)

  # Print the accuracy of the classifiers.
  print('Naive Bayes accuracy:', nb_accuracy)
  print('K-Nearest Neighbours accuracy:', knn_accuracy)

  # Scatter the first two numeric features: training points coloured by
  # their true label, test points (crosses) by the KNN prediction. This
  # shows the data, not a decision boundary.
  if x_train.shape[1] >= 2:
    colour_range = {'vmin': y.min(), 'vmax': y.max()}
    plt.figure()
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap='tab10', **colour_range)
    plt.scatter(x_test[:, 0], x_test[:, 1], c=knn_predictions, cmap='tab10',
                marker='x', **colour_range)
    plt.show()