# In [2]:
import csv
from math import sqrt

# Load data from a text file
def load_data(filename, delimiter='\t'):
    """Read a delimited text file into a list of rows of string fields.

    Blank (whitespace-only) lines are skipped. Every other line has its
    leading and trailing whitespace stripped before it is split, so with a
    whitespace delimiter such as the default tab, empty fields at the very
    start or end of a line are dropped.

    Args:
        filename: Path of the text file to read.
        delimiter: Field separator handed to ``str.split``. Defaults to a
            tab; pass ``None`` to split on any run of whitespace.

    Returns:
        A list of rows, each row being a list of field strings. No numeric
        conversion is performed.
    """
    dataset = []
    with open(filename, 'r') as file:
        for line in file:
            stripped = line.strip()
            if stripped:  # Skip empty lines
                dataset.append(stripped.split(delimiter))
    return dataset

# Replace missing values with the column mean
def replace_missing_values(dataset, missing_value='1.00000000000000e+99'):
    """Replace missing entries of ``dataset`` with their column mean, in place.

    A cell counts as missing when it compares equal to ``missing_value``
    (an exact match against the raw field, not a numeric comparison). Every
    column up to the width of the first row is processed, the last one
    included. Replaced cells become floats; all other cells are left
    untouched.

    Args:
        dataset: List of rows (mutable sequences) whose non-missing cells
            are convertible with ``float``. Modified in place.
        missing_value: Sentinel that marks a missing cell.

    Returns:
        None. An empty dataset is a no-op, and a column in which every
        entry is missing is left unchanged because it has no mean.

    Raises:
        ValueError: If a non-missing cell cannot be converted to float.
    """
    if not dataset:
        return  # Nothing to impute; avoids indexing dataset[0].
    for col in range(len(dataset[0])):
        column_values = [float(row[col]) for row in dataset if row[col] != missing_value]
        if not column_values:
            # Every entry is missing, so there is no mean to fill in
            # (the division below would raise ZeroDivisionError).
            continue
        mean_value = sum(column_values) / len(column_values)
        for row in dataset:
            if row[col] == missing_value:
                row[col] = mean_value

# Calculate the Euclidean distance between two points
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows, ignoring row1's last column.

    Only the first ``len(row1) - 1`` positions are compared; the final
    entry of ``row1`` is treated as the label and skipped. Cells are
    converted with ``float`` before subtraction.
    """
    squared_sum = 0.0
    for idx, left in enumerate(row1[:-1]):  # Exclude the label
        delta = float(left) - float(row2[idx])
        squared_sum += delta ** 2
    return sqrt(squared_sum)

# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the ``num_neighbors`` training rows closest to ``test_row``.

    Distances come from ``euclidean_distance(test_row, train_row)``.

    Args:
        train: Iterable of training rows.
        test_row: Row to find neighbours for.
        num_neighbors: How many rows to return. If it exceeds the number of
            training rows, all of them are returned; a value of zero or less
            yields an empty list.

    Returns:
        A list of training rows ordered from nearest to farthest. Rows at
        equal distance keep their relative order from ``train``.
    """
    # sorted() is stable, so ties keep their original training order.
    ranked = sorted(
        train,
        key=lambda train_row: euclidean_distance(test_row, train_row),
    )
    # Slicing tolerates num_neighbors > len(train), where indexing raised
    # IndexError; max() stops a negative count from slicing off the tail.
    return ranked[:max(num_neighbors, 0)]

# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict the label of ``test_row`` by majority vote of its neighbours.

    The label of a training row is its last element. When several labels
    share the highest vote count, the one that appears first among the
    neighbours (i.e. belongs to the nearest of the tied rows) wins, so the
    result is the same on every run.

    Args:
        train: Training rows, each ending with its label.
        test_row: Row to classify.
        num_neighbors: Number of nearest neighbours that vote.

    Returns:
        The winning label, exactly as stored in the training row.

    Raises:
        ValueError: If there are no neighbours to vote.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    # Iterate the list, not a set: max() returns the first maximal item it
    # meets, and set order for strings varies with hash randomisation.
    return max(output_values, key=output_values.count)

# k-NN Algorithm
def k_nearest_neighbors(train, test, num_neighbors):
    """Classify every row of ``test`` against ``train``.

    Returns a list with one prediction per test row, in the same order,
    each obtained from ``predict_classification``.
    """
    return [
        predict_classification(train, query_row, num_neighbors)
        for query_row in test
    ]

# Save predictions to a text file
def save_predictions(predictions, filename):
    """Write each prediction on its own line to ``filename``.

    The file is opened in write mode, so existing content is overwritten.
    Predictions are formatted with an f-string, one per line.
    """
    with open(filename, 'w') as out:
        out.writelines(f"{label}\n" for label in predictions)

# Example usage: classify every row of the test file with k-NN and write one
# predicted label per line to the output file.
train_filename = 'TrainData1.txt'
test_filename = 'TestData1.txt'
# Rows are loaded as lists of strings; no numeric conversion happens here.
train_data = load_data(train_filename)
test_data = load_data(test_filename)
# Impute in place. Each dataset is filled with its own column means, so the
# test set does not reuse the training-set means.
replace_missing_values(train_data)
replace_missing_values(test_data)
num_neighbors = 3  # Number of voting neighbours; this can be tuned
# NOTE(review): euclidean_distance skips the last column of the test row and
# predict_classification reads the last column of each training row as its
# label, so both files presumably carry a label (or placeholder) in the
# final column -- confirm against the data files.
predictions = k_nearest_neighbors(train_data, test_data, num_neighbors)
save_predictions(predictions, 'PredictedLabels1.txt')
