In [6]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

# Step 1: Generate dataset
np.random.seed(42)  # Ensures the results are reproducible
values = np.random.rand(100)  # Generates 100 random values between 0 and 1
labels = []
for i in values[:50]:  # First 50 values are labeled based on the condition
    if i <= 0.5:
        labels.append('Class1')
    else:
        labels.append('Class2')

labels += [None] * 50  # The next 50 values are unlabeled (None)

# Create a dictionary for the data
data = {
    "Point": [f"x{i+1}" for i in range(100)],  # Names each data point (x1, x2, ..., x100)
    "Value": values,  # Contains the random values generated
    "Label": labels  # Contains the labels (Class1, Class2, or None)
}

# Convert dictionary to DataFrame
df = pd.DataFrame(data)

# Step 2: Split data into labeled and unlabeled
labeled_df = df[df['Label'].notna()]  # Filters rows where Label is not None
X_train = labeled_df[["Value"]]  # Features for training
y_train = labeled_df["Label"]  # Labels for training
unlabeled_df = df[df['Label'].isna()]  # Filters rows where Label is None
X_test = unlabeled_df[["Value"]]  # Features for testing

# Step 3: Perform KNN classification for different values of k
k_values = [1, 2, 3, 4, 5, 20, 30]
results = {}  # Dictionary to store predictions for each k
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)  # Initialize KNN model
    knn.fit(X_train, y_train)  # Train the model
    predictions = knn.predict(X_test)  # Predict labels for test data
    results[k] = predictions  # Store predictions
    # Assign predictions to the DataFrame
    unlabeled_df[f"Label_k{k}"] = predictions

# Step 4: Create final DataFrame with predictions
df1 = unlabeled_df.drop(columns=['Label'], axis=1)  # Remove original Label column

# Display results
print("Predictions for k=30:")
print(results[30])
print("\nFinal DataFrame with predictions:")
print(df1)

Predictions for k=30:
['Class2' 'Class2' 'Class2' 'Class2' 'Class2' 'Class2' 'Class1' 'Class1'
 'Class1' 'Class1' 'Class1' 'Class1' 'Class2' 'Class1' 'Class1' 'Class2'
 'Class1' 'Class2' 'Class1' 'Class2' 'Class2' 'Class1' 'Class1' 'Class2'
 'Class2' 'Class2' 'Class2' 'Class1' 'Class1' 'Class1' 'Class2' 'Class2'
 'Class1' 'Class1' 'Class1' 'Class1' 'Class2' 'Class2' 'Class2' 'Class1'
 'Class1' 'Class2' 'Class2' 'Class2' 'Class2' 'Class1' 'Class2' 'Class1'
 'Class1' 'Class1']

Final DataFrame with predictions:
   Point     Value Label_k1 Label_k2 Label_k3 Label_k4 Label_k5 Label_k20  \
50   x51  0.969585   Class2   Class2   Class2   Class2   Class2    Class2   
51   x52  0.775133   Class2   Class2   Class2   Class2   Class2    Class2   
52   x53  0.939499   Class2   Class2   Class2   Class2   Class2    Class2   
53   x54  0.894827   Class2   Class2   Class2   Class2   Class2    Class2   
54   x55  0.597900   Class2   Class2   Class2   Class2   Class2    Class2   
55   x56  0.921874   Cl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unlabeled_df[f"Label_k{k}"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unlabeled_df[f"Label_k{k}"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unlabeled_df[f"Label_k{k}"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l