In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset into a pandas DataFrame (replace 'your_dataset.csv' with the actual file path)
df = pd.read_csv(r"C:\Users\GIS\Desktop\physics_particles.csv")

# Handle missing values (replace '-' with NaN)
df.replace('-', float('nan'), inplace=True)

# Drop rows with missing values
df.dropna(inplace=True)

# Select the features (attributes) and the target (the column you want to predict)
# For this example, we'll use 'charge', 'mass', and 'width' as features and 'pdg_name' as the target
features = ['charge', 'mass', 'width']
target = 'pdg_name'

X = df[features]
y = df[target]

# Convert non-numeric values to numeric using label encoding
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (standardization)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the k-NN classifier and train it on the training data
knn = KNeighborsClassifier(n_neighbors=3)  # You can adjust the 'n_neighbors' parameter as needed
knn.fit(X_train_scaled, y_train)

# Make predictions on the test data
y_pred = knn.predict(X_test_scaled)

# Inverse transform the predictions to get back the original class labels
y_pred_original = label_encoder.inverse_transform(y_pred)

# Evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report and confusion matrix
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.0
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       0.0
           3       0.00      0.00      0.00       0.0
           5       0.00      0.00      0.00       1.0
           6       0.00      0.00      0.00       1.0
           8       0.00      0.00      0.00       1.0

    accuracy                           0.00       4.0
   macro avg       0.00      0.00      0.00       4.0
weighted avg       0.00      0.00      0.00       4.0

Confusion Matrix:
 [[0 0 1 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 1 0 0 0 0]
 [1 0 0 0 0 0]
 [0 0 1 0 0 0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
df

Unnamed: 0,pdg_id,pdg_name,name,charge,rank,quarks,mass,mass_lower,mass_upper,width,width_lower,width_upper
0,-2212,p,anti_proton,-1.0,4,UUD,938.27208816,2.9e-07,2.9e-07,0.0,0.0,0.0
1,-2112,n,anti_neutron,0.0,4,UDD,939.5654205,5e-07,5e-07,7.493e-25,4.0000000000000003e-28,4.0000000000000003e-28
2,-321,K,kaon-,-1.0,0,Us,493.677,0.016,0.016,5.317e-14,9e-17,9e-17
3,-211,pi,pion0,-1.0,0,Ud,139.57039,0.00018,0.00018,2.5284e-14,5e-18,5e-18
11,-6,t,anti_top,-0.666667,0,T,172500.0,700.0,700.0,1420.0,150.0,190.0
22,6,t,top,0.666667,0,t,172500.0,700.0,700.0,1420.0,150.0,190.0
34,111,pi,pion-,0.0,0,(uU-dD)/sqrt(2),134.9768,0.0005,0.0005,7.81e-06,1.2e-07,1.2e-07
35,211,pi,pion+,1.0,0,uD,139.57039,0.00018,0.00018,2.5284e-14,5e-18,5e-18
36,321,K,kaon+,1.0,0,uS,493.677,0.016,0.016,5.317e-14,9e-17,9e-17
37,2112,n,neutron,0.0,4,udd,939.5654205,5e-07,5e-07,7.493e-25,4.0000000000000003e-28,4.0000000000000003e-28
