In [26]:
import pandas as pd
import json
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

In [16]:
# Load the raw order-pair documents.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the file
# decodes the same way on every platform instead of using the locale default.
with open('orders.json', encoding='utf-8') as f:
    data = json.load(f)

# Each document holds two nested orders with the same set of fields.
# Driving the flattening from these two tuples keeps order1/order2 handling
# identical and fixes the column order: _id, order1_*, order2_*.
ORDER_KEYS = ("order1", "order2")
ORDER_FIELDS = ("itemName", "popularity", "durability", "price")

# Flatten every document into one flat record: "<order>_<field>" columns
# plus the document id taken from the nested {"$oid": ...} wrapper.
comparison_data = []
for item in data:
    record = {"_id": item["_id"]["$oid"]}
    for order_key in ORDER_KEYS:
        order = item[order_key]
        for field in ORDER_FIELDS:
            record[f"{order_key}_{field}"] = order[field]
    comparison_data.append(record)

# Build the DataFrame once from the list of records (one row per document).
df = pd.DataFrame(comparison_data)

# Display the DataFrame
print(df)

                          _id order1_itemName order1_popularity  \
0    664f81d434a7bc1794e1da7e        Big Root               low   
1    664f81d434a7bc1794e1da7f        Big Root            medium   
2    664f81d434a7bc1794e1da80       Burn Heal            medium   
3    664f81d434a7bc1794e1da81    Thunderstone               low   
4    664f81d434a7bc1794e1da82      Moon Stone            medium   
..                        ...             ...               ...   
995  664f81d434a7bc1794e1de61          potion            medium   
996  664f81d434a7bc1794e1de62       maxpotion               low   
997  664f81d434a7bc1794e1de63        pokeball               low   
998  664f81d434a7bc1794e1de64       maxpotion            medium   
999  664f81d434a7bc1794e1de65     Expert Belt               low   

    order1_durability  order1_price order2_itemName order2_popularity  \
0                high            50       Burn Heal              high   
1                high             5      Moon Sto

In [17]:
# Predictors (X): the four attributes recorded for the first order.
X = df.loc[:, [
    'order1_itemName',
    'order1_popularity',
    'order1_durability',
    'order1_price',
]]
# Target (y): the item name of the second order in the same record.
y = df.loc[:, 'order2_itemName']


In [19]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Build an (as yet unfitted) k-nearest-neighbours classifier.
# Only n_neighbors is set here; every other hyperparameter keeps the
# scikit-learn default.
k = 5  # Number of neighbors consulted for each prediction
knn = KNeighborsClassifier(n_neighbors=k)

In [22]:
# Columns to one-hot encode.
# NOTE(review): order1_price is listed here, so each distinct price becomes its
# own indicator column rather than being used as a numeric magnitude - confirm
# that this is intended.
categorical_features = ['order1_itemName', 'order1_popularity', 'order1_durability', 'order1_price']

# One-hot encode the listed columns; any other input columns are passed
# through unchanged (remainder='passthrough').
# handle_unknown='ignore' makes transform() encode a category that was not
# seen during fit as all zeros for that feature instead of raising, so a test
# row with an item name or price absent from the training split cannot crash
# the notebook. Output is unchanged when no unseen category occurs.
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

In [23]:
# Learn the encoding from the training rows only and encode them in one step.
X_train_encoded = preprocessor.fit_transform(X_train)

# Encode the test rows with the already-fitted preprocessor (no re-fit), so
# they get the same output columns as the training data and nothing about the
# test split influences the learned encoding.
X_test_encoded = preprocessor.transform(X_test)

In [24]:
# Fit the KNN classifier on the encoded training features and their labels.
knn.fit(X_train_encoded, y_train)

In [25]:
# Predict the target label (the second order's item name) for each test row
y_pred = knn.predict(X_test_encoded)

# Accuracy = fraction of test rows whose predicted label exactly matches
# the true label
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.025
