In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the startups CSV file into a DataFrame
csv_path = "Startups.csv"
data = pd.read_csv(csv_path)

In [3]:
# Narrow the frame down to the four columns of interest
selected_columns = ["Company", "Valuation", "Industry", "Country"]
data = data.loc[:, selected_columns]

In [4]:
# Define Valuation categories
# We use three categories here: Low, Medium, and High
def categorize_valuation(valuation, low_threshold=5000000, high_threshold=10000000):
    """Bucket a numeric valuation into "Low", "Medium" or "High".

    Parameters
    ----------
    valuation : number
        Value to classify; compared directly against the thresholds.
    low_threshold : number, optional
        Exclusive upper bound of the "Low" bucket (default 5000000).
    high_threshold : number, optional
        Exclusive upper bound of the "Medium" bucket (default 10000000).

    Returns
    -------
    str
        "Low" when ``valuation < low_threshold``, "Medium" when
        ``low_threshold <= valuation < high_threshold``, otherwise "High".

    Notes
    -----
    NaN compares false against both thresholds, so a missing valuation
    falls through to "High". Drop or impute missing values beforehand if
    that is not the intended label.
    """
    if valuation < low_threshold:
        return "Low"
    # Getting here already rules out valuation < low_threshold, so only the
    # upper bound of the "Medium" bucket needs checking.
    if valuation < high_threshold:
        return "Medium"
    return "High"

# Replace each numeric valuation with its Low/Medium/High label
valuation_labels = data["Valuation"].map(categorize_valuation)
data["Valuation"] = valuation_labels

In [5]:
# Encode categorical variables (Industry, Country) and the target as integer codes.
# Each column gets its own encoder: refitting one shared encoder overwrites its
# learned classes, which would make the Industry/Country codes impossible to
# map back to their original labels with inverse_transform.
industry_encoder = LabelEncoder()
country_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; name kept so it still ends up fitted on Valuation

data["Industry"] = industry_encoder.fit_transform(data["Industry"])
data["Country"] = country_encoder.fit_transform(data["Country"])
data["Valuation"] = label_encoder.fit_transform(data["Valuation"])  # Encode the target variable

# NOTE(review): integer codes give nominal categories an arbitrary ordering and
# spacing, which a distance-based model such as k-NN treats as meaningful.
# Consider one-hot encoding the features -- confirm before changing, since the
# feature-selection cell expects the "Industry" and "Country" columns.

In [6]:
# Separate the predictor columns (X) from the encoded Valuation target (y)
feature_columns = ["Industry", "Country"]
target_column = "Valuation"
X = data[feature_columns]
y = data[target_column]

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [8]:
# Build the k-NN classifier; k is the number of neighbours consulted per prediction
k = 3
knn = KNeighborsClassifier(n_neighbors=k)

In [9]:
# Fit the classifier to the training features and labels
knn.fit(X=X_train, y=y_train)

In [10]:
# Predict the encoded valuation class for every held-out row
y_pred = knn.predict(X=X_test)