In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Read the dataset
df = pd.read_excel(r'C:\Users\17b-132-se\Downloads\Job_Scheduling.xlsx')

# Separate the input features (X) and output variable (y)
X = df.iloc[:, 0:4].values
y = df.iloc[:, 4].values

# Handling missing values using mean imputation
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)
y = imputer.fit_transform(y.reshape(-1, 1)).reshape(-1)

# Rescaling features using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

# Standardizing features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Print the preprocessed data
print("Preprocessed X:")
print(X)
print("\nPreprocessed y:")
print(y)

Preprocessed X:
[[ 4.41334946e-01 -1.45773214e-01  7.87817592e-01  9.18936583e-01]
 [-3.00076546e-01  1.54402989e+00  1.13951189e+00 -1.14867073e+00]
 [-1.01183158e+00 -5.23617386e-01  1.65279546e+00  9.18936583e-01]
 [ 1.39775577e+00  7.46358857e-01  0.00000000e+00 -1.14867073e+00]
 [ 6.32150851e-02  2.42566629e-01  2.84039274e-01 -1.14867073e+00]
 [-1.38253732e+00 -1.64665423e+00  6.92765079e-01 -1.14867073e+00]
 [ 9.01010071e-01 -6.81052457e-01 -1.07521166e+00  9.18936583e-01]
 [ 1.44965458e+00  1.06122900e-01  1.92844775e+00  9.18936583e-01]
 [ 4.48749061e-01  8.19828557e-01 -1.72156875e+00 -1.14867073e+00]
 [-1.83401790e-02  2.16327450e+00 -1.00867490e+00  0.00000000e+00]
 [-2.77834201e-01 -2.11493530e-16  8.38698643e-03  9.18936583e-01]
 [ 3.22709107e-01  1.23965541e+00  3.31565530e-01  9.18936583e-01]
 [-1.41219378e+00 -8.59478871e-01 -7.90054121e-01  9.18936583e-01]
 [ 2.70810303e-01 -1.11137499e+00  0.00000000e+00 -1.14867073e+00]
 [-8.26478705e-01 -9.43444243e-01 -7.71602751e

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Read the dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
df = pd.read_csv(url, names=column_names)

# Separate the input features (X) and target variable (y)
X = df.iloc[:, 0:4].values
y = df.iloc[:, 4].values

# Encoding the target variable using LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

# Standardizing the features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Print the preprocessed data
print("Preprocessed X:")
print(X)
print("\nPreprocessed y:")
print(y)

Preprocessed X:
[[-9.00681170e-01  1.03205722e+00 -1.34127240e+00 -1.31297673e+00]
 [-1.14301691e+00 -1.24957601e-01 -1.34127240e+00 -1.31297673e+00]
 [-1.38535265e+00  3.37848329e-01 -1.39813811e+00 -1.31297673e+00]
 [-1.50652052e+00  1.06445364e-01 -1.28440670e+00 -1.31297673e+00]
 [-1.02184904e+00  1.26346019e+00 -1.34127240e+00 -1.31297673e+00]
 [-5.37177559e-01  1.95766909e+00 -1.17067529e+00 -1.05003079e+00]
 [-1.50652052e+00  8.00654259e-01 -1.34127240e+00 -1.18150376e+00]
 [-1.02184904e+00  8.00654259e-01 -1.28440670e+00 -1.31297673e+00]
 [-1.74885626e+00 -3.56360566e-01 -1.34127240e+00 -1.31297673e+00]
 [-1.14301691e+00  1.06445364e-01 -1.28440670e+00 -1.44444970e+00]
 [-5.37177559e-01  1.49486315e+00 -1.28440670e+00 -1.31297673e+00]
 [-1.26418478e+00  8.00654259e-01 -1.22754100e+00 -1.31297673e+00]
 [-1.26418478e+00 -1.24957601e-01 -1.34127240e+00 -1.44444970e+00]
 [-1.87002413e+00 -1.24957601e-01 -1.51186952e+00 -1.44444970e+00]
 [-5.25060772e-02  2.18907205e+00 -1.45500381e

In [4]:
import warnings
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Disable the FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the Iris dataset
iris = load_iris()

# Create the feature matrix (X) and target vector (y)
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Vary the number of neighbors
neighbors = [1, 3, 5, 7, 9]

# Perform KNN classification for each number of neighbors
for n in neighbors:
    # Create KNN classifier with n neighbors
    knn = KNeighborsClassifier(n_neighbors=n)

    # Fit the classifier to the training data
    knn.fit(X_train, y_train)

    # Predict the classes for the test set
    y_pred = knn.predict(X_test)

    # Calculate the accuracy of the classifier
    accuracy = accuracy_score(y_test, y_pred)

    # Print the accuracy for each number of neighbors
    print("Number of Neighbors:", n)
    print("Accuracy:", accuracy)
    print("--------------------")


Number of Neighbors: 1
Accuracy: 1.0
--------------------
Number of Neighbors: 3
Accuracy: 1.0
--------------------
Number of Neighbors: 5
Accuracy: 1.0
--------------------
Number of Neighbors: 7
Accuracy: 0.9666666666666667
--------------------
Number of Neighbors: 9
Accuracy: 1.0
--------------------


In [6]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Download the Dermatology dataset from UCI repository
data = fetch_openml(name='dermatology', version=1)

# Create the feature matrix (X) and target vector (y)
X = data.data
y = data.target

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Scale the feature matrix
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into train and test sets (70%, 30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create KNN classifier
knn = KNeighborsClassifier()

# Fit the classifier to the training data
knn.fit(X_train, y_train)

# Predict the classes for the test set
y_pred = knn.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = np.mean(y_pred == y_test)

# Print the accuracy
print("Accuracy:", accuracy)

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Display the confusion matrix
print("Confusion Matrix:")
print(cm)

# Perform 10-fold cross-validation
cv_scores = cross_val_score(knn, X, y, cv=10)

# Print the cross-validation scores
print("Cross-Validation Scores:")
print(cv_scores)


Accuracy: 0.9727272727272728
Confusion Matrix:
[[39  1  0  0  0  0]
 [ 0 15  0  1  0  0]
 [ 0  0 19  0  0  0]
 [ 0  1  0 14  0  0]
 [ 0  0  0  0 16  0]
 [ 0  0  0  0  0  4]]
Cross-Validation Scores:
[1.         0.94594595 1.         0.94594595 0.97297297 0.94594595
 0.94444444 1.         0.94444444 0.86111111]
