# Importing Dependencies

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
import time
import math
from random import randint
import random
import pickle

# Importing and Describing Dataset

In [None]:
# Load the college records into a DataFrame. The path is relative, so the CSV
# must sit in the notebook's working directory.
path = "college_details.csv"
dataset = pd.read_csv(path)

In [None]:
# Preview the first five rows.
dataset.head(5)

In [None]:
# Preview the last five rows.
dataset.tail(5)

In [None]:
# (number of rows, number of columns) of the loaded dataset.
dataset.shape

In [None]:
# Summary statistics of the dataset (pandas `describe` with default settings).
dataset.describe()

In [None]:
# Number of rows for each distinct value of the 'Lable' column.
dataset.groupby('Lable').size()

# Plotting Dataset

In [None]:
# Coordinates for the scatter plot, taken straight from the DataFrame columns
# instead of walking the frame row by row with iterrows().
data_lat = dataset["Lat"].tolist()
data_lon = dataset["Lon"].tolist()

# One random colour index (0-100) per college; the values carry no meaning and
# only serve to make neighbouring points distinguishable in the plot.
colors = [randint(0, 100) for _ in range(len(dataset))]

In [None]:
# Scatter every college with longitude on the x axis and latitude on the y
# axis, coloured by the per-point values in `colors`.
fig, ax = plt.subplots(figsize=(40, 40))
ax.scatter(data_lon, data_lat, c=colors, cmap='nipy_spectral')
ax.set_title('Location of Colleges in India', fontsize=40)
ax.set_xlabel('Longitude', fontsize=30)
ax.set_ylabel("Latitude", fontsize=30)
plt.show()

# Splitting and Training

In [None]:
# Feature matrix: the two coordinate columns. Target vector: the 'Lable' column.
feature_columns = ['Lat', 'Lon']
X = dataset[feature_columns].to_numpy()
y = dataset['Lable'].to_numpy()

In [None]:
# Hold out 30 % of the rows for testing (a 7:3 train/test split).
# NOTE: no random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Baseline model: k-nearest neighbours with k = 1.
classifier = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred = classifier.predict(X_test)

In [None]:
# Confusion matrix comparing the true test labels with the k = 1 predictions.
confusion_matrix(y_test, y_pred)

In [None]:
# Text classification report for the k = 1 predictions on the test split.
print(classification_report(y_test, y_pred))

In [None]:
# Test-set accuracy of the k = 1 model, expressed as a percentage.
accuracy = 100 * accuracy_score(y_test, y_pred)
print(f'Accuracy of our model is equal {round(accuracy, 2)!s} %.')

In [None]:
# Test-set error rate for every K from 1 to 39 (range(1, 40) excludes 40).
error = []
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Fraction of test samples this K misclassifies.
    error.append(np.mean(pred_i != y_test))

# NOTE: once the loop ends, `knn` is the last model fitted (K = 39), which is
# not necessarily the one with the lowest error.

# Lowest error rate observed over all K values.
print(min(error))

In [None]:
# Error rate against K, used to pick the number of neighbours by eye.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    range(1, 40),
    error,
    color='red',
    linestyle='dashed',
    marker='o',
    markerfacecolor='blue',
    markersize=10,
)
ax.set_title('Error Rate K Value')
ax.set_xlabel('K Value')
ax.set_ylabel('Mean Error')

In [None]:
# Map the position of the smallest error back to its K value. On ties the
# first (smallest) K wins, because list.index returns the first match.
k_values = range(1, 40)
lowest_error = min(error)
best_k = k_values[error.index(lowest_error)]
print(f"The optimal number of neighbors is {best_k:d}.")

# Converting our trained model into a saved (pickled) model

In [None]:
# Persist the model that `nearest_college` later loads and queries.
#
# Pickling `knn` directly was wrong on two counts:
#   * `knn` is only the last model of the K sweep (K = 39), not the best one;
#   * every classifier above was fitted on standardized features, whereas
#     `nearest_college` calls predict() with raw latitude/longitude.
# Bundling a StandardScaler with a KNN that uses `best_k` into one pipeline
# fixes both, while the saved object still exposes the same predict() method.
from sklearn.pipeline import make_pipeline

# X_train was overwritten with its standardized version earlier, so undo the
# scaling to recover the raw coordinates the pipeline must be fitted on.
raw_X_train = scaler.inverse_transform(X_train)
final_model = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=best_k),
)
final_model.fit(raw_X_train, y_train)

with open('nearest_college_model', 'wb') as f:
    pickle.dump(final_model, f)

In [None]:
# Reload the pickled model from disk; `model` is the global that
# nearest_college() uses for its predictions.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# files that were produced by this notebook or another trusted source.
with open ('nearest_college_model', 'rb') as f:
    model = pickle.load(f)

# Prediction of Nearest Colleges

In [None]:
def nearest_college(name, *, pool_size=16, n_results=5):
    """Suggest colleges located near the college called *name*.

    The college is looked up by exact name in ``college_details.csv``, the
    global ``model`` predicts a label for its coordinates, and the
    *pool_size* closest other colleges sharing that label and the same city
    are collected. A random selection of at most *n_results* names from that
    pool is returned.

    Args:
        name: Full college name as stored in the ``Name`` column. The text
            after the last comma is taken to be the city.
        pool_size: How many of the closest colleges to keep before the
            random selection (default 16).
        n_results: Maximum number of names to return (default 5).

    Returns:
        A list of college names (possibly empty), or ``False`` if anything
        went wrong (unknown college, missing file or column, ...). In the
        failure case the error is printed.
    """
    try:
        query = name.strip()
        city = name.split(",")[-1].strip()

        dataset = pd.read_csv("college_details.csv")
        matches = dataset[dataset["Name"] == query]
        if matches.empty:
            raise ValueError(f"College not found: {query!r}")

        # NOTE(review): the query coordinates come from "LatX"/"LonX", while
        # distances below use "Lat"/"Lon" -- confirm both column pairs exist
        # in the CSV and are on the same scale.
        # .iloc[0] avoids the deprecated float(Series) conversion and copes
        # with duplicate names by using the first match.
        lat = float(matches["LatX"].iloc[0])
        lon = float(matches["LonX"].iloc[0])

        label = model.predict([[lat, lon]])[0]

        # NOTE(review): the original fell back to the whole cluster when the
        # city had fewer than 15 colleges, yet still required a city match
        # for every candidate, so only same-city colleges were ever returned.
        # That effective behaviour is kept here -- confirm whether widening
        # beyond the city was intended.
        candidates = dataset[
            (dataset["Lable"] == label)
            & (dataset["City"] == city)
            & (dataset["Name"] != query)
        ]

        # Straight-line distance in coordinate space to each candidate.
        distance = (
            (candidates["Lat"] - lat) ** 2 + (candidates["Lon"] - lon) ** 2
        ) ** 0.5
        nearest = candidates.assign(_distance=distance).nsmallest(
            pool_size, "_distance"
        )

        # Drop repeated names while keeping nearest-first order, then pick a
        # random subset so repeated queries vary their suggestions.
        names = list(dict.fromkeys(nearest["Name"]))
        random.shuffle(names)
        return names[:n_results]

    except Exception as e:
        # Contract kept from the original: report the problem and signal
        # failure with False instead of raising.
        print(e)
        return False

In [None]:
# Ask for a college name on standard input and show the suggested colleges.
nearest_college(input("Enter: "))

In [None]:
# Sample college names
# JSS ACADEMY OF TECHNICAL EDUCATION, BANGALORE URBAN
# JSS ACADEMY OF TECHNICAL EDUCATION, GAUTAM BUDDHA NAGAR
# AJAY KUMAR GARG ENGINEERING COLLEGE, GHAZIABAD
# INDERPRASTHA ENGINEERING COLLEGE, GHAZIABAD
# HITECH INSTITUTE, GHAZIABAD