In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from collections import Counter
import geopy.distance
from geopy.geocoders import Nominatim

def euclidian_distance(x1, x2):
    val = np.sqrt(np.sum((x1-x2)**2))
    print(val)
    return val

def manhattan_distance(x1, x2):
    val = np.sum(np.absolute(x1-x2))
    print(val)
    return val

k = 0;

In [2]:
class CustomkNN:
    
    def __init__(self, k=3):
        self.k = k
    
    #fit training labels and training set
    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
    
    
    def predict(self, X, type_distance = 0): #can get multiple samples
        if type_distance == 1:
            print("Manhattan")
        else:
            print("Euclidian")
            
        print(X)
        predicted_labels = [self._predict(x, type_distance) for x in X]
        print(predicted_labels)
        return np.array(predicted_labels)
        
    def _predict(self, x, type_distance = 0):
        #racuna distancu
        if type_distance == 0 :  #euclidian distance
            distances = [euclidian_distance(x, x_train) for x_train in self.X_train.values]
        elif type_distance == 1:
            distances = [manhattan_distance(x, x_train) for x_train in self.X_train.values]
        #odredjuje k najblizih suseda
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        #majority vote, most common class label
        most_common = Counter(k_nearest_labels).most_common(1)
        class_num = most_common[0][0]
        decided_class = decide_class(class_num)
        print(decided_class)
        return class_num

In [3]:
def define_class(y):
    if y <= 49999:
        return 0
    elif (y >= 50000) and (y <= 99999):
        return 1
    elif (y >= 100000) and (y <= 149999):
        return 2
    elif (y >= 150000) and (y <= 199999):
        return 3
    else:
        return 4
    
def decide_class (class_num):
    if class_num == 0:
        return "price <= 49999"
    elif class_num == 1:
        return "50000 <= price <= 99999"
    elif class_num == 2:
        return "100000 <= price <= 149999"
    elif class_num == 3:
        return "150000 <= price <= 199999"
    else :
        return "price >= 200000"

def return_data() :
    #ucitavanje podataka
    X = pd.read_csv("with_distance.csv")
    print(X.dtypes)
    Y = pd.read_csv("y_non_null.csv")

    X = X[['broj_soba', 'kvadratura_m2', 'spratnost', 'udaljenost', 'godina']]
    Y = Y['cena_eur']
    Y = [define_class(y) for y in Y.values]
    print(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1234)
    k = np.sqrt(X_train.shape[0])
    print(k)
    
    X_train = X_train[['broj_soba', 'kvadratura_m2', 'spratnost', 'udaljenost', 'godina']]
    return X_train,X_test,Y_train, Y_test

def distanca_od_centra_km(lokacija):
    print(lokacija)
    geolocator = Nominatim(user_agent="MOJ_APP")
    #koordinate centra
    location = geolocator.geocode("Kneza Mihaila Beograd")
    coords_centar = (location.latitude, location.longitude)
    print(coords_centar)
    #koordinate lokacija
    location = geolocator.geocode(lokacija)
    if location != None:
        coords_lokacija = (location.latitude, location.longitude)
        print(coords_lokacija)
        distanca = geopy.distance.distance(coords_lokacija, coords_centar).km
        print(distanca)
        return distanca
    else:
        return None

def test_knn(x, k = 3, type_distance = 0 ):
    X_train, X_test, Y_train, Y_test = return_data()
    clf = None;
    num_sim = k;
    if num_sim == -1:
        num_sim = int(np.sqrt(X_train.shape[0]))
    clf = CustomkNN(k = num_sim)
    clf.fit(X_train, Y_train)
    if x.empty:
        predictions = clf.predict(X_test.values, type_distance)
    else:
        print(x['lokacija'])
        x['udaljenost'] = distanca_od_centra_km(x['lokacija'][0] + ' Beograd')
        print(x)
        x = x[['broj_soba', 'kvadratura_m2', 'spratnost', 'udaljenost', 'godina']]
        predictions = clf.predict(x.values, type_distance)
    #acc = np.sum(predictions == Y_test) / len(Y_test)
   #print(acc)
    
def test_knn_gui():
    window = tk.Tk()
    window.title("KNN algorithm test")
    window.geometry('400x150')
    
    frame = tk.Frame(window, relief='sunken', bg = 'white')
    frame.pack(fill= BOTH, expand=True, padx=10, pady = 20)
    broj_soba_label = tk.Label(frame, text="Broj soba")
    broj_soba_label.grid(row=1, column=1)
    #broj_soba_label.pack(pady=15)
    
    kvadratura_m2_label = tk.Label(frame, text="Kvadratura(m2)")
    kvadratura_m2_label.grid(row=3, column=1)
    #kvadratura_m2_label.pack(pady=15)
    
    sprat_label = tk.Label(frame, text="Sprat")
    sprat_label.grid(row=5, column=1)
    #sprat_label.pack(pady=15)
    
    lokacija_label = tk.Label(frame, text="Lokacija")
    lokacija_label.grid(row=7, column=1)
    
    godina_label = tk.Label(frame, text="Godina")
    godina_label.grid(row=9, column=1)
    
    man = Tkinter.IntVar() #0 unchecked 1 checked
    chk_Manhattan = Tkinter.Chekbutton(frame, text="Manhattan", variable = man)
    chk_Manhattan.grid(row = 11, column=1)
    
    euk = Tkinter.IntVar() #0 unchecked 1 checked
    chk_Euclidian = Tkinter.Chekbutton(frame, text="Euclidian", variable = euk)
    chk_Euclidian.grid(row = 11, column=2)
    #lokacija_label.pack(pady=15)
    
    e1 = tk.Entry(frame)
    e2 = tk.Entry(frame)
    e3 = tk.Entry(frame)
    e4 = tk.Entry(frame)
    e5 = tk.Entry(frame)
    e1.grid(row=1, column=2)
    #e1.pack(pady=15)
    e2.grid(row=3, column=2)
    #e2.pack(pady=15)
    e3.grid(row=5, column=2)
    #e3.pack(pady=15)
    e4.grid(row=7, column=2)
    #e4.pack(pady=15)
    e5.grid(row=9, column=2)
    #e5.pack(pady=15)
    
    # This will create style object
    style = Style()

    def clicked(broj_soba, kvadratura, sprat, lokacija, godina, regressor):
        if broj_soba == None or broj_soba == "" or kvadratura == None or kvadratura == "" or sprat == None or sprat == "" or lokacija == None or lokacija == "" or godina == None or godina == "":
            tk.messagebox.showerror("Error", "Unesite sve podatke!")
        else :
        
            predvidjena_cena = Y_predict
            tk.messagebox.showinfo("Results", "Predvidjena cena: " + str(predvidjena_cena))
            #except Exception as error:
                #print(error)
                #tk.messagebox.showerror("Error", "Pokusajte ponovo!")
                
    # This will be adding style, and
    # naming that style variable as
    # W.Tbutton (TButton is used for ttk.Button).
    style.configure('W.TButton', font =
               ('calibri', 10, 'bold', 'underline'),
                foreground = 'red')
    btn = Button(frame, text="Predvidi cenu", state="DISABLED" , style="W.TButton", command=(lambda: clicked(e1.get(), e2.get(), e3.get(), e4.get(), e5.get(), man, euk)))
    btn.grid(row = 12, column=1)
    #btn.pack(pady=15)
    
    
    window.mainloop()
def test_scaller(X):
    X = pd.read_csv("with_distance.csv")
    Y = pd.read_csv("y_non_null.csv")
                
    X_min = np.amin(X)
    X_max = np.amax(X)
    Y_min = np.amin(Y)
    Y_max = np.amax(Y)
    sc = CustomScaler(feature_range=(0,1)).fit_test(X, X_min, X_max)
    test_transformed = sc.normalize(X, X_min, X_max)
    print(test_transformed)

In [4]:
#return_data()

#a = [1, 1, 1, 1, 2, 2, 3, 4, 5, 6]
#most_common = Counter(a).most_common(1)
#print(most_common)
test = {'broj_soba': [2], 'kvadratura_m2': [68], 'spratnost': [1], 'lokacija': ['Mali Mokri Lug'], 'godina': [2005]}
df = pd.DataFrame(test)
print(df.values)
print(test['lokacija'][0])
test_knn(df, k = -1, type_distance = 1)

[[2 68 1 'Mali Mokri Lug' 2005]]
Mali Mokri Lug
Unnamed: 0         int64
broj_soba          int64
kvadratura_m2      int64
spratnost          int64
godina             int64
udaljenost       float64
dtype: object
[3, 2, 2, 4, 3, 2, 2, 2, 2, 2, 4, 2, 4, 1, 4, 3, 4, 3, 4, 4, 2, 2, 0, 1, 0, 4, 4, 3, 1, 1, 0, 1, 3, 2, 1, 3, 3, 3, 3, 4, 2, 2, 3, 4, 2, 3, 2, 4, 2, 3, 3, 1, 3, 2, 1, 3, 4, 4, 2, 1, 2, 2, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 2, 1, 2, 2, 2, 4, 4, 2, 4, 4, 1, 0, 1, 1, 1, 4, 2, 2, 2, 3, 2, 2, 2, 2, 4, 4, 4, 3, 2, 2, 1, 1, 0, 4, 1, 1, 4, 1, 3, 1, 1, 2, 2, 0, 2, 2, 1, 3, 4, 0, 1, 0, 0, 2, 2, 1, 3, 1, 1, 4, 3, 0, 1, 1, 4, 4, 2, 1, 3, 2, 4, 3, 2, 3, 4, 1, 1, 2, 2, 2, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 4, 3, 1, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 2, 4, 3, 2, 2, 4, 3, 3, 2, 2, 2, 2, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 1, 3, 2, 2, 2, 2, 1, 3, 2, 2, 0, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 3, 3, 1, 3, 1, 1, 4, 1, 1, 4