In [2]:
import PIL
from PIL import Image
#from PIL import ImageOps
import os
from os import listdir
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import math

from sklearn import metrics
import matplotlib.pyplot as plt

In [3]:
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : sequence
        True labels (list, numpy array or pandas Series).
    y_pred : sequence
        Predicted labels, in the same order and of the same length as
        ``y_true``.

    Returns
    -------
    float
        Number of positions where both labels are equal, divided by the
        number of labels.

    Raises
    ------
    ValueError
        If the two inputs differ in length or are empty.
    """
    # Compare strictly by position. Converting to arrays ignores any pandas
    # index and also makes two plain lists work (``list == list`` would be a
    # single bool, which cannot be summed element by element).
    true_labels = np.asarray(y_true)
    predicted_labels = np.asarray(y_pred)
    if true_labels.shape != predicted_labels.shape:
        raise ValueError("y_true and y_pred must have the same length")
    if true_labels.size == 0:
        raise ValueError("cannot compute accuracy for empty input")
    return float(np.mean(true_labels == predicted_labels))

def split_image_into_squares(im,rows_number):
    """Cut an image into a ``rows_number`` x ``rows_number`` grid of tiles.

    Parameters
    ----------
    im : image object
        Must expose ``size`` as ``(width, height)`` and ``crop(box)`` with
        ``box = (left, top, right, bottom)``.
    rows_number : int
        Number of tiles per row; the same number is used per column.

    Returns
    -------
    list
        The cropped tiles in row-major order (left to right, then top to
        bottom), so index ``r * rows_number + c`` is row ``r``, column ``c``.

    Raises
    ------
    ValueError
        If ``rows_number`` is smaller than 1.

    Notes
    -----
    For square images the tiles are squares. For other images the width and
    the height are divided independently, so the tiles are rectangles that
    still cover the whole picture.
    """
    if rows_number < 1:
        raise ValueError("rows_number must be a positive integer")
    columns_number = rows_number
    width, height = im.size

    # Every edge is derived directly from its index (no running float sum),
    # and rounded to a whole pixel so neighbouring tiles share their border.
    x_edges = [round(width * j / columns_number) for j in range(columns_number + 1)]
    y_edges = [round(height * i / rows_number) for i in range(rows_number + 1)]

    return [
        im.crop((x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1]))
        for i in range(rows_number)
        for j in range(columns_number)
    ]

def count_pixels(im, color):
    """Count the pixels of ``im`` whose value equals ``color``.

    ``im`` must provide ``getdata()`` yielding one value per pixel; each
    value is compared to ``color`` with ``==``.
    """
    return sum(1 for pixel_value in im.getdata() if pixel_value == color)

def normalize(image_size, pixel_count):
    """Express ``pixel_count`` as a percentage of ``image_size``.

    ``image_size`` is the total number of pixels in the image and
    ``pixel_count`` the number of pixels of the colour of interest.
    """
    fraction = pixel_count / image_size
    return fraction * 100

def images_to_dataframe(directory, rows_number, color=0, verbose=True):
    """Read labelled images from ``directory`` and turn them into a feature table.

    The directory is expected to contain one sub-folder per shape, named
    after that shape, with the image files inside. Each image is converted
    to 1-bit mode (so every pixel is either black or white), cut into a
    ``rows_number`` x ``rows_number`` grid, and the percentage of pixels equal
    to ``color`` is computed for every tile.

    Parameters
    ----------
    directory : str
        Root folder holding the per-shape sub-folders.
    rows_number : int
        Number of tiles per row (and per column) of the grid.
    color : int, optional
        Pixel value to count in each tile; defaults to 0.
    verbose : bool, optional
        When true (the default) the name of every processed file is printed.

    Returns
    -------
    pandas.DataFrame
        One row per image. Columns are named ``rXcY`` (X = tile row,
        Y = tile column, so ``r0c0`` is the top-left corner) and hold the
        percentage of ``color`` pixels in that tile; the last column,
        ``shape``, holds the name of the sub-folder the image came from.
        With no images at all, an empty frame with these columns is returned.
    """
    table = []
    # Sorted listings make the row order independent of the file system.
    for folder in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, folder)
        if not os.path.isdir(folder_path):
            continue  # stray files next to the shape folders carry no label
        for image_name in sorted(os.listdir(folder_path)):
            if verbose:
                print(image_name)
            # The context manager closes the file once the converted copy
            # has been made.
            with Image.open(os.path.join(folder_path, image_name)) as source_image:
                image = source_image.convert('1')  # pixels are now only black or white
            # Percentages (rather than raw counts) keep images of different
            # sizes comparable; raw counts would only be valid if every
            # input image had the same size.
            row = [
                normalize(tile.size[0] * tile.size[1], count_pixels(tile, color))
                for tile in split_image_into_squares(image, rows_number)
            ]
            row.append(folder)
            table.append(row)

    column_names = [
        "r" + str(i) + "c" + str(j)
        for i in range(rows_number)
        for j in range(rows_number)
    ]
    column_names.append("shape")

    # Passing the names to the constructor also works for an empty table.
    return pd.DataFrame(table, columns=column_names)
    
class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Usage: create with the number of neighbours ``k``, call ``fit`` with the
    training features and labels, then ``predict`` with the features to
    classify.
    """

    def __init__(self,k=3):
        self.k=k  # number of neighbours that vote on each prediction

    def fit(self,X_train, y_train):
        """Remember the training features and labels; nothing is computed here."""
        self.X_train=X_train
        self.y_train=y_train

    def calculateDistance(self,x,x1):
        """Return the Euclidean distance between the points ``x`` and ``x1``.

        Both arguments are indexable sequences of numbers; ``x1`` must have
        at least as many entries as ``x``. ``predict`` computes the same
        quantity in vectorised form and does not call this method.
        """
        squared_sum = 0
        for i in range(len(x)):
            squared_sum += (x[i] - x1[i]) ** 2
        return math.sqrt(squared_sum)

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : pandas.DataFrame or 2-D array-like
            One row per sample, with the same columns as the training data.

        Returns
        -------
        list
            One predicted label per row: the most frequent label among the
            ``k`` nearest training rows. When several labels are equally
            frequent, the one belonging to the nearest neighbour among them
            wins, so the result is deterministic.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 or there is no training data.
        """
        if self.k < 1:
            raise ValueError("k must be at least 1")
        train_points = np.asarray(self.X_train, dtype=float)
        train_labels = np.asarray(self.y_train)  # positional, like .iloc
        if train_points.shape[0] == 0:
            raise ValueError("the classifier has no training data")
        test_points = np.asarray(X_test, dtype=float)

        pred = []
        for point in test_points:
            # Distance from this sample to every training row at once.
            distances = np.sqrt(((train_points - point) ** 2).sum(axis=1))
            # Stable sort: equally distant rows keep their training order.
            nearest = np.argsort(distances, kind="stable")[:self.k]
            # Count votes in nearest-first order. ``max`` returns the first
            # key with the highest count, which gives the tie-break above.
            votes = {}
            for label in train_labels[nearest]:
                votes[label] = votes.get(label, 0) + 1
            pred.append(max(votes, key=votes.get))
        return pred

In [6]:
# Build the feature table from the images in "Shapes_dataset" using a 3x3 grid.
# The result is stored in `df`, which the following cells rely on.
# Alternative: load previously extracted features instead of re-reading the images:
#df=pd.read_csv(r"shapes_dataset_3.csv")
df=images_to_dataframe(r"Shapes_dataset",3)
df

1.png
10.png
11.png
12.png
13.png
14.png
15.png
16.png
17.png
18.png
19.png
2.png
20.png
20240505_175102.jpg
20240505_175106.jpg
20240505_175110.jpg
20240505_175139.jpg
20240505_175142.jpg
20240505_175332.jpg
20240505_175335.jpg
20240505_175338.jpg
20240505_175341.jpg
20240505_175343.jpg
20240505_175346.jpg
20240505_175448.jpg
20240505_175452.jpg
20240505_175454.jpg
20240505_175457.jpg
20240505_175502.jpg
20240505_175510.jpg
20240505_175514.jpg
20240505_175517.jpg
20240505_175520.jpg
20240505_175522.jpg
20240505_175610.jpg
20240505_175612.jpg
20240505_175616.jpg
20240505_175622.jpg
20240505_175626.jpg
20240505_175628.jpg
20240505_175656.jpg
20240505_175700.jpg
20240505_175734.jpg
20240505_175741.jpg
20240505_175745.jpg
20240505_175828.jpg
20240505_175831.jpg
20240505_175834.jpg
20240505_175836.jpg
20240505_175840.jpg
20240505_175843.jpg
20240505_175945.jpg
20240505_175947.jpg
20240505_175949.jpg
20240505_175952.jpg
20240505_175953.jpg
20240505_175955.jpg
20240505_180037.jpg
20240505_18

20240505_210328.jpg
20240505_210333.jpg
20240505_210337.jpg
20240505_210441.jpg
20240505_210451.jpg
20240505_210456.jpg
20240505_210459.jpg
20240505_210604.jpg
20240505_210608.jpg
20240505_210624.jpg
20240505_210627.jpg
20240505_210631.jpg
20240505_210635.jpg
20240505_210744.jpg
20240505_210750.jpg
20240505_210755.jpg
20240505_210759.jpg
20240505_210803.jpg
20240505_210806.jpg
20240505_210810.jpg
20240505_210854.jpg
20240505_210858.jpg
20240505_210903.jpg
20240505_210909.jpg
20240505_210912.jpg
20240505_210915.jpg
20240505_211000.jpg
20240505_211005.jpg
20240505_211011.jpg
20240505_211019.jpg
20240505_211021.jpg
20240505_211025.jpg
20240505_211104.jpg
20240505_211110.jpg
20240505_211114.jpg
20240505_211118.jpg
20240505_211126.jpg
20240505_211130.jpg
20240505_211229.jpg
20240505_211234.jpg
20240505_211240.jpg
20240505_211243.jpg
20240505_211247.jpg
20240505_211253.jpg
20240505_211257.jpg
20240505_211301.jpg
20240505_211348.jpg
20240505_211354.jpg
20240505_211358.jpg
20240505_211401.jpg


Unnamed: 0,r0c0,r0c1,r0c2,r1c0,r1c1,r1c2,r2c0,r2c1,r2c2,shape
0,3.965248,5.223881,3.564268,4.794211,0.000000,4.816825,3.720205,4.975124,3.541992,circle
1,2.762308,6.331976,3.564268,5.178652,0.000000,5.269109,3.653375,4.997739,7.262196,circle
2,4.076632,6.422433,3.007351,4.816825,0.000000,4.590683,5.012252,4.703754,4.722655,circle
3,4.165738,4.748982,4.989975,4.635911,0.000000,4.862053,3.163288,6.309362,5.970149,circle
4,4.143462,4.726368,3.430608,5.065581,0.000000,5.970149,3.475162,5.246495,5.034529,circle
...,...,...,...,...,...,...,...,...,...,...
684,3.163288,6.874717,0.000000,4.884668,1.882461,7.168702,5.747382,5.336952,2.762308,triangle
685,1.314324,6.829489,3.831588,7.960199,0.918274,5.088195,0.000000,4.115785,7.239920,triangle
686,0.000000,7.485301,1.626197,7.236545,2.043159,4.839439,2.160838,5.269109,7.017153,triangle
687,0.000000,5.042967,3.408331,7.078245,2.433425,4.907282,1.804411,5.133424,7.484963,triangle


In [10]:
# Separate the tile features from the label column and hold out 30% of the
# rows for testing.
SEED = 42  # fixed seed so the split is identical on every run
X = df.drop(columns="shape")  # every column except the label
y = df["shape"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

In [None]:
# Repeated hold-out evaluation: average the accuracy of a 1-nearest-neighbour
# classifier over many random 70/30 splits.
N_REPEATS = 100
results=[]
for i in range(N_REPEATS):
    # A different but fixed seed per repetition: the splits vary between
    # repetitions, yet the reported mean is reproducible.
    X_train, X_test, y_train, y_test =train_test_split(X,y,test_size=0.3,random_state=i)
    knn = KNN(1)
    knn.fit(X_train, y_train)
    y_pred=knn.predict(X_test)
    results.append(calculate_accuracy(y_test,y_pred))
print(np.mean(results))