In [23]:
import os, sys, random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# import matplotlib.image as img 
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import StandardScaler, Normalizer
import torchvision.transforms as transforms

In [24]:
# Helper that draws a random integer between a and b (both ends included).
def randomInt(a, b):
    """Return a random integer N with a <= N <= b."""
    upper_exclusive = b + 1
    return random.randrange(a, upper_exclusive)

# Augmentation helper for the training images.
# Applies up to three independent random transformations: horizontal flip,
# vertical flip and rotation.
def transformImage(image):
    """Randomly flip and/or rotate an image.

    Each of the three steps (horizontal flip, vertical flip, rotation) is
    decided independently by its own ``random.random() > 0.5`` draw, so any
    combination of them (including none) may be applied.

    Args:
        image: an image accepted by ``torchvision.transforms.functional``
            (e.g. a PIL image).

    Returns:
        The possibly transformed image.
    """
    if random.random() > 0.5:
        image = transforms.functional.hflip(image)
    if random.random() > 0.5:
        image = transforms.functional.vflip(image)
    if random.random() > 0.5:
        # Rotate by exactly the sampled angle. Wrapping the sampled value in
        # RandomRotation(degrees=d) would draw a *second* random angle from
        # [-d, d], which skews the angle distribution towards 0 and makes the
        # result depend on torch's RNG in addition to Python's `random`.
        angle = randomInt(0, 360)
        image = transforms.functional.rotate(image, angle)
    return image


In [25]:
# Reader for the training split of the image folder.
def readTrainImages(path, start=0, stop=15000):
    """Load a slice of the images in ``path`` as flattened grayscale arrays.

    The directory listing is sorted by filename and sliced with
    ``[start:stop]``; with the defaults this selects the first 15000 files.
    No augmentation (``transformImage``) is applied here.

    Args:
        path: directory that contains the image files.
        start: index of the first file (in sorted order) to load.
        stop: index one past the last file to load (``None`` = until the end).

    Returns:
        ``np.ndarray`` holding one flattened "L"-mode (grayscale) pixel vector
        per selected file; 2-D when all images have the same size. If no file
        is selected the result is an empty array of shape ``(0,)``.
    """
    train_images = []
    for filename in sorted(os.listdir(path))[start:stop]:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the numpy array.
        with Image.open(os.path.join(path, filename)) as image:
            # Convert to grayscale, then flatten to a 1-D feature vector.
            train_images.append(np.array(image.convert("L")).flatten())

    return np.array(train_images)

In [26]:
# Reader for the validation split of the image folder.
def readValidationImages(path, start=15000, stop=17000):
    """Load a slice of the images in ``path`` as flattened grayscale arrays.

    The directory listing is sorted by filename and sliced with
    ``[start:stop]``; with the defaults this selects files 15000..16999
    (0-based positions in the sorted listing).

    Args:
        path: directory that contains the image files.
        start: index of the first file (in sorted order) to load.
        stop: index one past the last file to load (``None`` = until the end).

    Returns:
        ``np.ndarray`` holding one flattened "L"-mode (grayscale) pixel vector
        per selected file; 2-D when all images have the same size. If no file
        is selected the result is an empty array of shape ``(0,)``.
    """
    validation_images = []
    for filename in sorted(os.listdir(path))[start:stop]:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the numpy array.
        with Image.open(os.path.join(path, filename)) as image:
            validation_images.append(np.array(image.convert("L")).flatten())

    return np.array(validation_images)

In [27]:
# Reader for the test split of the image folder.
def readTestImages(path, start=17000, stop=None):
    """Load a slice of the images in ``path`` as flattened grayscale arrays.

    The directory listing is sorted by filename and sliced with
    ``[start:stop]``; with the defaults this selects every file from
    position 17000 (0-based, in sorted order) to the end.

    Args:
        path: directory that contains the image files.
        start: index of the first file (in sorted order) to load.
        stop: index one past the last file to load (``None`` = until the end).

    Returns:
        ``np.ndarray`` holding one flattened "L"-mode (grayscale) pixel vector
        per selected file; 2-D when all images have the same size. If no file
        is selected the result is an empty array of shape ``(0,)``.
    """
    test_images = []
    for filename in sorted(os.listdir(path))[start:stop]:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the numpy array.
        with Image.open(os.path.join(path, filename)) as image:
            test_images.append(np.array(image.convert("L")).flatten())

    return np.array(test_images)

In [28]:
# Reader for the label files (one "id,label" record per line after a header).
def readLabels(path,trainData=False):
    """Read the integer labels from a comma-separated label file.

    The first line of the file is treated as a header and skipped. From every
    following non-blank line the second comma-separated field is parsed as an
    ``int``.

    Args:
        path: path of the label file.
        trainData: accepted for backward compatibility; currently unused
            (the label duplication it used to control is disabled).

    Returns:
        list[int]: the labels in file order.

    Raises:
        IndexError: if a non-blank data line has no second field.
        ValueError: if the second field is not an integer.
    """
    labels = []

    # `with` guarantees the file is closed even if a line fails to parse.
    with open(path) as f:
        # Skip the header line, which does not carry a label.
        next(f, None)

        for linie in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not linie.strip():
                continue
            labels.append(int(linie.split(',')[1]))

    return labels

In [29]:
# Build the bin edges used to quantize pixel values:
# `numarIntervale` evenly spaced values from 0 up to and including `Max`.
def generareIntervale(numarIntervale,Max):
    """Return ``numarIntervale`` evenly spaced edges covering ``[0, Max]``."""
    return np.linspace(0, Max, numarIntervale)

# Map every value of `matrice` to the index of the interval it falls into,
# producing an array of the same shape.
def valori_intervale(matrice,intervale):
    """Replace each value by the (0-based) index of its interval.

    Uses ``np.digitize`` and subtracts 1, so for increasing edges a value
    ``v`` with ``intervale[k] <= v < intervale[k + 1]`` becomes ``k``; values
    below the first edge become ``-1`` and values at or above the last edge
    become ``len(intervale) - 1``.

    Args:
        matrice: array-like of values (e.g. one flattened image per row).
        intervale: 1-D array of bin edges.

    Returns:
        ``np.ndarray`` of dtype float64 with the same shape as ``matrice``.
    """
    # np.digitize handles arrays of any shape, so no per-row Python loop is
    # needed. The result is cast to float64 to keep the dtype callers got
    # from the previous np.zeros-based implementation.
    indici = np.digitize(np.asarray(matrice), intervale)
    return indici.astype(np.float64) - 1

In [30]:
# Input locations (Kaggle dataset mount).
# pathLabels is the folder holding the label text files,
# pathImages is the folder holding the image files.
pathLabels = "/kaggle/input/unibuc-brain-ad/data"
pathImages = "/kaggle/input/unibuc-brain-ad/data/data"

# Alternative paths for a local run:
# pathImages = "D:/Chestii/Facultate/Anul II/Semestrul II/IA - Inteligenta Artificiala/Proiect ML/data/data"
# pathLabels = "D:/Chestii/Facultate/Anul II/Semestrul II/IA - Inteligenta Artificiala/Proiect ML/data"

In [31]:
# Load the data. The numbered print() calls are progress markers only.
train_images=readTrainImages(pathImages)

# Largest pixel value found in the training images.
Max=np.max(train_images)
print(1)

validation_images=readValidationImages(pathImages)
validationMax=np.max(validation_images)

# Keep the larger of the training and validation maxima.
# NOTE(review): in the cells visible here Max is only referenced from a
# commented-out line, so it may be unused.
Max=max(Max,validationMax)
print(2)

test_images=readTestImages(pathImages)
print(3)

# Disabled variant that passed trainData=True:
# train_labels=np.array(readLabels(pathLabels + "/train_labels.txt",trainData=True))
train_labels=np.array(readLabels(pathLabels + "/train_labels.txt"))

print(4)

validation_labels=np.array(readLabels(pathLabels + "/validation_labels.txt"))
print(5)

# Disabled: append the validation labels to the training labels.
# train_labels=np.concatenate((train_labels,validation_labels))

1
2
3
4
5


In [32]:
# Disabled alternative: 4 evenly spaced edges between 0 and the largest pixel value found in the images.
# intervale=generareIntervale(4,Max)

# Use 4 evenly spaced edges between 0 and 224; per the author's experiments this upper bound gave the best results.
intervale=generareIntervale(4,224)

# Quantize every split: each pixel is replaced by the index of the interval it
# falls into, so train[i] holds the interval indices for image i
# (likewise for validation and test). The print() calls are progress markers.
train=valori_intervale(train_images,intervale)
print(6)

validation=valori_intervale(validation_images,intervale)
print(7)

test=valori_intervale(test_images,intervale)
print(8)

6
7


In [33]:
# Define the model: multinomial Naive Bayes over the quantized pixel values.
bayes = MultinomialNB()
print(9)

# Train on the training split, then report accuracy on the validation split.
bayes.fit(train, train_labels)
scor = bayes.score(validation, validation_labels)
print(scor)

# Predict a label for every test image.
test_labels = bayes.predict(test)

# Build the submission rows "<id>,<label>". Ids start at 17001 and are
# zero-padded to 6 digits (e.g. "017001"). Deriving the rows from the
# predictions themselves keeps the row count equal to the number of test
# images instead of relying on a hard-coded id range.
FIRST_TEST_ID = 17001
stringLabels = [
    f"{image_id:06d},{label}"
    for image_id, label in enumerate(test_labels, start=FIRST_TEST_ID)
]

# Write the CSV with the predicted labels.
np.savetxt('rezultat.csv', stringLabels, fmt='%s', header="id,class", comments='')


9
0.722
