In [None]:
from os.path import exists
import numpy as np 
import torch
import clip
from PIL import Image
import faiss
import csv
import json


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-L/14 CLIP checkpoint in its JIT form; `clip.load` returns the
# model together with its matching `preprocess` callable.
model, preprocess = clip.load("ViT-L/14", jit=True, device=device)


In [None]:
def build_index(file, index_type, dim=768, encoding="utf-8-sig"):
    """Embed every label of a CSV file with CLIP and store the vectors in a FAISS index.

    Each non-blank CSV row becomes one text prompt (several columns are joined
    with a single space). The prompt is tokenized with ``clip.tokenize``,
    encoded with the notebook-level ``model`` on ``device`` and appended to the
    index. The position of a vector in the index is the key of its label in
    the returned dictionary.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file to read.
    index_type : str
        ``"flat"`` (case-insensitive, surrounding whitespace ignored) or a
        blank string selects ``faiss.IndexFlatL2``. Nothing else is implemented.
    dim : int, optional
        Dimensionality passed to ``faiss.IndexFlatL2``. It has to match the
        width of the vectors returned by ``model.encode_text``; 768 is the
        value this notebook uses with the ViT-L/14 checkpoint.
    encoding : str, optional
        Text encoding of the CSV file. ``"utf-8-sig"`` reads UTF-8 and drops a
        leading byte-order mark, so the BOM never ends up inside the first
        label.

    Returns
    -------
    tuple
        ``(index, dictionary)`` where ``dictionary`` maps the 0-based position
        of each vector in ``index`` to the label text it was computed from.

    Raises
    ------
    ValueError
        If ``index_type`` names an index type that is not implemented.
    RuntimeError
        If a row cannot be tokenized, encoded or added to the index; the
        original exception is chained as ``__cause__``.
    """
    requested_type = index_type.strip().lower()
    if requested_type not in ("flat", ""):
        # Callers unpack two values, so fail loudly rather than return a
        # placeholder that would only break later with an unrelated error.
        raise ValueError(
            f"Unsupported index type {index_type!r}; only 'flat' is implemented."
        )

    index = faiss.IndexFlatL2(dim)
    dictionary = {}

    # newline="" is what the csv module expects from the file object; a separate
    # handle name keeps the `file` argument intact for error messages.
    with open(file, "r", newline="", encoding=encoding) as csv_file:
        for row_number, row in enumerate(csv.reader(csv_file), start=1):
            # Join the cells directly so commas, brackets or apostrophes that
            # are part of a label survive untouched.
            label = " ".join(row).strip()
            if not label:
                # Blank rows would otherwise be embedded as an empty prompt.
                continue

            try:
                tokens = clip.tokenize(label).to(device)
                # Inference only: no autograd graph is needed for the embedding.
                with torch.no_grad():
                    text_features = model.encode_text(tokens)
                # FAISS takes float32 numpy arrays.
                text_embedding = text_features.cpu().detach().numpy().astype("float32")
                index.add(text_embedding)
            except Exception as e:
                raise RuntimeError(
                    f"Could not index row {row_number} ({label!r}) of {file!r}: {e}"
                ) from e

            # Vectors are appended in order, so the next free key equals the
            # position the vector just received in the index.
            dictionary[len(dictionary)] = label

    return index, dictionary

In [None]:
# Absolute Windows paths of the FairFace gender, age and race label CSVs
# that are passed to build_index.
# NOTE(review): these are machine-specific locations; the notebook only runs
# where this D:\CSVs layout exists.
file="D:\\CSVs\\Fairface_Gender Labels.csv"
file2 = "D:\\CSVs\\FairFace_Age Labels.csv"
file3 = "D:\\CSVs\\FairFace_Race Labels.csv"

In [None]:
# One flat index plus its position -> label dictionary per FairFace attribute.
(genders_index, gender_dictionary) = build_index(file, index_type="flat")
(age_index, age_dictionary) = build_index(file2, index_type="flat")
(race_index, race_dictionary) = build_index(file3, index_type="flat")

In [None]:
# print(genders_index.ntotal)
# print(gender_dictionary)
# print(age_index.ntotal)
# print(age_dictionary)
# print(race_index.ntotal)
# print(race_dictionary)

In [None]:
from faiss import write_index
import pickle

In [None]:
# Write the three FairFace indexes to disk using FAISS' own serializer.
faiss.write_index(
    genders_index,
    "D:\\Final_Project\\V2\\labels\\data\\genders.index",
)
faiss.write_index(
    age_index,
    "D:\\Final_Project\\V2\\labels\\data\\age.index",
)
faiss.write_index(
    race_index,
    "D:\\Final_Project\\V2\\labels\\data\\race.index",
)

In [None]:
# Pickle the gender position -> label dictionary.
try:
    # The context manager closes the file even if pickling fails part-way,
    # which a manual open()/close() pair does not guarantee.
    with open('D:\\Final_Project\\V2\\labels\\data\\genders_dictionary', 'wb') as new_file:
        pickle.dump(gender_dictionary, new_file)

except Exception as e:
    print("Something went wrong")
    print(e)

In [None]:
# Pickle the age position -> label dictionary.
try:
    # The context manager closes the file even if pickling fails part-way,
    # which a manual open()/close() pair does not guarantee.
    with open('D:\\Final_Project\\V2\\labels\\data\\age_dictionary', 'wb') as new_file:
        pickle.dump(age_dictionary, new_file)

except Exception as e:
    print("Something went wrong")
    print(e)

In [None]:
# Pickle the race position -> label dictionary.
try:
    # The context manager closes the file even if pickling fails part-way,
    # which a manual open()/close() pair does not guarantee.
    with open('D:\\Final_Project\\V2\\labels\\data\\race_dictionary', 'wb') as new_file:
        pickle.dump(race_dictionary, new_file)

except Exception as e:
    print("Something went wrong")
    print(e)

In [None]:
# NOTE(review): `file` points at the gender-labels CSV, so `nationalities` and
# `nat_dictionary` are built from gender labels. This looks like a copy-paste
# leftover — confirm which CSV was intended.
file="D:\\CSVs\\Fairface_Gender Labels.csv"
# NOTE(review): file2 and file3 are assigned here but no call in this cell uses them.
file2 = "D:\\CSVs\\FairFace_Age Labels.csv"
file3 = "D:\\CSVs\\FairFace_Race Labels.csv"
nationalities, nat_dictionary = build_index(file, 'flat')
# `file` is rebound before the second call, so the statement order matters.
file="D:\\CSVs\\dogs.csv"
dogs, dog_dictionary= build_index(file, 'flat')

In [None]:
# Cell output: number of vectors stored in the `nationalities` index.
nationalities.ntotal

In [None]:
# Cell output: number of vectors stored in the `dogs` index.
dogs.ntotal

In [None]:
# Cell output: the position -> label dictionary returned alongside `nationalities`.
nat_dictionary

In [None]:
# Cell output: the position -> label dictionary returned alongside `dogs`.
dog_dictionary

In [None]:
# Rebind the shared `file` path to the sexes CSV, then build its flat index
# and position -> label dictionary.
file="D:\\CSVs\\sexes.csv"
sexes_index, sexes_dict = build_index(file, 'flat')

In [None]:
# A notebook cell only renders its last expression, so a bare `sexes_dict`
# line ahead of the count is never shown. Emit both as one tuple instead.
sexes_dict, sexes_index.ntotal

In [None]:
# Rebind the shared `file` path to the hair CSV, then build its flat index
# and position -> label dictionary.
file="D:\\CSVs\\hair.csv"
hair_index, hair_dict = build_index(file, 'flat')

In [None]:
# Cell output: the position -> label dictionary returned alongside `hair_index`.
hair_dict

In [None]:
# Build the index/dictionary pair for the pre-labelled CelebA CSV.
file="D:\\CSVs\\celeb_A_pre_labelled.csv"
celeb_A_pre_labelled_index, celeb_A_pre_labelled_dict = build_index(file, 'flat')

# `file` is rebound here, so this call must stay after the one above.
file="D:\\CSVs\\generations(young old).csv"
generations_index, generations_dict = build_index(file, 'flat')


In [None]:
# Cell output: the position -> label dictionary returned alongside `celeb_A_pre_labelled_index`.
celeb_A_pre_labelled_dict

In [None]:
# Four index/dictionary pairs built one after another. `file` is rebound
# before every call, so each assignment must directly precede its build_index call.
file="D:\\CSVs\\beard.csv"
beard_index, beard_dict = build_index(file, 'flat')

file="D:\\CSVs\\wearing_glasses.csv"
wearingglasses_index, wearingglasses_dict = build_index(file, 'flat')

file="D:\\CSVs\\wearing_hat.csv"
hat_index, hat_dict = build_index(file, 'flat')

file="D:\\CSVs\\mouthopened.csv"
mouthopened_index, mouthopened_dict = build_index(file, 'flat')

In [None]:
# Cell output: the position -> label dictionary returned alongside `mouthopened_index`.
mouthopened_dict

In [None]:
from faiss import write_index

# Serialize every index built above into the current working directory.
faiss.write_index(nationalities, "nationalities.index")
faiss.write_index(dogs, "dogs.index")
faiss.write_index(hair_index, "hair.index")
faiss.write_index(sexes_index, "sexes.index")

# CelebA-derived label sets.
faiss.write_index(generations_index, "generations.index")
faiss.write_index(celeb_A_pre_labelled_index, "celeb_A_pre_labelled.index")

# Single-attribute label sets.
faiss.write_index(beard_index, "beard.index")
faiss.write_index(wearingglasses_index, "wearing_glasses.index")
faiss.write_index(hat_index, "wearing_hat.index")
faiss.write_index(mouthopened_index, "mouthopened.index")

In [None]:
import pickle

In [None]:
# Pickle the nationalities position -> label dictionary into the working directory.
try:
    # The context manager closes the file even if pickling fails part-way.
    with open('nationalities_dictionary', 'wb') as new_file:
        pickle.dump(nat_dictionary, new_file)

# Catch Exception rather than using a bare except so Ctrl-C still interrupts
# the cell, and report the cause instead of hiding it.
except Exception as e:
    print("Something went wrong")
    print(e)

In [None]:
# Pickle every remaining position -> label dictionary into the working directory.
# Each entry is (output file name, dictionary to store).
_dictionaries_to_save = (
    ('dog_dictionary', dog_dictionary),
    ('sexes_dict', sexes_dict),
    ('hair_dict', hair_dict),
    ('celeb_A_pre_labelled_dict', celeb_A_pre_labelled_dict),
    ('generations_dict', generations_dict),
    ('beard_dict', beard_dict),
    ('wearingglasses_dict', wearingglasses_dict),
    ('hat_dict', hat_dict),
    ('mouthopened_dict', mouthopened_dict),
)

for _target_path, _label_dictionary in _dictionaries_to_save:
    # Every file is attempted independently, so one failure does not stop the rest.
    try:
        # The context manager closes the file even if pickling fails part-way.
        with open(_target_path, 'wb') as new_file:
            pickle.dump(_label_dictionary, new_file)

    # Catch Exception rather than using a bare except so Ctrl-C still
    # interrupts the cell, and say which file failed and why.
    except Exception as e:
        print("Something went wrong")
        print(f"{_target_path}: {e}")
            

In [None]:
# Load the pickled dog dictionary from disk and print it.
# The handle gets its own name so the notebook-level `file` variable (a CSV
# path in other cells) is not rebound to a closed file object.
# NOTE(review): pickle.load can execute arbitrary code; only load files this
# project produced itself.
# NOTE(review): this path differs from the 'dog_dictionary' file written to the
# working directory — presumably the file was moved by hand; confirm.
with open("Indexes\\dogs\\dog_dictionary", "rb") as pickle_file:
    loaded_dict = pickle.load(pickle_file)

print(loaded_dict)

In [None]:
# Rebind the shared `file` path to the professions CSV, then build its flat
# index and position -> label dictionary.
file="D:\\CSVs\\professions.csv"
professions_index, professions_dict = build_index(file, 'flat')

In [None]:
# Serialize the professions index into the current working directory.
faiss.write_index(
    professions_index,
    "professions.index",
)

In [None]:
# Pickle the professions position -> label dictionary into the working directory.
try:
    # The context manager closes the file even if pickling fails part-way.
    with open('professions_dict', 'wb') as new_file:
        pickle.dump(professions_dict, new_file)

# Catch Exception rather than using a bare except so Ctrl-C still interrupts
# the cell, and report the cause instead of hiding it.
except Exception as e:
    print("Something went wrong")
    print(e)
            