In [2]:
import sqlite3
import os
import cv2 as cv 
from PIL import Image

## **CREATE TABLES**
--------------------------------------------

In [3]:
# Build the database location (../databases/image_recommender.db) and make
# sure its parent folder exists before SQLite tries to create the file.
database_path = os.path.join(os.pardir, "databases", "image_recommender.db")
database_dir = os.path.dirname(database_path)

# exist_ok removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(database_dir, exist_ok=True)

conn = sqlite3.connect(database_path)
# SQLite leaves foreign-key enforcement switched off on every new connection;
# turn it on so the ON DELETE CASCADE declared on the measures table takes effect.
conn.execute("PRAGMA foreign_keys = ON")
curs = conn.cursor()

In [4]:
# Optional reset: remove both tables so the CREATE cells start from a clean slate.
table_name = ["images", "measures"]

for name in table_name:
    drop_statement = f"DROP TABLE if exists {name}"
    curs.execute(drop_statement)

In [5]:
# Create the images table: one row per picture, keyed by imageid, storing the
# containing folder (filepath) and the file name separately.
images_ddl = """CREATE TABLE if not exists images
             (imageid INTEGER PRIMARY KEY,
             filepath text not null,
             filename text not null);"""

create_table = curs.execute(images_ddl)
conn.commit()

In [6]:
# Create the measures table: up to three similarity values per image, linked to
# the images table through imageid.
measures_ddl = """CREATE TABLE if not exists measures
             (imageid INTEGER PRIMARY KEY,
             color_similarity real,
             embedding_similarity real,
             third_similarity real,
             FOREIGN KEY (imageid) references images(imageid) ON DELETE CASCADE);"""

create_table2 = curs.execute(measures_ddl)
conn.commit()

In [7]:
# Confirm the database file is present by listing its folder.
database_dir_listing = os.listdir(database_dir)
print(f"Database created: {database_dir_listing}")

#show database

def describe_table(curs, table_name):
    """Return the column layout of a table.

    Args:
        curs: An open sqlite3 cursor.
        table_name: Name of the table to inspect.

    Returns:
        A list with one row per column, each holding
        (cid, name, type, notnull, dflt_value, pk). The list is empty when
        no table with that name exists.
    """
    # The table-valued form of the pragma accepts a bound parameter, so names
    # containing spaces or quotes no longer have to be spliced into the SQL text.
    curs.execute(
        'SELECT cid, name, type, "notnull", dflt_value, pk FROM pragma_table_info(?)',
        (table_name,),
    )
    return curs.fetchall()


# List every table registered in this database file.
tables = curs.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
print(f"Tables: {tables}")

# Column layout of the images table; shown as the cell result.
table_structure = describe_table(curs, "images")
table_structure


Database created: ['image_recommender.db']
Tables: [('scores',), ('images',), ('measures',)]


[(0, 'imageid', 'INTEGER', 0, None, 1),
 (1, 'filepath', 'TEXT', 1, None, 0),
 (2, 'filename', 'TEXT', 1, None, 0)]

## **FETCH DATA FROM HARDDRIVE**
-----------------------------------

In [8]:
# Check that the image folder on the hard drive is reachable.
# Raw string: "\d" and "\i" are not valid escape sequences, so the plain literal
# only kept its backslashes by accident and triggers a warning on newer Pythons.
root_dir = r"F:\data\image_data"
if os.path.exists(root_dir):
    print(True)
else:
    # Say so explicitly instead of printing nothing when the drive is not mounted.
    print(f"Image folder not found: {root_dir}")

True


In [9]:
#generator
def load_images_from_harddrive(root_dir):
    """Lazily yield (folder, filename) pairs for every image below root_dir.

    The tree is walked with os.walk; a file counts as an image when its name,
    compared case-insensitively, ends in .jpg, .png or .jpeg.
    """
    image_suffixes = (".jpg", ".png", ".jpeg")
    for folder, _subfolders, filenames in os.walk(root_dir):
        matching = (entry for entry in filenames if entry.lower().endswith(image_suffixes))
        for entry in matching:
            yield folder, entry
                

In [10]:
# Create the image generator once. Nothing is read from disk until it is iterated,
# and the insert cell below pulls from this same object, so re-running that cell
# continues after the last file already consumed rather than starting over.
gen = load_images_from_harddrive(root_dir)

## **INSERT DATA INTO DATABASE**
--------------------

In [11]:
# imageid = 0

# for root, file in load_images_from_harddrive(root_dir):
#     filepath = root
#     filename = file
    
#     curs.execute(f"INSERT INTO IMAGES ('imageid', 'filepath', 'filename') VALUES (?,?, ?)", (imageid, filepath,filename))
#     conn.commit()
#     imageid += 1

# conn.close()

In [17]:
#gen with limit to test
# Insert at most TEST_LIMIT images, numbering them from 0 in generator order.
TEST_LIMIT = 2000
imageid = 0

# zip() ends cleanly when the generator runs dry, so a collection smaller than
# TEST_LIMIT no longer aborts the cell with StopIteration. range() comes first so
# that no extra item is pulled from gen once the limit is reached.
# "with conn" commits once after the whole batch and rolls everything back if an
# insert fails, instead of committing after every single row.
with conn:
    for _, (root, file) in zip(range(TEST_LIMIT), gen):
        curs.execute("INSERT INTO IMAGES ('imageid', 'filepath', 'filename') VALUES (?,?, ?)", (imageid, root, file))
        imageid += 1

## **TEST IF IT WORKED**
---------------

In [None]:
#test to retrieve pictures from database
curs.execute("SELECT filepath, filename FROM IMAGES")
# fetchone() reads a single row instead of loading the whole table, and returns
# None (rather than raising IndexError) when the table is empty.
firstpic = curs.fetchone()
if firstpic is None:
    print("No images in the database yet - run the insert cell first.")
else:
    path = os.path.join(firstpic[0], firstpic[1])
    # The context manager releases the file handle once the viewer has been launched.
    with Image.open(path) as image:
        image.show()

In [16]:
# Remove every row from the images table and persist the change.
wipe_images_sql = "DELETE FROM IMAGES"
curs.execute(wipe_images_sql)
conn.commit()

In [19]:
# Spot-check the end of the inserted batch: rows with an id above 1995.
tail_query = "SELECT * FROM IMAGES WHERE imageid > 1995"
curs.execute(tail_query).fetchall()

[(1996, 'F:\\data\\image_data\\weather_image_recognition\\frost', '4068.jpg'),
 (1997, 'F:\\data\\image_data\\weather_image_recognition\\frost', '4069.jpg'),
 (1998, 'F:\\data\\image_data\\weather_image_recognition\\frost', '4070.jpg'),
 (1999, 'F:\\data\\image_data\\weather_image_recognition\\frost', '4071.jpg')]