In [1]:
# pip install ibm-cos-sdk==2.0.1

In [2]:
import os
import json
# !pip install ibm-cos-sdk
import ibm_boto3
from ibm_botocore.client import Config, ClientError
import sqlite3
import pandas as pd
from hashlib import md5
from time import localtime

In [3]:
# Constants for IBM COS values
COS_ENDPOINT = 'https://s3.eu.cloud-object-storage.appdomain.cloud' # Current list avaiable at https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints
COS_BUCKET_LOCATION = "eu-geo"
COS_API_KEY_ID = "p7f0KHPQzHEil1eggZcWfvdVYJJV8SxkMzvXOQNiMrFS" # eg "W00YixxxxxxxxxxMB-odB-2ySfTrFBIQQWanc--P3byk"
COS_INSTANCE_CRN = "crn:v1:bluemix:public:cloud-object-storage:global:a/39fc6c17b51f421da36b42417ab2f7ab:53b4d3d6-475b-4de5-8e47-103802bc8c01::"

In [4]:
# Create resource
cos = ibm_boto3.resource("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT)

cos_client = ibm_boto3.client("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT)

In [5]:
# Configure Internall SQL database
def create_sql_table(db_path):
    ''' create an SQLite database with table schema: (key:TEXT,value:TEXT)'''
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS catalog (directory TEXT, object_name TEXT, reference_directory TEXT, reference_object_name TEXT)''') 
    connection.commit()
    return connection

def remove_databse(db_path):
    if os.path.exists(db_path):
        os.remove(db_path)

In [6]:
# Extended object storage interface
class ExtendedObjectStorage():
    def __init__(self):
        self.bucket = 'bucket-oz-shlomi'
        db_path = 'temp_db.db'
        remove_databse(db_path)
        self.connection = create_sql_table(db_path)#sql connection
        self.cursor = self.connection.cursor()
    
    @staticmethod
    def generate_uniqe_name(name):
        '''This function convert the object name provided by the user to uniqe name that will be use in the object storage
        in order to prevent collisions'''
        extension = name.split('.')[1]
        new_name = md5(str(localtime()).encode('utf-8')).hexdigest() + '.' + extension
        return new_name
        
    def create_object(self, directory_name, object_name, file_text):
        '''This function get from the user directory name, object name and contet (file text),
        insert the information to the internally DB and store the data in the object storage'''
        
        uniqe_object_name = self.generate_uniqe_name(object_name) #create a uniqe object name that will be use in the cbject storage
        
        # Check if there is exist such directory
        self.cursor.execute(f'''SELECT reference_directory FROM catalog WHERE directory = "{directory_name}"''')
        ref = self.cursor.fetchall()
        if ref: # exist 
            ref = "/".join(ref[0])
        else:
            print("There is no such directory!\nFile not created")
            return
        
        # first we create the object in object storage. Only when it's done we update database
        print("Creating new item: {0}".format(object_name))
        cos.Object(self.bucket, "/".join([ref, uniqe_object_name])).put(Body=file_text)
        self.cursor.execute(f'''INSERT INTO catalog (directory, object_name, reference_directory, reference_object_name) VALUES ("{directory_name}", "{object_name}", "{ref}", "{uniqe_object_name}");''')
        self.connection.commit()


    def get_object(self, directory_name, object_name):
        '''This function get directory and object name from the user and extract the content of the object from the object storage'''
        
        self.cursor.execute(f'''SELECT reference_directory, reference_object_name FROM catalog WHERE directory = "{directory_name}" AND object_name = "{object_name}"''')
        ref = self.cursor.fetchall()
        ref = "/".join(ref[0]) # 'path' in object storage
        file = cos.Object(self.bucket, ref).get()
        print("File Contents: {0}".format(file["Body"].read()))
        self.connection.commit()
        
        
    def delete_object(self, directory_name, object_name):
        '''This function get directory and object name and delete the object from the system.
           first, we delete only the information from the database.
           If there is no other name with same refernce we shall delete the file from object storage'''
        
        self.cursor.execute(f'''SELECT reference_directory, reference_object_name FROM catalog WHERE directory = "{directory_name}" AND object_name = "{object_name}"''')
        ref = self.cursor.fetchall()
        self.cursor.execute(f'''DELETE FROM catalog WHERE directory = "{directory_name}" AND object_name = "{object_name}"''')
        self.cursor.execute(f'''SELECT COUNT(*) FROM catalog WHERE reference_directory = "{ref[0][0]}" AND reference_object_name = "{ref[0][1]}"''')
        number_of_pointers = self.cursor.fetchall()[0][0]
        if number_of_pointers==0: #delete from object storage only if there is no other referene to the object
            cos_client.delete_object(Bucket=self.bucket, Key="/".join(ref[0]))
            print("Item: {0} deleted!\n".format(object_name))
        self.connection.commit()

        
    def create_directory(self, directory_name):
        '''This function get directory name from the user and create a representation of directory in SQL DB and object storage'''
        
        uniqe_directory_name = 'dir' + md5(str(localtime()).encode('utf-8')).hexdigest() # create uniqe name for the 'directory' to avoid collisions
        #update database
        self.cursor.execute(f'''INSERT INTO catalog (directory, object_name, reference_directory, reference_object_name) VALUES ("{directory_name}", NULL, "{uniqe_directory_name}", NULL);''')
        self.connection.commit()

        
    def delete_directory(self, directory_name):
        '''This function get directory name from the user and delete all the object under this directory in the object storage'''
       
        self.cursor.execute(f'''SELECT reference_directory, reference_object_name FROM catalog WHERE directory = "{directory_name}" AND object_name IS NOT NULL''')
        ref_records = self.cursor.fetchall()
        print("This files going to delete:\n", ref_records)
        self.cursor.execute(f'''DELETE FROM catalog WHERE directory = "{directory_name}";''')
        for ref_record in ref_records:
            self.cursor.execute(f'''SELECT COUNT(*) FROM catalog WHERE reference_directory = "{ref_record[0]}" AND reference_object_name = "{ref_record[1]}"''')
            number_of_pointers = self.cursor.fetchall()[0][0]
            print(ref_record, number_of_pointers)
            if number_of_pointers==0: # delete file only if there is no another reference
                cos_client.delete_object(Bucket=self.bucket, Key="/".join(ref_record))
        print("Directory deleted!\n".format(directory_name))
        self.connection.commit()
            
    def list_directory(self, directory_name):
        '''This function get directory name from the user and print all the files under this directory'''
        
        print("Retrieving directory_name contents from: {0}".format(directory_name))
        self.cursor.execute(f'''SELECT object_name FROM catalog WHERE directory = "{directory_name}" AND object_name IS NOT NULL''')
        for file_record in self.cursor.fetchall():
            print(file_record[0])
        self.connection.commit()
    
    def rename_directory(self, prev_directory_name, new_directory_name):
        '''This function get directory name and a new directory name and rename it in O(1)'''
        
        self.cursor.execute(f'''SELECT object_name, reference_directory, reference_object_name FROM catalog WHERE directory = "{prev_directory_name}"''')
        ref_records = self.cursor.fetchall()
        for ref in ref_records: # updat the new directory name in SQL DB
            self.cursor.execute(f'''INSERT INTO catalog (directory, object_name, reference_directory, reference_object_name) VALUES ("{new_directory_name}", "{ref[0]}", "{ref[1]}", "{ref[2]}");''')
        self.cursor.execute(f'''DELETE FROM catalog WHERE directory = "{prev_directory_name}"''') # delete the old information
        print("Directory: {0} renamed to {1}!\n".format(prev_directory_name, new_directory_name))
        self.connection.commit()
    
    def rename_object(self, directory_name, prev_obj_name, new_obj_name):
        '''This function get directory name, previuos object name and new object name and rename it in O(1)'''
        
        #we first add the new row to the database with the previous name as reference and delete the previous name.
        self.cursor.execute(f'''SELECT reference_object_name FROM catalog WHERE  directory = "{directory_name}" AND object_name = "{prev_obj_name}";''') # find the original file
        ref = self.cursor.fetchall()
        # update and delete the old information
        self.cursor.execute(f'''INSERT INTO catalog (directory, object_name, reference_directory, reference_object_name) VALUES ("{directory_name}", "{new_obj_name}", "{directory_name}", "{ref[0][0]}");''')
        self.cursor.execute(f'''DELETE FROM catalog WHERE directory = "{directory_name}" AND object_name = "{prev_obj_name}"''')
        print("Item: {0} renamed to {1}!\n".format(prev_obj_name, new_obj_name))
        self.connection.commit()
        

In [7]:
# Unitest

EOS_obj = ExtendedObjectStorage()
directory_name = 'test_2'
obj_name = "csv_1.csv"
file_text="Oz Shlomi"

EOS_obj.create_directory(directory_name)
EOS_obj.create_directory(directory_name) # try to create directory that already exist
EOS_obj.create_object('dfsfs', obj_name, file_text) # try to create object in directory that doesn't exist
EOS_obj.create_object(directory_name, obj_name, file_text)
EOS_obj.create_object(directory_name, obj_name, file_text)
EOS_obj.create_object(directory_name, "csv_2.csv", "95, 100, 200")
print(pd.read_sql(f'SELECT * FROM catalog', con=EOS_obj.connection))

EOS_obj.get_object(directory_name, obj_name)
EOS_obj.create_object(directory_name, obj_name, file_text) # create object after getting
print(pd.read_sql(f'SELECT * FROM catalog', con=EOS_obj.connection))

There is no such directory!
File not created
Creating new item: csv_1.csv
Creating new item: csv_1.csv
Creating new item: csv_2.csv
  directory object_name                  reference_directory  \
0    test_2        None  dir4f8efc61b3a96d05f2a5af22a304abb8   
1    test_2        None  dir4f8efc61b3a96d05f2a5af22a304abb8   
2    test_2   csv_1.csv  dir4f8efc61b3a96d05f2a5af22a304abb8   
3    test_2   csv_1.csv  dir4f8efc61b3a96d05f2a5af22a304abb8   
4    test_2   csv_2.csv  dir4f8efc61b3a96d05f2a5af22a304abb8   

                  reference_object_name  
0                                  None  
1                                  None  
2  4f8efc61b3a96d05f2a5af22a304abb8.csv  
3  4ca3db0892b50cd2054921980af8ab07.csv  
4  1b86894f33b515d2f6a3802fee538e6d.csv  
File Contents: b'Oz Shlomi'
Creating new item: csv_1.csv
  directory object_name                  reference_directory  \
0    test_2        None  dir4f8efc61b3a96d05f2a5af22a304abb8   
1    test_2        None  dir4f8efc61b3a96d05f2

In [8]:
# renaming

EOS_obj.rename_object(directory_name, "csv_1.csv", 'rename_1.csv')
EOS_obj.rename_object(directory_name, 'rename_1.csv','rename_2.csv')
EOS_obj.list_directory(directory_name)
print(pd.read_sql(f'SELECT * FROM catalog', con=EOS_obj.connection))

Item: csv_1.csv renamed to rename_1.csv!

Item: rename_1.csv renamed to rename_2.csv!

Retrieving directory_name contents from: test_2
csv_2.csv
rename_2.csv
  directory   object_name                  reference_directory  \
0    test_2          None  dir4f8efc61b3a96d05f2a5af22a304abb8   
1    test_2          None  dir4f8efc61b3a96d05f2a5af22a304abb8   
2    test_2     csv_2.csv  dir4f8efc61b3a96d05f2a5af22a304abb8   
3    test_2  rename_2.csv                               test_2   

                  reference_object_name  
0                                  None  
1                                  None  
2  1b86894f33b515d2f6a3802fee538e6d.csv  
3  4f8efc61b3a96d05f2a5af22a304abb8.csv  


In [9]:
# EOS_obj.list_directory(directory_name)
EOS_obj.rename_directory(directory_name, 'test_3')
EOS_obj.list_directory(directory_name)
print(pd.read_sql(f'SELECT * FROM catalog', con=EOS_obj.connection))
EOS_obj.delete_directory('test_3')
print(pd.read_sql(f'SELECT * FROM catalog', con=EOS_obj.connection))
EOS_obj.list_directory(directory_name)

Directory: test_2 renamed to test_3!

Retrieving directory_name contents from: test_2
  directory   object_name                  reference_directory  \
0    test_3          None  dir4f8efc61b3a96d05f2a5af22a304abb8   
1    test_3          None  dir4f8efc61b3a96d05f2a5af22a304abb8   
2    test_3     csv_2.csv  dir4f8efc61b3a96d05f2a5af22a304abb8   
3    test_3  rename_2.csv                               test_2   

                  reference_object_name  
0                                  None  
1                                  None  
2  1b86894f33b515d2f6a3802fee538e6d.csv  
3  4f8efc61b3a96d05f2a5af22a304abb8.csv  
This files going to delete:
 [('dir4f8efc61b3a96d05f2a5af22a304abb8', 'None'), ('dir4f8efc61b3a96d05f2a5af22a304abb8', 'None'), ('dir4f8efc61b3a96d05f2a5af22a304abb8', '1b86894f33b515d2f6a3802fee538e6d.csv'), ('test_2', '4f8efc61b3a96d05f2a5af22a304abb8.csv')]
('dir4f8efc61b3a96d05f2a5af22a304abb8', 'None') 0
('dir4f8efc61b3a96d05f2a5af22a304abb8', 'None') 0
('dir4f8efc6