In [None]:
from datashelf.core import init, create_collection
import os
import shutil

In [None]:
# Run init() for the case where .datashelf does not exist yet (DNE = "does not exist")
init()

In [None]:
# Try to init when .datashelf already exists (the previous cell already ran init() once).
# NOTE(review): the expected outcome is not recorded here - confirm init() handles a repeat call.
init()

In [None]:
def rm_datashelf_dir():
    """Delete the ``.datashelf`` directory found in the current working directory.

    The directory is removed recursively. A confirmation line is printed on
    success; any ``OSError`` (for example, the directory being absent) is
    reported on stdout instead of being raised.
    """
    try:
        target = os.path.join(os.getcwd(), ".datashelf")
        shutil.rmtree(target)
    except OSError as err:
        print(f"Error deleting directory: {err}")
    else:
        # Only reached when rmtree finished without raising
        print(f"Directory '{target}' and its contents successfully deleted.")

In [None]:
# Create a collection named "test" when .datashelf exists
create_collection("test")

In [None]:
# Delete .datashelf (using rm_datashelf_dir, which removes it from the current working
# directory) and rerun create_collection to confirm that it raises an error
rm_datashelf_dir()
create_collection("Collection 1")

In [None]:
# Remake .datashelf with init(), then create "Collection 1" inside it
init()
create_collection("Collection 1")

In [None]:
# Try to create a collection that already exists ("Collection 1" was created in the previous cell)
create_collection("Collection 1")

In [None]:
# Function to remove a collection's <collection>_metadata.yaml file
def rm_collection_metadata(collection="collection_1"):
    """Delete ``.datashelf/<collection>/<collection>_metadata.yaml`` under the cwd.

    The outcome (deleted, already missing, or failed) is printed rather than
    raised, so the helper can be called repeatedly from a notebook.

    Args:
        collection: Directory name of the collection inside ``.datashelf``.
            Defaults to ``"collection_1"``, the path this helper originally
            hard-coded, so existing zero-argument calls behave as before.
    """
    metadata_path = os.path.join(
        os.getcwd(), ".datashelf", collection, f"{collection}_metadata.yaml"
    )
    try:
        # Attempt the removal directly instead of checking os.path.exists first,
        # so the file cannot disappear between the check and the delete.
        os.remove(metadata_path)
    except (FileNotFoundError, NotADirectoryError):
        # Either the file or one of its parent directories is absent.
        print("Metadata does not exist.")
    except OSError as e:
        print(f"An error occured: {e}")
    else:
        print("Metadata deleted successfully.")

In [None]:
# First call: deletes collection_1_metadata.yaml if it is present
rm_collection_metadata()

In [None]:
# Repeat the call: once the file is gone this exercises the "Metadata does not exist." branch
rm_collection_metadata()

In [None]:
# Try to create a collection when the collection exists but doesn't have a metadata file
# (the metadata file was removed with rm_collection_metadata() above)
create_collection("Collection 1")

In [None]:
# Add content to collection_1_metadata.yaml: a single sample file record under "files"
import yaml
from datetime import datetime  # not used in this cell

data = {
    "files": [
        {
            "file_name": "Sales.csv",
            "date_created": "01/01/01 12:00:00",
            "date_last_modified": "01/01/01 12:00:00",
            # NOTE(review): "Orginal" looks like a typo for "Original"; it is written to disk as-is
            "tag": "Orginal",
            "comments": "2025 Q3 Sales Data",
            "parent": ""
        }
    ]
}

# Mode 'w' truncates the file, so any existing metadata content is replaced
with open('./.datashelf/collection_1/collection_1_metadata.yaml', 'w') as f:
    yaml.dump(data, f)

In [None]:
# Call create_collection("Collection 1") again to make sure the metadata content written
# in the previous cell doesn't get overwritten - Passed
create_collection("Collection 1")

In [None]:
# Write sample data in datashelf_metadata.yaml: three collection entries under "collections"
import yaml
from datetime import datetime  # not used in this cell

# Each entry carries a display name, two timestamp strings, and the list of files it holds
data = {
    "collections": [
        {
            "collection_name": "Collection 1",
            "date_created": "01/01/01 12:00:00",
            "date_last_modified": "01/01/01 12:00:00",
            "files": ["collection_1_metadata.yaml", "file1.csv", "file2.csv"]
        },
        
        {
            "collection_name": "Collection 2",
            "date_created": "01/02/01 12:00:00",
            "date_last_modified":"01/02/01 12:00:00",
            "files": ["collection_2_metadata.yaml", "file1.csv", "file2.csv"]
        },
        
        {
            "collection_name": "Collection 3",
            "date_created": "01/03/01 12:00:00",
            "date_last_modified": "01/03/01 12:00:00",
            "files": ["collection_3_metadata.yaml", "file1.csv", "file2.csv"]
        }
    ]
}

# Mode 'w' truncates the file, so whatever the file held before is replaced by this sample
with open('./.datashelf/datashelf_metadata.yaml', 'w') as f:
    yaml.dump(data, f)

In [None]:
# Read datashelf_metadata.yaml back from disk, rebinding `data` to the parsed content
with open('./.datashelf/datashelf_metadata.yaml' , 'r') as f:
    data = yaml.safe_load(f)

In [None]:
# Echo the "collections" entry of the metadata loaded above
data['collections']

In [None]:
# Print the position of every collection whose normalized name
# (lower-cased, spaces replaced by underscores) equals "collection_1"
for i, collection in enumerate(data["collections"]):
    normalized_name = collection.get("collection_name").lower().replace(" ", "_")
    if normalized_name != "collection_1":
        continue
    print(i)
    

In [None]:
# Rename the first collection entry in memory only (nothing is written to disk in this cell)
data["collections"][0]['collection_name'] = "Modified Collection 1"


In [None]:
# Echo the in-memory metadata to check the rename applied above
data

In [None]:
# Write changed metadata back to datashelf_metadata.yaml
# (path is built from the current working directory; mode 'w' replaces the file's content)
with open(os.path.join(os.getcwd(), ".datashelf/datashelf_metadata.yaml"), 'w') as f:
    yaml.safe_dump(data, f)

In [None]:
# Preview how the invalid-key message renders once the \n and \t escapes are printed
print("Invalid key detected in update arg. Update can only have the following keys:\n\t-collection_name\n\t-date_created\n\t-date_last_modified\n\t-files."
    )

In [None]:
# True when the first set holds a key that is missing from the second set;
# here both keys are present in the second set, so the expression is False
not {'collection_name', 'files'}.issubset({"collection_name", "date_created", "files"})

In [None]:
# Test update function: the update supplies a new collection name and a new file list
update_dict = {'collection_name':'Newly Modified Collection 1',
               'files': ['file 1', 'file2', 'newly_modified_collection_1_metadata.yaml']}

from datashelf.utils import _edit_datashelf_metadata

# 'Modified Collection 1' is the name written to datashelf_metadata.yaml earlier in this notebook.
# NOTE(review): assumes the second argument maps field names to their new values - confirm
# against datashelf.utils.
_edit_datashelf_metadata('Modified Collection 1', update_dict)


In [None]:
# Check datashelf_metadata.yaml to see if it worked - Passed
# (reloads the file from disk and echoes the parsed content as the cell output)

with open(os.path.join(os.getcwd(), '.datashelf/datashelf_metadata.yaml'), 'r') as f:
    data = yaml.safe_load(f)
    
data

In [None]:
# NOTE(review): "__file__" is a quoted string literal here, not the __file__ variable, so
# abspath() resolves a file literally named "__file__" against the current working directory
# and dirname() then yields that working directory - not the location of any script.
script_path = os.path.abspath("__file__")
project_root = os.path.dirname(script_path)
print(f"Project root (dirname(__file__)): {project_root}")

In [None]:
from pathlib import Path
# Capture the current working directory as a pathlib Path (reused by later cells)
current = Path.cwd()
current

def find_datashelf_root():
    """Find the nearest directory containing ``.datashelf`` by walking up the tree.

    The current working directory is checked first, then each ancestor in
    turn, including the filesystem root itself.

    Returns:
        The first ``pathlib.Path`` that contains a ``.datashelf`` entry, or
        ``None`` when no directory between the cwd and the root has one.
    """
    start = Path.cwd()

    # Path.parents ends with the filesystem root, so the root is tested as well;
    # a `while current != current.parent` loop would exit before checking it.
    for candidate in (start, *start.parents):
        if (candidate / '.datashelf').exists():
            return candidate

    return None  # No .datashelf found


In [None]:
# Inspect the type of `current` (assigned from Path.cwd() above)
type(current)

In [None]:
# List the entries of .datashelf under the current directory; os.listdir is given a Path here
os.listdir(current/".datashelf")

In [None]:
# Echo the path produced by joining `current` and '.datashelf' with the / operator
current/'.datashelf'

In [None]:
# Re-check the type of `current` after the path experiments above
type(current)

In [None]:
from datashelf.utils_datashelf import _find_datashelf_root

In [None]:
# Call the packaged root finder imported from datashelf.utils_datashelf.
# NOTE(review): presumably return_datashelf_path=True returns the .datashelf path itself
# rather than its parent directory - confirm against the library.
_find_datashelf_root(return_datashelf_path = True)

In [None]:
# See how a name containing a space ends up in a metadata file path:
# the space is kept as-is because no lower()/replace() normalization is applied here
x = "hello hello"
os.path.join(os.getcwd(),f'{x}_metadata.yaml')

In [None]:
# Create a second collection; the following cells read its file at collection_2/collection_2_metadata.yaml
create_collection("collection 2")

In [None]:
# Load the collection_2 metadata file, rebinding `data` to whatever the YAML parser returns
with open('./.datashelf/collection_2/collection_2_metadata.yaml', 'r') as f:
        data = yaml.safe_load(f)

In [None]:
# Inspect what type yaml.safe_load returned for the collection_2 metadata file
type(data)

In [None]:
# Report the truthiness of `data` as loaded from the collection_2 metadata file
print("if data returns true" if data else "if data returns false")

In [None]:
from datetime import datetime

collection_name = 'collection 2'
current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Build starter metadata only when nothing was loaded into `data`.
# NOTE: when `data` is truthy, initialized_metadata is never assigned by this cell.
if not data:
    initialized_metadata = {
        # Single config record; the stored name is lower-cased with spaces as underscores
        'config': [
            {
                'collection_name': collection_name.lower().replace(" ", "_"),
                'date_created': current_timestamp,
                'number_of_files': 0,
                'most_recent_commit': "",
            }
        ],
        'files': [],
    }


In [None]:
# Echo the starter metadata (only defined if the `if not data` branch in the previous cell ran)
initialized_metadata

In [None]:
# Write the initialized metadata back to collection_2_metadata.yaml (mode 'w' replaces the file's content)
import yaml
with open('./.datashelf/collection_2/collection_2_metadata.yaml', 'w') as f:
        yaml.safe_dump(initialized_metadata, f)

In [None]:
# Build the sample dictionary in one literal (same keys, same insertion order)
my_dict = {
    'key1': 'value1',
    'key2': 123,
    'key3': True,
}

print(my_dict)

In [2]:
# List the entries of the collection_1 directory (relative to the current working directory)
import os
os.listdir('./.datashelf/collection_1')

['collection_1_metadata.yaml']