In [1]:
# imports to look in "leap-persistent" google cloud storage
import gcsfs
# Filesystem handle for Google Cloud Storage; every fs.* call in this notebook goes through it.
fs = gcsfs.GCSFileSystem()

# looking in a cloud storage directory

### To look at contents in a directory, use '.ls' and put your path in the parentheses. 

### The path will always start with `gs://leap-persistent/` (no spaces inside the path)

Below, we insert your username into a path associated with the default runs.

In [2]:
# Set this to your own username; it is interpolated into the bucket path below.
your_username = 'galenmckinley' 
# List the entries under the CanESM5 reconstructions directory
# (the output below shows one entry per ensemble member).
fs.ls(f'gs://leap-persistent/{your_username}/pco2_residual/nmse/post02_xgb/reconstructions/CanESM5')

['leap-persistent/galenmckinley/pco2_residual/nmse/post02_xgb/reconstructions/CanESM5/member_r1i1p1f1',
 'leap-persistent/galenmckinley/pco2_residual/nmse/post02_xgb/reconstructions/CanESM5/member_r1i1p2f1',
 'leap-persistent/galenmckinley/pco2_residual/nmse/post02_xgb/reconstructions/CanESM5/member_r2i1p1f1']

# getting file sizes

### if we want to get info for a file on the cloud storage:

In [4]:
# Example object: a small test CSV. fs.info() returns a dict of metadata for
# the object (see the output below, which includes its 'size' in bytes).
fs.info('gs://leap-persistent/galenmckinley/test_folder/test.csv')

{'kind': 'storage#object',
 'id': 'leap-persistent/galenmckinley/test_folder/test.csv/1743448790690071',
 'selfLink': 'https://www.googleapis.com/storage/v1/b/leap-persistent/o/galenmckinley%2Ftest_folder%2Ftest.csv',
 'mediaLink': 'https://storage.googleapis.com/download/storage/v1/b/leap-persistent/o/galenmckinley%2Ftest_folder%2Ftest.csv?generation=1743448790690071&alt=media',
 'name': 'leap-persistent/galenmckinley/test_folder/test.csv',
 'bucket': 'leap-persistent',
 'generation': '1743448790690071',
 'metageneration': '1',
 'contentType': 'application/octet-stream',
 'storageClass': 'STANDARD',
 'size': 17,
 'md5Hash': 'owQ8WisrmPuOUx3Yq+PjjA==',
 'crc32c': 'JUM0Fg==',
 'etag': 'CJfS6e6EtYwDEAE=',
 'timeCreated': '2025-03-31T19:19:50.693Z',
 'updated': '2025-03-31T19:19:50.693Z',
 'timeStorageClassUpdated': '2025-03-31T19:19:50.693Z',
 'timeFinalized': '2025-03-31T19:19:50.693Z',
 'type': 'file',
 'mtime': datetime.datetime(2025, 3, 31, 19, 19, 50, 693000, tzinfo=datetime.timezon

### if we want specifically the size, we use 'fs.du()' which stands for "disk usage":

In [5]:
# Size in bytes of the object at this path; the result matches the 'size'
# field reported by fs.info() for the same file.
fs.du('gs://leap-persistent/galenmckinley/test_folder/test.csv')

17

### just "17" is unclear though -> this function will make this clearer with units!

In [6]:
def convert_to_human_readable(size):
    """
    Converts a disk usage size in bytes to a 'human readable' string.

    Parameters
    ----------
    size : int or float
        Size in bytes (e.g. the value returned by fs.du()).

    Returns
    -------
    str
        The size formatted with two decimals and a unit, e.g. '17.00 B' or
        '20.85 MB'. Units step by factors of 1024. Anything of 1024 TB or
        more is reported in PB, so the function always returns a string.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

    # Walk up the units, dividing by 1024 each time, until the value fits.
    for unit in units[:-1]:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024

    # Too large for every smaller unit: report in the largest unit instead of
    # falling off the end of the loop and implicitly returning None.
    return f"{size:.2f} {units[-1]}"

In [7]:
# Format the 17-byte size that fs.du() reported for the test CSV.
convert_to_human_readable(17)

'17.00 B'

### that csv is 17 bytes!

### here's another example, but with a .zarr file:

In [8]:
# Same call on a .zarr path; the number printed below is its size in bytes.
fs.du('gs://leap-persistent/galenmckinley/pco2_residual/nmse/post02_xgb/reconstructions/CanESM5/member_r1i1p1f1/recon_pCO2residual_CanESM5_member_r1i1p1f1_mon_1x1_200401_202312.zarr')

166750349

In [9]:
# NOTE(review): 21858071 does not match the 166750349 bytes that fs.du()
# printed for the .zarr path in the previous cell — confirm which size is
# intended and re-run so the input and the saved output agree.
convert_to_human_readable(21858071)

'20.85 MB'

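A size of 21,858,071 bytes is 20.85 megabytes. (Note: the `fs.du()` output shown above for the .zarr file is 166,750,349 bytes, which would be about 159.03 MB — re-run both cells so the numbers agree.)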

# Deleting a file
To remove a file, you will use 'fs.rm(path)'

# ***Be very careful*** 

Double-check that you are removing exactly what you intend, and that you are not deleting anything in someone else's directory.

In [11]:
# do not actually run this code, please!

# fs.rm('gs://leap-persistent/galenmckinley/test_folder/test.csv')

### Things are more complicated with .zarr files because .zarr files are considered "directories" by the file system
### If deleting a .zarr file, you will just need to add the argument 'recursive=True' inside the parentheses

In [12]:
# fs.rm('gs://leap-persistent/....../filename.zarr', recursive=True)