In [1]:
#! wget https://raw.githubusercontent.com/tonybutzer/teigen-lightning-talks/a557b877c8062d3a1900b8fa66bfd9c828a57592/4_Jan_2022/Panel_Examples/s3lib/bucket_analyze.py

In [2]:
import sys

# Put the notebook's working directory on the import path so that modules
# sitting next to the notebook can be imported. The membership check keeps
# the cell idempotent: re-running it does not pile up duplicate '.' entries.
if '.' not in sys.path:
    sys.path.append('.')

In [3]:
from bucket_analyze import bucket_analyze

In [4]:
def get_bucket_list():
    '''
    Return the names of the watersmart S3 buckets.

    Only three buckets are needed for this work:
      - ws-in:      model inputs (precip, NDVI, Temperature)
      - ws-out:     model outputs - sometimes just tiles and chips
      - ws-enduser: where outputs are mosaicked into usable tifs,
                    netcdfs, tarballs, etc.

    A new list object is built on every call, so callers may modify the
    result freely.
    '''
    watersmart_buckets = ('ws-in', 'ws-out', 'ws-enduser')
    return list(watersmart_buckets)
    

In [5]:
# Bucket names to audit, as returned by get_bucket_list().
bucket_list = get_bucket_list()

In [6]:
import boto3
def s3_list_pseudo_subdirs(bucket, prefix_with_slash):
    '''
    List the immediate pseudo-subdirectories (S3 "common prefixes") under
    a path in a bucket.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket to list.
    prefix_with_slash : str
        Path inside the bucket, ending in '/'. It is normally written with
        a leading '/' ('/' for the bucket root, '/CONUS/' for one level
        down); that single leading slash is removed before the request is
        made. A path given without the leading slash is used unchanged.

    Returns
    -------
    list of str
        The 'Prefix' value of every common prefix reported by S3, in the
        order returned. Empty when the path has no sub-prefixes.
    '''
    # The leading '/' is only this notebook's path notation; strip exactly
    # one so that 'CONUS/' is not mistaken for a path whose first component
    # should be thrown away.
    if prefix_with_slash.startswith('/'):
        prefix_with_slash = prefix_with_slash[1:]
    prefix = prefix_with_slash

    print('prefix_with_slash', prefix_with_slash)

    client = boto3.client('s3')
    # A single list call returns only one page of results; the paginator
    # follows the continuation markers so no prefix is silently dropped.
    paginator = client.get_paginator('list_objects')

    subfolder_list = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        # 'CommonPrefixes' is absent from the response when nothing matches.
        for entry in page.get('CommonPrefixes') or []:
            subfolder_list.append(entry.get('Prefix'))
    return subfolder_list


In [7]:
# Try the listing helper on one bucket: fetch the top-level prefixes of
# 'ws-in' by passing '/' as the path.
bucket='ws-in'
prefix_with_slash = '/'  # top dirs
my_subdir_list=s3_list_pseudo_subdirs(bucket, prefix_with_slash)


prefix_with_slash 


In [8]:
my_subdir_list

['CONUS/',
 'DelawareRiverBasin/',
 'EastAfrica/',
 'Marshall_Islands/',
 'NorthAmerica/',
 'WOTJE-code/',
 'cfactor/',
 'global_dT/',
 'home/',
 'tony/']

In [9]:
# Collect one usage record for every top-level prefix of every bucket.
du_list_of_dicts = []
prefix_with_slash = '/'  # top dirs
for bucket_name in bucket_list:
    my_subdir_list = s3_list_pseudo_subdirs(bucket_name, prefix_with_slash)
    for pdir in my_subdir_list:
        # bucket_analyze hands back a pair, stored below as Gbytes and Costs.
        gb, dol_month = bucket_analyze(bucket_name, pdir)
        du_dict = dict(bucket=bucket_name, DirPrefix=pdir, Gbytes=gb, Costs=dol_month)
        du_list_of_dicts.append(du_dict)

prefix_with_slash 
bucket ws-in
prefix CONUS/
COUNT= 34092
STANDARD 34092
STANDARD 1237631520012
STANDARD GBYTES= 1152.634173642844
STANDARD Cost/Month= 26.510585993785412
----------------------------------------------------------------------------------------------------
GLACIER 0
GLACIER 0
GLACIER GBYTES= 0.0
GLACIER Cost/Month= 0.0
----------------------------------------------------------------------------------------------------
INTELLIGENT_TIERING 0
INTELLIGENT_TIERING 0
INTELLIGENT_TIERING GBYTES= 0.0
INTELLIGENT_TIERING Cost/Month= 0.0
----------------------------------------------------------------------------------------------------
END LOOP
G: 1152.634173642844 26.510585993785412
bucket ws-in
prefix DelawareRiverBasin/
COUNT= 795894
STANDARD 795894
STANDARD 749830659763
STANDARD GBYTES= 698.3342205761
STANDARD Cost/Month= 16.0616870732503
----------------------------------------------------------------------------------------------------
GLACIER 0
GLACIER 0
GLACIER GBYTES= 0

COUNT= 1
STANDARD 1
STANDARD 14
STANDARD GBYTES= 1.30385160446167e-08
STANDARD Cost/Month= 2.998858690261841e-10
----------------------------------------------------------------------------------------------------
GLACIER 0
GLACIER 0
GLACIER GBYTES= 0.0
GLACIER Cost/Month= 0.0
----------------------------------------------------------------------------------------------------
INTELLIGENT_TIERING 0
INTELLIGENT_TIERING 0
INTELLIGENT_TIERING GBYTES= 0.0
INTELLIGENT_TIERING Cost/Month= 0.0
----------------------------------------------------------------------------------------------------
END LOOP
G: 1 0.023
bucket ws-out
prefix tony1/
COUNT= 3
STANDARD 3
STANDARD 2054650159
STANDARD GBYTES= 1.9135420760139823
STANDARD Cost/Month= 0.04401146774832159
----------------------------------------------------------------------------------------------------
GLACIER 0
GLACIER 0
GLACIER GBYTES= 0.0
GLACIER Cost/Month= 0.0
------------------------------------------------------------------------------

In [10]:
du_list_of_dicts

[{'bucket': 'ws-in',
  'DirPrefix': 'CONUS/',
  'Gbytes': 1152.634173642844,
  'Costs': 26.510585993785412},
 {'bucket': 'ws-in',
  'DirPrefix': 'DelawareRiverBasin/',
  'Gbytes': 698.3342205761,
  'Costs': 16.0616870732503},
 {'bucket': 'ws-in',
  'DirPrefix': 'EastAfrica/',
  'Gbytes': 108.48768190201372,
  'Costs': 2.4952166837463157},
 {'bucket': 'ws-in',
  'DirPrefix': 'Marshall_Islands/',
  'Gbytes': 3.0795034440234303,
  'Costs': 0.07082857921253889},
 {'bucket': 'ws-in',
  'DirPrefix': 'NorthAmerica/',
  'Gbytes': 2360.6205078894272,
  'Costs': 54.29427168145683},
 {'bucket': 'ws-in', 'DirPrefix': 'WOTJE-code/', 'Gbytes': 1, 'Costs': 0.023},
 {'bucket': 'ws-in',
  'DirPrefix': 'cfactor/',
  'Gbytes': 149.86372096277773,
  'Costs': 3.4468655821438876},
 {'bucket': 'ws-in',
  'DirPrefix': 'global_dT/',
  'Gbytes': 4.07246426679194,
  'Costs': 0.09366667813621461},
 {'bucket': 'ws-in',
  'DirPrefix': 'home/',
  'Gbytes': 9.139479291625321,
  'Costs': 0.21020802370738237},
 {'bucke

In [11]:
import pandas as pd

In [12]:
# Tabulate the collected records: one row per dict, one column per key.
df = pd.DataFrame(du_list_of_dicts)

In [13]:
df

Unnamed: 0,bucket,DirPrefix,Gbytes,Costs
0,ws-in,CONUS/,1152.634174,26.510586
1,ws-in,DelawareRiverBasin/,698.334221,16.061687
2,ws-in,EastAfrica/,108.487682,2.495217
3,ws-in,Marshall_Islands/,3.079503,0.070829
4,ws-in,NorthAmerica/,2360.620508,54.294272
5,ws-in,WOTJE-code/,1.0,0.023
6,ws-in,cfactor/,149.863721,3.446866
7,ws-in,global_dT/,4.072464,0.093667
8,ws-in,home/,9.139479,0.210208
9,ws-in,tony/,1.0,0.023


In [14]:
# Keep only rows whose Gbytes is strictly greater than 1.
# NOTE(review): rows reported as exactly 1 are dropped as well; the run log
# suggests bucket_analyze reports very small prefixes as 1 - confirm that
# excluding them is intended.
mdf = df.loc[df['Gbytes'] > 1]

In [15]:
#!rclone size wss3:/ws-enduser/DelawareRiverBasin

In [16]:
#!rclone size wss3:/ws-in/DelawareRiverBasin

In [17]:
mdf

Unnamed: 0,bucket,DirPrefix,Gbytes,Costs
0,ws-in,CONUS/,1152.634174,26.510586
1,ws-in,DelawareRiverBasin/,698.334221,16.061687
2,ws-in,EastAfrica/,108.487682,2.495217
3,ws-in,Marshall_Islands/,3.079503,0.070829
4,ws-in,NorthAmerica/,2360.620508,54.294272
6,ws-in,cfactor/,149.863721,3.446866
7,ws-in,global_dT/,4.072464,0.093667
8,ws-in,home/,9.139479,0.210208
10,ws-out,CONUS/,295945.401661,6806.744238
11,ws-out,EastAfrica/,8220.28159,189.066477


In [18]:
# Write the filtered table to ws_s3_usage.csv in the working directory,
# leaving out the DataFrame index column.
mdf.to_csv('./ws_s3_usage.csv', index=False)

In [19]:
! ls *.csv

ws_s3_usage.csv
