In [1]:
import rasterio
import os
import numpy as np
import boto3

In [2]:
def get_s3_obj_size(bucket, key):
    """Return the size in bytes of the S3 object stored at ``bucket``/``key``.

    A new boto3 S3 client is created on every call and the object's metadata
    is fetched with ``head_object``; the ``ContentLength`` entry of that
    response is returned.
    """
    head = boto3.client('s3').head_object(Bucket=bucket, Key=key)
    return head['ContentLength']

In [3]:
# S3 location of the GeoTIFF to inspect.
bucket = 'mantlelabs-eu-workflows'
key = 's2-generate_composites/Ukraine_Swissre_monthly_composites_20160101-20200507/composite_37UER_20181116-20181215_median/B08/tile_37UER_b08_median.tif'
s3_uri = f's3://{bucket}/{key}'

# Open the raster directly from S3 and keep both the pixel values and the
# profile (the metadata dict printed in the next cell) for later use.
with rasterio.open(s3_uri) as src:
    data = src.read(1)  # band 1 only, which yields a 2D array
    profile = src.profile

In [4]:
import pprint

# Show the raster profile in a readable, multi-line layout.
print(pprint.pformat(profile))

{'blockxsize': 512,
 'blockysize': 512,
 'compress': 'deflate',
 'count': 1,
 'crs': CRS.from_epsg(32637),
 'driver': 'GTiff',
 'dtype': 'uint16',
 'height': 10980,
 'interleave': 'band',
 'nodata': 0.0,
 'tiled': True,
 'transform': Affine(10.0, 0.0, 499980.0,
       0.0, -10.0, 5600040.0),
 'width': 10980}


In [5]:
# Confirm what kind of object the raster read returned.
data_type = type(data)
print(data_type)

<class 'numpy.ndarray'>


In [6]:
# Size of the TIF as stored on S3, converted from bytes to whole MB.
tif_size = get_s3_obj_size(bucket, key)
tif_size_mb = round(tif_size / 1024 / 1024)
print('File size (MB) of compressed TIF:', tif_size_mb)

File size (MB) of compressed TIF: 197


In [7]:
# Report the element type, then dump the array to a local .npy file so its
# on-disk size can be compared with the TIF.
print(data.dtype)
np.save(file='test.npy', arr=data)

uint16


In [8]:
# Size of the saved .npy file, converted from bytes to whole MB.
npy_size = os.stat('test.npy').st_size
print('File size (MB) of numpy file:', round(npy_size / 1024 / 1024))

File size (MB) of numpy file: 230


In [9]:
#  As long as you don't change the dimensions and/or the data type, you can write the array
#  back with the original profile: it keeps the same georeferencing, compression algorithm, etc.

#  Do something with the numpy array.
#  NumPy integer arithmetic wraps around silently, so a plain `data * 2` would turn large
#  values into small ones (e.g. 32768 * 2 -> 0 for uint16), and 0 is this raster's nodata
#  value. Values that would overflow are therefore pinned to the dtype maximum instead.
#  The result gets its own name so that re-running this cell does not double the data again.
dtype_max = np.iinfo(data.dtype).max
data_doubled = data * 2
data_doubled[data > dtype_max // 2] = dtype_max

with rasterio.open('new_file.tif', 'w', **profile) as dst:
    dst.write(data_doubled, 1)

#  If you change the data type, you need to adjust the profile. Work on a copy so the
#  original profile stays valid for the write above when the cell is re-run.
profile_uint8 = dict(profile)
profile_uint8['dtype'] = np.uint8

#  A bare astype(np.uint8) keeps only the low 8 bits (256 -> 0, again the nodata value).
#  Cap at the uint8 maximum first so out-of-range values saturate instead of wrapping.
#  If the relative value range matters, rescale the data to 0-255 here instead.
uint8_max = np.iinfo(np.uint8).max
data_uint8 = np.minimum(data_doubled, uint8_max).astype(np.uint8)

with rasterio.open('new_file2.tif', 'w', **profile_uint8) as dst:
    dst.write(data_uint8, 1)