# Upload, download and use files in YTsaurus operations

This notebook contains examples of how to store large binary data on YTsaurus as [files](https://ytsaurus.tech/docs/en/user-guide/storage/files).

This notebook demonstrates how to:
1. Upload a file.
2. Read a file.
3. Use a file in an operation.

Files can be useful in the following cases:
1. Operations need dictionaries (e.g., for working with geodata, currency rates, etc.).
2. You want to save checkpoints or a trained model.
3. You need to store data in reliable storage.
4. We store big Jupyter notebooks as files :)

In [2]:
from yt import wrapper as yt 
import uuid 
import json

## Create a base directory for examples

In [4]:
# Build a Cypress path with a random UUID suffix so that repeated runs of the
# notebook do not write into the same directory.
working_dir = f"//tmp/examples/files_{uuid.uuid4()}"
# NOTE(review): recursive=True presumably creates missing parent nodes too — confirm in the yt.create docs.
yt.create("map_node", working_dir, recursive=True)
# Show the generated path so the directory can be located later.
print(working_dir)

//tmp/examples/files_9af46911-349a-4578-bb6e-dcec3d695ca3


## Upload and download file as serialized json

In [6]:
# Sample currency-rate dictionary used as the payload for the file examples.
# Values appear to be quoted against USD (note "USD": 1).
rates = {"USD":1,"AED":3.6725,"AFN":70.930145,"ALL":89.880745,"AMD":388.084007,"ANG":1.79,"AOA":910.532308,"ARS":948.17,"AUD":1.474829,"AWG":1.79,"AZN":1.699237,"BAM":1.749036,"BBD":2,"BDT":119.497266,"BGN":1.749165,"BHD":0.376,"BIF":2900.267644,"BMD":1,"BND":1.302522,"BOB":6.918444,"BRL":5.563555,"BSD":1,"BTN":83.858202,"BWP":13.368056,"BYN":3.25702,"BZD":2,"CAD":1.351507,"CDF":2821.533873,"CHF":0.847992,"CLP":914.946844,"CNY":7.125721,"COP":4032.124348,"CRC":523.251219,"CUP":24,"CVE":98.606453,"CZK":22.409394,"DJF":177.721,"DKK":6.669549,"DOP":59.725937,"DZD":134.240444,"EGP":48.773605,"ERN":15,"ETB":110.793381,"EUR":0.894271,"FJD":2.209201,"FKP":0.758066,"FOK":6.669253,"GBP":0.75807,"GEL":2.698374,"GGP":0.758066,"GHS":15.646015,"GIP":0.758066,"GMD":70.090529,"GNF":8687.65698,"GTQ":7.739001,"GYD":209.238751,"HKD":7.796834,"HNL":24.795486,"HRK":6.737862,"HTG":131.727086,"HUF":351.816224,"IDR":15463.537936,"ILS":3.689802,"IMP":0.758066,"INR":83.858211,"IQD":1307.71534,"IRR":42046.557011,"ISK":136.842504,"JEP":0.758066,"JMD":156.609396,"JOD":0.709,"JPY":144.238461,"KES":128.860802,"KGS":85.514332,"KHR":4063.922959,"KID":1.474819,"KMF":439.950981,"KRW":1326.065473,"KWD":0.305137,"KYD":0.833333,"KZT":483.412984,"LAK":21958.776695,"LBP":89500,"LKR":299.54412,"LRD":195.128633,"LSL":17.727478,"LYD":4.761189,"MAD":9.651574,"MDL":17.440239,"MGA":4559.019151,"MKD":55.302516,"MMK":2099.920135,"MNT":3394.316353,"MOP":8.031133,"MRU":39.757746,"MUR":46.041061,"MVR":15.425412,"MWK":1734.083284,"MXN":19.262175,"MYR":4.37505,"MZN":63.668821,"NAD":17.727478,"NGN":1572.880665,"NIO":36.923597,"NOK":10.483587,"NPR":134.173123,"NZD":1.607849,"OMR":0.384497,"PAB":1,"PEN":3.743768,"PGK":3.892274,"PHP":56.230734,"PKR":278.734522,"PLN":3.824356,"PYG":7598.969319,"QAR":3.64,"RON":4.471734,"RSD":105.116406,"RUB":91.507333,"RWF":1331.048915,"SAR":3.75,"SBD":8.500735,"SCR":13.837586,"SDG":458.303626,"SEK":10.200087,"SGD":1.302527,"SHP":0.758066,"SLE":22.414795,"SLL":22414.795049,"SOS":571.574926,"SRD":29.173375,"SSP":2809.067693,"STN":21.909564,"SYP":13122.06765,"SZL":17.727478,"THB":34.138685,"TJS":10.605013,"TMT":3.499058,"TND":3.045497,"TOP":2.324466,"TRY":34.030627,"TTD":6.784157,"TVD":1.474819,"TWD":31.688138,"TZS":2699.373687,"UAH":41.241903,"UGX":3715.735216,"UYU":40.352563,"UZS":12685.942826,"VES":36.5888,"VND":24991.37597,"VUV":117.995378,"WST":2.694395,"XAF":586.601308,"XCD":2.7,"XDR":0.742859,"XOF":586.601308,"XPF":106.714781,"YER":250.215458,"ZAR":17.727644,"ZMW":26.177908,"ZWL":13.8134}

In [7]:
# Serialize the rates mapping to JSON text and encode it as UTF-8 bytes,
# ready to be uploaded as file content.
json_rates = bytes(json.dumps(rates), "utf-8")

`write_file` can write arbitrary binary data.

In [9]:
# Path of the file node inside the working directory.
file_path = f"{working_dir}/file"
# Upload the UTF-8 encoded JSON bytes as the file's content.
yt.write_file(file_path, json_rates)

In [10]:
# Open the stored file for reading, pull its entire content into memory,
# then parse it as JSON and display the currency codes it contains.
file_stream = yt.read_file(file_path)
content = file_stream.read()
json.loads(content).keys()

dict_keys(['USD', 'AED', 'AFN', 'ALL', 'AMD', 'ANG', 'AOA', 'ARS', 'AUD', 'AWG', 'AZN', 'BAM', 'BBD', 'BDT', 'BGN', 'BHD', 'BIF', 'BMD', 'BND', 'BOB', 'BRL', 'BSD', 'BTN', 'BWP', 'BYN', 'BZD', 'CAD', 'CDF', 'CHF', 'CLP', 'CNY', 'COP', 'CRC', 'CUP', 'CVE', 'CZK', 'DJF', 'DKK', 'DOP', 'DZD', 'EGP', 'ERN', 'ETB', 'EUR', 'FJD', 'FKP', 'FOK', 'GBP', 'GEL', 'GGP', 'GHS', 'GIP', 'GMD', 'GNF', 'GTQ', 'GYD', 'HKD', 'HNL', 'HRK', 'HTG', 'HUF', 'IDR', 'ILS', 'IMP', 'INR', 'IQD', 'IRR', 'ISK', 'JEP', 'JMD', 'JOD', 'JPY', 'KES', 'KGS', 'KHR', 'KID', 'KMF', 'KRW', 'KWD', 'KYD', 'KZT', 'LAK', 'LBP', 'LKR', 'LRD', 'LSL', 'LYD', 'MAD', 'MDL', 'MGA', 'MKD', 'MMK', 'MNT', 'MOP', 'MRU', 'MUR', 'MVR', 'MWK', 'MXN', 'MYR', 'MZN', 'NAD', 'NGN', 'NIO', 'NOK', 'NPR', 'NZD', 'OMR', 'PAB', 'PEN', 'PGK', 'PHP', 'PKR', 'PLN', 'PYG', 'QAR', 'RON', 'RSD', 'RUB', 'RWF', 'SAR', 'SBD', 'SCR', 'SDG', 'SEK', 'SGD', 'SHP', 'SLE', 'SLL', 'SOS', 'SRD', 'SSP', 'STN', 'SYP', 'SZL', 'THB', 'TJS', 'TMT', 'TND', 'TOP', 'TRY', 'T

## Use files in operations

In [12]:
def mapper(row):
    """Check that the file attached to the operation is readable inside the job.

    The attached file is expected in the job's working directory under the
    name ``file``, i.e. the same name as its Cypress node.

    Args:
        row: Input table row; not used.

    Raises:
        AssertionError: If the local file does not contain the text "USD".
    """
    # Use a context manager so the handle is closed deterministically, and read
    # with an explicit encoding: the content was uploaded as UTF-8 encoded JSON.
    with open("file", encoding="utf-8") as attached_file:
        assert "USD" in attached_file.read()

In [13]:
# Input and output table paths for the demo map operation.
src = f"{working_dir}/fake_src"
dst = f"{working_dir}/fake_dst"

# Write a single dummy row into the input table.
yt.write_table(src, [{"x": 1}])

The `yt_files` parameter makes the operation download the listed files from Cypress and save them in the job's file system. Each local file has the same name as its Cypress node.

In [15]:
# Run the map operation over `src` into `dst`, attaching the Cypress file
# so that it is available to the mapper inside the job.
yt.run_map(mapper, src, dst, yt_files=[file_path])

2025-01-21 19:35:56,260	INFO	Operation started: https://planck.yt.nebius.yt/playground/operations/f38820d0-1d53dea1-134403e8-4f500a3/details


2025-01-21 19:35:56,287	INFO	( 0 min) operation f38820d0-1d53dea1-134403e8-4f500a3 starting


2025-01-21 19:35:56,811	INFO	( 0 min) operation f38820d0-1d53dea1-134403e8-4f500a3 initializing


2025-01-21 19:35:57,369	INFO	( 0 min) Unrecognized spec: {'enable_partitioned_data_balancing': false, 'mapper': {'title': 'mapper'}}


2025-01-21 19:35:59,570	INFO	( 0 min) operation f38820d0-1d53dea1-134403e8-4f500a3: running=0     completed=0     pending=1     failed=0     aborted=0     lost=0     total=1     blocked=0    


2025-01-21 19:36:03,002	INFO	( 0 min) operation f38820d0-1d53dea1-134403e8-4f500a3: running=1     completed=0     pending=0     failed=0     aborted=0     lost=0     total=1     blocked=0    


2025-01-21 19:39:26,592	INFO	( 3 min) operation f38820d0-1d53dea1-134403e8-4f500a3 completed


<yt.wrapper.operation_commands.Operation at 0x7f9398bda890>