In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import glob
from os.path import join, basename
from datetime import datetime
import requests

In [2]:
# everything in the output folder will be store in the Zenodo dataset
ofolder = "../data/output/cm_uniform/yearly"

# MPW global model initial locations

### Coasts

In [15]:
folder = "sources"

csv_files = [
    join(folder, "coasts_all_x.csv"),
    join(folder, "coasts_all_y.csv"),
    join(folder, "coasts_all_w.csv"),
]

df = pd.concat([pd.read_csv(file, header=None) for file in csv_files], axis=1)
df.columns = ["longitude", "latitude", "weight [ton]"]
df["weight [ton]"] /= 12  # monthly value
df["source [1:land, 0:river]"] = np.ones(len(df), dtype="int")

In [16]:
df.head()

Unnamed: 0,longitude,latitude,weight [ton],"source [1:land, 0:river]"
0,-175.4962,-21.2593,0.998492,1
1,-175.5369,-21.0527,2.214808,1
2,-174.884,-21.1788,4.810267,1
3,-175.0601,-20.9595,1.174325,1
4,-174.854,-21.2223,1.127658,1


### Rivers

In [17]:
csv_files = [
    join(folder, "rivers_all_x.csv"),
    join(folder, "rivers_all_y.csv"),
    join(folder, "rivers_all_w.csv"),
]

df_r = pd.concat([pd.read_csv(file, header=None) for file in csv_files], axis=1)
df_r.columns = ["longitude", "latitude", "weight [ton]"]
df_r["weight [ton]"] /= 12  # monthly value
df_r["source [1:land, 0:river]"] = np.zeros(len(df_r), dtype="int")

In [18]:
df_r

Unnamed: 0,longitude,latitude,weight [ton],"source [1:land, 0:river]"
0,3.8659,37.1401,1.065275,0
1,5.9465,37.0161,2.923875,0
2,7.8943,37.1285,1.035375,0
3,3.6030,37.0258,2.394083,0
4,3.1497,37.0693,1.046875,0
...,...,...,...,...
3582,91.3428,22.2389,118.788333,0
3583,91.3840,22.2174,118.788333,0
3584,91.3574,22.2282,118.788333,0
3585,91.3282,22.2190,118.788333,0


In [19]:
# save global initial release to one csv
pd.concat((df, df_r), axis=0).to_csv(
    join(ofolder, "initial-locations-global.csv"), index=False
)

In [21]:
np.sum(df["weight [ton]"]) + np.sum(df_r["weight [ton]"])

6435944.4616

# Yearly files

In [3]:
# merge release into year to reduce the number of files
folder = "../data/output/cm_uniform/"

In [4]:
release_folder = (
    "/nexsan/people/pmiron/projects_mars/caribbean-marine-litter/notebooks/releases"
)


def particle_per_year(year):
    nb_particles = []
    for m in range(1, 13):
        file = f"particles_info_{year}{m:02d}01.csv"
        df = pd.read_csv(f"{release_folder}/{file}")
        nb_particles.append(len(df))
    return nb_particles

In [8]:
for year in range(2010, 2011):
    # monthly files for year
    files = sorted(glob.glob(join(folder, f"*{year}*")))

    # particle per monthly release
    nb_particles = len(pd.read_csv(f"{release_folder}/particles_info_uniform.csv"))

    # read first file to create the dimension
    df = xr.open_dataset(files[0])
    time = np.arange(0, df.dims["obs"], dtype="int16")
    lon = np.full((nb_particles * 12, df.dims["obs"]), np.nan, dtype="float32")
    lat = np.full((nb_particles * 12, df.dims["obs"]), np.nan, dtype="float32")
    df.close()

    id0 = 0
    for i, f in enumerate(files):
        print(f"{year}/{i+1:02d}", end="\r")

        df = xr.open_dataset(f)

        # offset for monthly releases
        i_range = np.arange(id0, id0 + nb_particles)
        j_offset = len(time) - df.dims["obs"]

        lon[i_range, j_offset:] = df.lon
        lat[i_range, j_offset:] = df.lat

        df.close()
        id0 += nb_particles

    # create and save yearly netCDF
    xr.Dataset(
        data_vars=dict(
            # position and velocity
            lon=(
                ["traj", "obs"],
                lon,
                {"long_name": "longitude", "units": "degrees_east"},
            ),
            lat=(
                ["traj", "obs"],
                lat,
                {"long_name": "latitude", "units": "degrees_north"},
            ),
        ),
        coords=dict(
            time=(
                ["obs"],
                time,
                {"long_name": "time", "units": f"days since {year}-01-01"},
            ),
        ),
        attrs={
            "title": "Caribbean Marine Litter trajectories",
            "description": f"Combined monthly releases for {year}.",
            "institution": "FLorida State University Center for Ocean-Atmospheric Prediction Studies (COAPS)",
            "references": "P. Miron, X. Xu, O. Zavala-Romero, and Eric Chassignet, Seasonality of Marine Litter Hotspots in the Wider Caribbean Region",
            "date_created": datetime.now().isoformat(),
        },
    ).to_netcdf(f"{ofolder}/caribbean-marine-litter-uniform-{year}.nc")

2010/12

In [None]:
xr.open_dataset(f"{ofolder}/caribbean-marine-litter-uniform-2010.nc")

# Upload to Zenodo

In [9]:
ACCESS_TOKEN = "8FTulDI1pqIgwcNxhgvgDdBplwhlqPXHX3iDAigKISz4brFnDaWvTY3jBGvl"

headers = {"Content-Type": "application/json"}
params = {"access_token": ACCESS_TOKEN}

r = requests.get("https://zenodo.org/api/deposit/depositions", params=params)

r.status_code

200

In [11]:
r.json()[0]["links"]

{'self': 'https://zenodo.org/api/records/10347216',
 'html': 'https://zenodo.org/records/10347216',
 'doi': 'https://doi.org/10.5281/zenodo.10347216',
 'badge': 'https://zenodo.org/badge/doi/10.5281%2Fzenodo.10347216.svg',
 'files': 'https://zenodo.org/api/records/10347216/files',
 'latest_draft': 'https://zenodo.org/api/deposit/depositions/10347216',
 'latest_draft_html': 'https://zenodo.org/deposit/10347216',
 'publish': 'https://zenodo.org/api/deposit/depositions/10347216/actions/publish',
 'edit': 'https://zenodo.org/api/deposit/depositions/10347216/actions/edit',
 'discard': 'https://zenodo.org/api/deposit/depositions/10347216/actions/discard',
 'newversion': 'https://zenodo.org/api/deposit/depositions/10347216/actions/newversion',
 'registerconceptdoi': 'https://zenodo.org/api/deposit/depositions/10347216/actions/registerconceptdoi'}

In [18]:
# had to upload once with the old API to make the 'bucket' link appear
# hopefully they fix this issue soon https://github.com/zenodo/zenodo/issues/2286
bucket_url = r.json()[0]["links"]["bucket"]

In [19]:
bucket_url

'https://zenodo.org/api/files/bd9565ab-d00d-4ec9-82e8-312fb7b7f0c5'

In [None]:
files = sorted(glob.glob("yearly/*"))

In [20]:
files = sorted(glob.glob("yearly/*"))

In [35]:
# API
for file in files:
    print(f"Uploading {file}.")
    # The target URL is a combination of the bucket link with the desired filename
    # seperated by a slash.
    with open(file, "rb") as fp:
        r = requests.put(
            "%s/%s" % (bucket_url, basename(file)),
            data=fp,
            params=params,
        )
    r.json()

Uploading yearly/global-marine-litter-2013.nc.
Uploading yearly/global-marine-litter-2014.nc.
Uploading yearly/global-marine-litter-2015.nc.
Uploading yearly/global-marine-litter-2016.nc.
Uploading yearly/global-marine-litter-2017.nc.
Uploading yearly/global-marine-litter-2018.nc.
Uploading yearly/global-marine-litter-2019.nc.
Uploading yearly/global-marine-litter-2020.nc.
Uploading yearly/global-marine-litter-2021.nc.
Uploading yearly/initial-locations-global.csv.
