In [1]:
import os
import xarray as xr
import pandas as pd
import numpy as np
from geopy.distance import geodesic
import requests

from ibm_watsonx_ai.foundation_models import Model

## 311 Data


In [4]:
def nc_to_xr():

    for file_name in enumerate(
        [file for file in os.listdir() if file != "ALL_SITES.nc"]
    ):

        if file_name[0] == 0:
            ds_out = xr.open_dataset(file_name[1]).assign_coords(
                {
                    "site": (
                        "site",
                        ["{site_name}".format(site_name=file_name[1].split(".")[0])],
                    )
                }
            )
    else:
        ds_out = xr.concat(
            [
                ds_out,
                xr.open_dataset(file_name[1]).assign_coords(
                    {
                        "site": (
                            "site",
                            [
                                "{site_name}, ({lat}, {lon})".format(
                                    site_name=file_name[1].split(".")[0],
                                    lat=xr.open_dataset(file_name[1])["latitude"][
                                        0
                                    ].values,
                                    lon=xr.open_dataset(file_name[1])["longitude"][
                                        0
                                    ].values,
                                )
                            ],
                        )
                    }
                ),
            ],
            dim="site",
        )


# Write to netcdf as ALL_SITES.nc
# ds_out.to_netcdf('ALL_SITES.nc')

In [5]:
response = requests.get(
    "https://data.cityofnewyork.us/resource/erm2-nwe9.json?$query=SELECT%0A%20%20%60unique_key%60%2C%0A%20%20%60created_date%60%2C%0A%20%20%60closed_date%60%2C%0A%20%20%60agency%60%2C%0A%20%20%60agency_name%60%2C%0A%20%20%60complaint_type%60%2C%0A%20%20%60descriptor%60%2C%0A%20%20%60location_type%60%2C%0A%20%20%60incident_zip%60%2C%0A%20%20%60incident_address%60%2C%0A%20%20%60street_name%60%2C%0A%20%20%60cross_street_1%60%2C%0A%20%20%60cross_street_2%60%2C%0A%20%20%60intersection_street_1%60%2C%0A%20%20%60intersection_street_2%60%2C%0A%20%20%60address_type%60%2C%0A%20%20%60city%60%2C%0A%20%20%60landmark%60%2C%0A%20%20%60facility_type%60%2C%0A%20%20%60status%60%2C%0A%20%20%60due_date%60%2C%0A%20%20%60resolution_description%60%2C%0A%20%20%60resolution_action_updated_date%60%2C%0A%20%20%60community_board%60%2C%0A%20%20%60bbl%60%2C%0A%20%20%60borough%60%2C%0A%20%20%60x_coordinate_state_plane%60%2C%0A%20%20%60y_coordinate_state_plane%60%2C%0A%20%20%60open_data_channel_type%60%2C%0A%20%20%60park_facility_name%60%2C%0A%20%20%60park_borough%60%2C%0A%20%20%60vehicle_type%60%2C%0A%20%20%60taxi_company_borough%60%2C%0A%20%20%60taxi_pick_up_location%60%2C%0A%20%20%60bridge_highway_name%60%2C%0A%20%20%60bridge_highway_direction%60%2C%0A%20%20%60road_ramp%60%2C%0A%20%20%60bridge_highway_segment%60%2C%0A%20%20%60latitude%60%2C%0A%20%20%60longitude%60%2C%0A%20%20%60location%60%0AWHERE%0A%20%20caseless_contains(%60descriptor%60%2C%20%22Manhole%20Overflow%20%22)%0A%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Street%20Flooding%22)%0A%20%20%20%20%20%20%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Sewer%20Back%20Up%22)%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20OR%20caseless_contains(%60descriptor%60%2C%20%22Catch%20Basin%22)))%0AORDER%20BY%20%60created_date%60%20DESC%20NULL%20FIRST"
)

# The API is pre-queried for September 2021 - Hurricane Ida
# response = requests.get('https://data.cityofnewyork.us/resource/erm2-nwe9.json?$query=SELECT%0A%20%20%60unique_key%60%2C%0A%20%20%60created_date%60%2C%0A%20%20%60closed_date%60%2C%0A%20%20%60agency%60%2C%0A%20%20%60agency_name%60%2C%0A%20%20%60complaint_type%60%2C%0A%20%20%60descriptor%60%2C%0A%20%20%60location_type%60%2C%0A%20%20%60incident_zip%60%2C%0A%20%20%60incident_address%60%2C%0A%20%20%60street_name%60%2C%0A%20%20%60cross_street_1%60%2C%0A%20%20%60cross_street_2%60%2C%0A%20%20%60intersection_street_1%60%2C%0A%20%20%60intersection_street_2%60%2C%0A%20%20%60address_type%60%2C%0A%20%20%60city%60%2C%0A%20%20%60landmark%60%2C%0A%20%20%60facility_type%60%2C%0A%20%20%60status%60%2C%0A%20%20%60due_date%60%2C%0A%20%20%60resolution_description%60%2C%0A%20%20%60resolution_action_updated_date%60%2C%0A%20%20%60community_board%60%2C%0A%20%20%60bbl%60%2C%0A%20%20%60borough%60%2C%0A%20%20%60x_coordinate_state_plane%60%2C%0A%20%20%60y_coordinate_state_plane%60%2C%0A%20%20%60open_data_channel_type%60%2C%0A%20%20%60park_facility_name%60%2C%0A%20%20%60park_borough%60%2C%0A%20%20%60vehicle_type%60%2C%0A%20%20%60taxi_company_borough%60%2C%0A%20%20%60taxi_pick_up_location%60%2C%0A%20%20%60bridge_highway_name%60%2C%0A%20%20%60bridge_highway_direction%60%2C%0A%20%20%60road_ramp%60%2C%0A%20%20%60bridge_highway_segment%60%2C%0A%20%20%60latitude%60%2C%0A%20%20%60longitude%60%2C%0A%20%20%60location%60%0AWHERE%0A%20%20(caseless_contains(%60descriptor%60%2C%20%22Manhole%20Overflow%20%22)%0A%20%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Street%20Flooding%22)%0A%20%20%20%20%20%20%20%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Sewer%20Back%20Up%22)%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20OR%20caseless_contains(%60descriptor%60%2C%20%22Catch%20Basin%22))))%0A%20%20AND%20(%60created_date%60%0A%20%20%20%20%20%20%20%20%20BETWEEN%20%222021-09-01T00%3A00%3A00%22%20%3A%3A%20floating_timestamp%0A%20%20%20%20%20%20%20%20%20AND%20%222021-09-10T21%3A56%3A14%22%20%3A%3A%20floating_timestamp)%0AORDER%20BY%20%60created_date%60%20DESC%20NULL%20FIRST')
results_df = pd.DataFrame.from_dict(response.json())
results_df.drop(
    labels=[
        index
        for index in results_df.columns
        if index
        not in ["closed_date", "created_date", "descriptor", "latitude", "longitude"]
    ],
    axis=1,
)

Unnamed: 0,created_date,descriptor,latitude,longitude,closed_date
0,2024-09-15T22:56:00.000,Catch Basin Search (SC2),40.63389388906263,-73.90269341053548,
1,2024-09-15T22:17:00.000,Catch Basin Sunken/Damaged/Raised (SC1),40.6447411964386,-73.9244857734746,
2,2024-09-15T20:35:00.000,Catch Basin Search (SC2),40.72135326817355,-74.00463577564992,
3,2024-09-15T20:23:00.000,Grease In Sewer/Catch Basin (IDG),40.81361445074773,-73.9157178276261,
4,2024-09-15T19:11:00.000,Street Flooding (SJ),40.59117886604837,-73.81005152429094,
...,...,...,...,...,...
995,2024-08-19T10:14:00.000,Catch Basin Search (SC2),40.82615281329658,-73.92025830601959,2024-08-25T09:50:00.000
996,2024-08-19T10:08:00.000,Street Flooding (SJ),40.71652996688692,-73.85710212217046,2024-08-19T11:15:00.000
997,2024-08-19T10:02:00.000,Catch Basin Clogged/Flooding (Use Comments) (SC),40.578317967655614,-73.93980277601726,2024-08-20T10:45:00.000
998,2024-08-19T09:47:00.000,Catch Basin Clogged/Flooding (Use Comments) (SC),40.848913547034634,-73.85443213755288,2024-08-22T16:20:00.000


In [6]:
query_311_calls("2024-09-01 13:06:09")

NameError: name 'query_311_calls' is not defined

In [7]:
endpoint = "https://data.cityofnewyork.us/resource/erm2-nwe9.json?&$where=created_date between '{}' and '{}' &complaint_type='Sewer'".format(
    "2021-09-01", "2021-09-02"
)
response = requests.get(endpoint)
results_df = pd.DataFrame.from_dict(response.json()).iloc[-50::]
results_df

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,...,x_coordinate_state_plane,y_coordinate_state_plane,open_data_channel_type,park_facility_name,park_borough,latitude,longitude,location,intersection_street_1,intersection_street_2
950,51714144,2021-09-01T22:21:00.000,2021-09-05T11:15:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11218,65 EAST 10 STREET,EAST 10 STREET,...,992463,175021,ONLINE,Unspecified,BROOKLYN,40.64706505485257,-73.97040370878996,"{'latitude': '40.64706505485257', 'longitude':...",,
951,51714146,2021-09-01T22:21:00.000,2021-09-06T10:05:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11217,108 6 AVENUE,6 AVENUE,...,991023,186643,ONLINE,Unspecified,BROOKLYN,40.67896609055225,-73.97558120441936,"{'latitude': '40.67896609055225', 'longitude':...",,
952,51724362,2021-09-01T22:21:00.000,2021-09-11T18:20:00.000,DEP,Department of Environmental Protection,Sewer,Catch Basin Clogged/Flooding (Use Comments) (SC),11369,30-02 92 STREET,92 STREET,...,1018335,216695,ONLINE,Unspecified,QUEENS,40.7613884698132,-73.87696074621682,"{'latitude': '40.7613884698132', 'longitude': ...",,
953,51729772,2021-09-01T22:21:00.000,2021-09-02T00:00:00.000,DEP,Department of Environmental Protection,Sewer,Sewer Backup (Use Comments) (SA),11226,239 EAST 23 STREET,EAST 23 STREET,...,996576,173809,PHONE,Unspecified,BROOKLYN,40.64373360732024,-73.95558435650193,"{'latitude': '40.64373360732024', 'longitude':...",,
954,51714145,2021-09-01T22:21:00.000,2021-09-06T12:45:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11370,73-03 31 AVENUE,31 AVENUE,...,1013547,215592,ONLINE,Unspecified,QUEENS,40.75837818221721,-73.89424914771638,"{'latitude': '40.75837818221721', 'longitude':...",,
955,51720435,2021-09-01T22:21:00.000,2021-09-02T09:30:00.000,DEP,Department of Environmental Protection,Sewer,Sewer Backup (Use Comments) (SA),11421,90-14 91 AVENUE,91 AVENUE,...,1025022,190333,PHONE,Unspecified,QUEENS,40.68900297835585,-73.85298192057249,"{'latitude': '40.68900297835585', 'longitude':...",,
956,51720432,2021-09-01T22:21:00.000,2021-09-02T02:50:00.000,DEP,Department of Environmental Protection,Sewer,Sewer Backup (Use Comments) (SA),10461,2420 TRATMAN AVENUE,TRATMAN AVENUE,...,1026559,244554,ONLINE,Unspecified,BRONX,40.83781817010061,-73.84709845709146,"{'latitude': '40.83781817010061', 'longitude':...",,
957,51714105,2021-09-01T22:21:00.000,2021-09-05T17:40:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11238,291 STERLING PLACE,STERLING PLACE,...,993027,185445,ONLINE,Unspecified,BROOKLYN,40.67567609121362,-73.9683577147682,"{'latitude': '40.675676091213624', 'longitude'...",,
958,51725116,2021-09-01T22:21:00.000,2021-09-05T09:25:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11207,331 HIGHLAND BOULEVARD,HIGHLAND BOULEVARD,...,1014589,188357,ONLINE,Unspecified,BROOKLYN,40.68362122714782,-73.89061069847263,"{'latitude': '40.68362122714782', 'longitude':...",,
959,51715977,2021-09-01T22:21:00.000,2021-09-04T10:30:00.000,DEP,Department of Environmental Protection,Sewer,Sewer Backup (Use Comments) (SA),11234,1022 EAST 59 STREET,EAST 59 STREET,...,1006683,168088,ONLINE,Unspecified,BROOKLYN,40.6280108533589,-73.91918371722146,"{'latitude': '40.6280108533589', 'longitude': ...",,


In [8]:
def query_311_calls(time_str):

    # Return 311 calls from query with timestamp returned from geocode_address() in simple_server.py
    # Ensure input argument is a string; output is a Pandas DataFrame

    # The API is pre-queried for Street Flooding (SF), Sewer Back-Up (BU), Manhole Overflow (MO), Catch Basin (CB)
    response = requests.get(
        "https://data.cityofnewyork.us/resource/erm2-nwe9.json?$query=SELECT%0A%20%20%60unique_key%60%2C%0A%20%20%60created_date%60%2C%0A%20%20%60closed_date%60%2C%0A%20%20%60agency%60%2C%0A%20%20%60agency_name%60%2C%0A%20%20%60complaint_type%60%2C%0A%20%20%60descriptor%60%2C%0A%20%20%60location_type%60%2C%0A%20%20%60incident_zip%60%2C%0A%20%20%60incident_address%60%2C%0A%20%20%60street_name%60%2C%0A%20%20%60cross_street_1%60%2C%0A%20%20%60cross_street_2%60%2C%0A%20%20%60intersection_street_1%60%2C%0A%20%20%60intersection_street_2%60%2C%0A%20%20%60address_type%60%2C%0A%20%20%60city%60%2C%0A%20%20%60landmark%60%2C%0A%20%20%60facility_type%60%2C%0A%20%20%60status%60%2C%0A%20%20%60due_date%60%2C%0A%20%20%60resolution_description%60%2C%0A%20%20%60resolution_action_updated_date%60%2C%0A%20%20%60community_board%60%2C%0A%20%20%60bbl%60%2C%0A%20%20%60borough%60%2C%0A%20%20%60x_coordinate_state_plane%60%2C%0A%20%20%60y_coordinate_state_plane%60%2C%0A%20%20%60open_data_channel_type%60%2C%0A%20%20%60park_facility_name%60%2C%0A%20%20%60park_borough%60%2C%0A%20%20%60vehicle_type%60%2C%0A%20%20%60taxi_company_borough%60%2C%0A%20%20%60taxi_pick_up_location%60%2C%0A%20%20%60bridge_highway_name%60%2C%0A%20%20%60bridge_highway_direction%60%2C%0A%20%20%60road_ramp%60%2C%0A%20%20%60bridge_highway_segment%60%2C%0A%20%20%60latitude%60%2C%0A%20%20%60longitude%60%2C%0A%20%20%60location%60%0AWHERE%0A%20%20caseless_contains(%60descriptor%60%2C%20%22Manhole%20Overflow%20%22)%0A%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Street%20Flooding%22)%0A%20%20%20%20%20%20%20%20%20%20OR%20(caseless_contains(%60descriptor%60%2C%20%22Sewer%20Back%20Up%22)%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20OR%20caseless_contains(%60descriptor%60%2C%20%22Catch%20Basin%22)))%0AORDER%20BY%20%60created_date%60%20DESC%20NULL%20FIRST"
    )
    results = pd.DataFrame.from_dict(response.json())
    results_df = results.drop(
        labels=[
            index
            for index in results.columns
            if index
            not in [
                "closed_date",
                "created_date",
                "descriptor",
                "latitude",
                "longitude",
            ]
        ],
        axis=1,
    )

    index_list = []
    for index in np.arange(len(results_df)):
        if pd.to_datetime(
            " ".join(results_df["created_date"].iloc[index].split("T"))
        ) in pd.period_range(
            start=pd.to_datetime(time_str) - pd.Timedelta(hours=2),
            end=pd.to_datetime(time_str),
            freq="S",
        ):
            index_list.append(index)
            continue
        else:
            if len(index_list) > 0:
                break

    # return(index_list)
    return results_df.iloc[index_list[0] : index_list[-1] + 1]

In [9]:
# sensor_data = xr.open_dataset('/Users/f004xr3/Downloads/NYC_Micronet/ALL_SITES.nc')
def return_sensor_coords(sensor_data):

    # Return dictionary with coordinates (lat, lon) of the sensor points
    # Ensure input argument is an Xarray Dataset with dimension 'site'; output is a dictionary

    site_dict = {}
    for site_name in sensor_data["site"].values:
        da = sensor_data.sel(site=site_name)
        site_dict[site_name] = (
            np.unique(da["latitude"].values[~np.isnan(da["latitude"].values)]),
            np.unique(da["longitude"].values[~np.isnan(da["longitude"].values)]),
        )

    return site_dict

In [10]:
def return_exclusions():
    return exclusion_points

In [11]:
exclusion_points = []
# results_df = query_311_calls('2024-09-01 13:06:09')
for index in np.arange(len(results_df)):

    # Compute distances between the 311 lat/lon and sensor lat/lon
    dist_dict = {}
    if float(results_df["latitude"].iloc[index]) == float(
        results_df["latitude"].iloc[index]
    ):
        if float(results_df["longitude"].iloc[index]) == float(
            results_df["longitude"].iloc[index]
        ):
            for site_name in return_sensor_coords(sensor_data).keys():
                dist_dict[(site_name)] = geodesic(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    ),
                    (
                        return_sensor_coords(sensor_data)[site_name][0][0],
                        return_sensor_coords(sensor_data)[site_name][1][0],
                    ),
                ).m
        else:
            continue

    # Note: Even if lat/lon is not provided there can be a way to use the address if provided
    else:
        continue

    # Return sensor lat/lon with shortest distance
    index_date = pd.to_datetime(
        " ".join(results_df["created_date"].iloc[index].split("T"))
    )
    sensor_index = (
        xr.open_dataset("../ALL_SITES.nc")
        .sel(
            time=slice(
                index_date - pd.Timedelta(hours=4), index_date + pd.Timedelta(hours=4)
            )
        )
        .sel(site=min(dist_dict, key=dist_dict.get))
    )
    precip_values = sm_values = []
    for value in np.arange(len(sensor_index.coords["time"])):

        if sensor_index.isel(time=value)["precip_max_intensity"].values > 0.2:
            precip_values.append(
                sensor_index.isel(time=value)["precip_max_intensity"].values
            )
            if len(precip_values) == 6:
                # Conditional currently set to 6 for 30min period
                exclusion_points.append(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    )
                )
                precip_values = []
            continue

        elif sensor_index.isel(time=value)["soil_moisture_05cm"].values > 0.5:
            sm_values.append(sensor_index.isel(time=value)["soil_moisture_05cm"].values)
            if len(sm_values) == 6:
                # Conditional currently set to 6 for 30min period
                exclusion_points.append(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    )
                )
                sm_values = []
            continue

        else:
            precip_values = sm_values = []
            continue

NameError: name 'sensor_data' is not defined

In [None]:
exclusion_points = []
# results_df = query_311_calls('2024-09-01 13:06:09')
for index in np.arange(len(results_df)):

    # Compute distances between the 311 lat/lon and sensor lat/lon
    dist_dict = {}
    if float(results_df["latitude"].iloc[index]) == float(
        results_df["latitude"].iloc[index]
    ):
        if float(results_df["longitude"].iloc[index]) == float(
            results_df["longitude"].iloc[index]
        ):
            for site_name in return_sensor_coords(sensor_data).keys():
                dist_dict[(site_name)] = geodesic(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    ),
                    (
                        return_sensor_coords(sensor_data)[site_name][0][0],
                        return_sensor_coords(sensor_data)[site_name][1][0],
                    ),
                ).m
        else:
            continue

    # Note: Even if lat/lon is not provided there can be a way to use the address if provided
    else:
        continue

    # Return sensor lat/lon with shortest distance
    index_date = pd.to_datetime(
        " ".join(results_df["created_date"].iloc[index].split("T"))
    )
    sensor_index = (
        xr.open_dataset("/Users/f004xr3/Downloads/NYC_Micronet/ALL_SITES.nc")
        .sel(
            time=slice(
                index_date - pd.Timedelta(hours=4), index_date + pd.Timedelta(hours=4)
            )
        )
        .sel(site=min(dist_dict, key=dist_dict.get))
    )
    precip_values = sm_values = []
    for value in np.arange(len(sensor_index.coords["time"])):

        if sensor_index.isel(time=value)["precip_max_intensity"].values > 0.2:
            precip_values.append(
                sensor_index.isel(time=value)["precip_max_intensity"].values
            )
            if len(precip_values) == 6:
                # Conditional currently set to 6 for 30min period
                exclusion_points.append(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    )
                )
                precip_values = []
            continue

        elif sensor_index.isel(time=value)["soil_moisture_05cm"].values > 0.5:
            sm_values.append(sensor_index.isel(time=value)["soil_moisture_05cm"].values)
            if len(sm_values) == 6:
                # Conditional currently set to 6 for 30min period
                exclusion_points.append(
                    (
                        float(results_df["latitude"].iloc[index]),
                        float(results_df["longitude"].iloc[index]),
                    )
                )
                sm_values = []
            continue

        else:
            precip_values = sm_values = []
            continue

In [433]:
exclusion_points.append(
    (
        float(results_df["latitude"].iloc[index]),
        float(results_df["longitude"].iloc[index]),
    )
)

In [439]:
set(exclusion_points)

{(40.546404318911264, -74.15202962902956),
 (40.56131007770137, -74.1813123915363),
 (40.59150780655313, -73.94187202882603),
 (40.60446623400138, -74.01305828283918),
 (40.606547144057565, -74.06224666674144),
 (40.60978595455688, -74.00999798171607),
 (40.60996469060479, -73.99496137829729),
 (40.611646495186235, -74.1014629160993),
 (40.61222892944777, -74.00866931651775),
 (40.61278383449121, -74.06623600786988),
 (40.624814500596194, -74.07890314531045),
 (40.6280108533589, -73.91918371722146),
 (40.640384820294486, -73.92814437995956),
 (40.641440019569124, -74.08609334887396),
 (40.642838650959206, -73.95518137557912),
 (40.64373360732024, -73.95558435650193),
 (40.64706505485257, -73.97040370878996),
 (40.668200075658525, -73.98599561383345),
 (40.670654225566096, -73.94853333620769),
 (40.674274937628056, -74.00643503376475),
 (40.67497228184598, -73.99859401153655),
 (40.675676091213624, -73.9683577147682),
 (40.676231243854815, -73.9710469058759),
 (40.67896609055225, -73.97

### Filter and group the DataFrame on the basis of date and complaint type:


In [192]:
da = xr.open_dataset("ALL_SITES.nc").sel(site="MHERAC")["latitude"]
np.unique(da.values[~np.isnan(da.values)])

array([40.725], dtype=float32)

## Micronet NYC


In [80]:
# os.chdir('NYC_Micronet')
for file_name in enumerate([file for file in os.listdir() if file != "ALL_SITES.nc"]):

    if file_name[0] == 0:
        ds_out = xr.open_dataset(file_name[1]).assign_coords(
            {
                "site": (
                    "site",
                    [
                        "{site_name}, ({lat}, {lon})".format(
                            site_name=file_name[1].split(".")[0],
                            lat=xr.open_dataset(file_name[1])["latitude"][0].values,
                            lon=xr.open_dataset(file_name[1])["longitude"][0].values,
                        )
                    ],
                )
            }
        )
    else:
        ds_out = xr.concat(
            [
                ds_out,
                xr.open_dataset(file_name[1]).assign_coords(
                    {
                        "site": (
                            "site",
                            [
                                "{site_name}, ({lat}, {lon})".format(
                                    site_name=file_name[1].split(".")[0],
                                    lat=xr.open_dataset(file_name[1])["latitude"][
                                        0
                                    ].values,
                                    lon=xr.open_dataset(file_name[1])["longitude"][
                                        0
                                    ].values,
                                )
                            ],
                        )
                    }
                ),
            ],
            dim="site",
        )

# Write to netcdf as ALL_SITES.nc
# ds_out.to_netcdf('ALL_SITES.nc')

In [82]:
ds_out

## IBM LLM


In [7]:
def get_credentials():
    return {
        "url": "https://us-south.ml.cloud.ibm.com",
        "apikey": "2xP3cDUcNQM7DMlrwmzZeEYF42pDJISl1vk1qMPr_Fie",
    }


model_id = "ibm/granite-13b-instruct-v2"

parameters = {
    "decoding_method": "greedy",
    "max_new_tokens": 8191,
    "repetition_penalty": 1,
}

project_id = "091feb32-646c-44a6-99e0-6aeb4e7963e3"

model = Model(
    model_id=model_id,
    params=parameters,
    credentials=get_credentials(),
    project_id=project_id,
)

prompt_input = f"""

Here are some points. The latitude is in the first column and the longitude is in the second column:

{40, -73}
{41, -74.5}
{40.5, -72.5}
{32, -66.2}

Compute the distance between these points and avoid repetition. Which two points have the shortest distance between them?

"""

print("Submitting generation request...")
generated_response = model.generate_text(prompt=prompt_input)
print(generated_response)

Submitting generation request...


Failure during generate. (POST https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2024-08-30)
Status code: 403, body: {"errors":[{"code":"token_quota_reached","message":"Request of 1 token(s) from quota was rejected","more_info":"https://cloud.ibm.com/apidocs/watsonx-ai"}],"trace":"16d81d5d3efa5639a9e4cfdfd2f81ef9","status_code":403}


ApiRequestFailure: Failure during generate. (POST https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2024-08-30)
Status code: 403, body: {"errors":[{"code":"token_quota_reached","message":"Request of 1 token(s) from quota was rejected","more_info":"https://cloud.ibm.com/apidocs/watsonx-ai"}],"trace":"16d81d5d3efa5639a9e4cfdfd2f81ef9","status_code":403}

In [3]:
def get_credentials():
    return {
        "url": "https://us-south.ml.cloud.ibm.com",
        "apikey": "MWR_k-b6dOX0AwD10RVGt8Xm7XnfNNw30ve0LzUbXgaP",
    }


model_id = "ibm/granite-20b-multilingual"

parameters = {
    "decoding_method": "greedy",
    "max_new_tokens": 4096,
    "repetition_penalty": 1,
}

project_id = "091feb32-646c-44a6-99e0-6aeb4e7963e3"

model = Model(
    model_id=model_id,
    params=parameters,
    credentials=get_credentials(),
    project_id=project_id,
)

prompt_input = f"""
Task Description:

You are a language detection and translation assistant. For any given input text, your task is to:

Detect the language of the input text.
Translate the detected language in English.

You MUST return the response in the following JSON format:

{{
'language': '[Detected Language]',
'translation': '[Translated text]'
}}

Return these two properties in this JSON format and NOTHING else.

Example 1:
Input: "Bonjour, je m'appelle Marie."

Output: 
{{ 
    'language': 'French',
    'translation': 'Hello, my name is Marie.'
}}

Example 2:

Input: "¿Dónde está la biblioteca?"

Output: 
{{
'language': 'Spanish',
'translation': 'Where is the library?'
}}


Example 3:

Input: "Guten Tag, wie geht es Ihnen?"

Output: 
{{
'language': 'German',
'translation': 'Good day, how are you?'
}}

Example 4:

Input: "私は学生です。"

Output: 

{{
'language': 'Japanese',
'translation': 'I am a student.'
}}

Example 5:

Input: "Привет, как дела?"

Output: 

{{
'language': 'Russian',
'translation': 'Hi, how are you?'
}}

Now, please process the following input according to the task and return the JSON output:

Input: "¿Puedes recomendarme una buena película para ver?"
Output:
"""

print("Submitting generation request...")
generated_response = model.generate_text(prompt=prompt_input)
print(generated_response)

Submitting generation request...


KeyboardInterrupt: 

In [5]:
def get_credentials():
    return {
        "url": "https://us-south.ml.cloud.ibm.com",
        "apikey": "2xP3cDUcNQM7DMlrwmzZeEYF42pDJISl1vk1qMPr_Fie",
    }


model_id = "ibm/granite-20b-multilingual"

parameters = {
    "decoding_method": "greedy",
    "max_new_tokens": 100,
    "repetition_penalty": 1,
}

project_id = "091feb32-646c-44a6-99e0-6aeb4e7963e3"

model = Model(
    model_id=model_id,
    params=parameters,
    credentials=get_credentials(),
    project_id=project_id,
)

prompt_input = "how are you?"

print("Submitting generation request...")
generated_response = model.generate_text(prompt=prompt_input)
print(generated_response)

Submitting generation request...


In [4]:
def get_credentials():
    return {
        "url": "https://us-south.ml.cloud.ibm.com",
        "apikey": "2xP3cDUcNQM7DMlrwmzZeEYF42pDJISl1vk1qMPr_Fie",
    }


model_id = "ibm/granite-13b-instruct-v2"

parameters = {
    "decoding_method": "greedy",
    "max_new_tokens": 8191,
    "repetition_penalty": 1,
}

project_id = "091feb32-646c-44a6-99e0-6aeb4e7963e3"

model = Model(
    model_id=model_id,
    params=parameters,
    credentials=get_credentials(),
    project_id=project_id,
)

prompt_input = f"""
What is the capital of the US state of Nebraska?
"""

print("Submitting generation request...")
generated_response = model.generate_text(prompt=prompt_input)
print(generated_response)

Submitting generation request...
Omaha
