In [2]:
import os
from pathlib import Path, PurePath
import logging
from weaviate.util import generate_uuid5
import requests
from tqdm import tqdm
import json


from py2neo import Graph, Node, Relationship
from py2neo.bulk import create_nodes, merge_nodes, merge_relationships

# Get Neo4j client.
# Connection settings may be overridden through environment variables so that
# real credentials never have to be written into this file; the fallbacks are
# the values that used to be hard-coded here.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j"),
    ),
)


# Working directory of the running process and its parent directory.
cwd = os.getcwd()
# ``.parent`` (unlike ``.parents[0]``) also works when cwd is the filesystem
# root, where it simply returns the root itself.
pd = Path(cwd).parent


# Setting up logs
log_dir = os.path.join(cwd, "logs")
# FileHandler does not create missing directories, so make sure the log
# folder exists before opening the log file inside it.
os.makedirs(log_dir, exist_ok=True)

logger = logging.getLogger(__name__)
# The logger itself lets everything through; the handler below decides what
# is actually written to the file.
logger.setLevel(logging.DEBUG)

file_handler = logging.FileHandler(
    os.path.join(log_dir, "neo4j_dataset_platform_index_logs.log")
)
# Only WARNING and above end up in the log file.
file_handler.setLevel(logging.WARNING)
log_formatter = logging.Formatter("%(asctime)s|%(name)s|%(message)s")
file_handler.setFormatter(log_formatter)
logger.addHandler(file_handler)


# Data folder: the "data" directory located under the parent directory `pd`.
data_path = os.path.join(pd, "data")

# Folder that holds the Dataset/Collection JSON files.
collection_jsons_path = os.path.join(data_path, "PROD_20230409")

collections_dict = dict()

# Full path of every directory entry whose name ends with ".json".
collection_jsons_list = [
    os.path.join(collection_jsons_path, entry_name)
    for entry_name in filter(
        lambda name: name.endswith(".json"),
        os.listdir(collection_jsons_path),
    )
]


def create_dataset_platform(batch, data_dict, graph=None):
    """Append one dataset/platform triple to ``batch``.

    Args:
        batch: List accumulating ``[dataset_id, {}, platform_id]`` triples.
            It is mutated in place.
        data_dict: Mapping that provides the keys ``"platform_id"`` and
            ``"dataset_id"``.
        graph: Unused. Accepted only so that existing callers which pass a
            graph client keep working; it no longer defaults to a module
            global, so the function can be defined without a live client.

    Returns:
        The same ``batch`` list that was passed in.

    Raises:
        KeyError: If ``data_dict`` lacks ``"platform_id"`` or
            ``"dataset_id"``.
    """
    platform_id = data_dict["platform_id"]
    dataset_id = data_dict["dataset_id"]

    # The empty dict in the middle is a placeholder with no entries;
    # presumably it is the relationship's property map - confirm against the
    # consumer of ``batch``.
    batch.append([dataset_id, {}, platform_id])

    return batch


# Collect one [dataset_id, {}, platform_id] triple for every
# dataset/platform pair found in the collection JSON files.
batch = []
for file in collection_jsons_list:
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(file, encoding="utf-8") as json_file:
        data = json.load(json_file)

    dataset_id = generate_uuid5(data["ShortName"])

    platforms = data.get("Platforms") or []
    if not platforms:
        # A collection without platforms yields no relationships; record it
        # instead of aborting the whole import with a KeyError.
        logger.warning("No platforms listed in %s; skipping", file)
        continue

    for platform in platforms:
        platform_id = generate_uuid5(platform["ShortName"])
        data_dict = {"platform_id": platform_id, "dataset_id": dataset_id}
        create_dataset_platform(batch=batch, data_dict=data_dict, graph=graph)


# Dataset -> Platform. Each triple is (start, detail, end), so the dataset id
# is looked up on Dataset nodes and the platform id on Platform nodes.
# NOTE(review): assumes both node kinds store the uuid5 of their ShortName in
# the "globalId" property - confirm against the node-creation scripts.
merge_relationships(
    graph.auto(),
    batch,
    "HAS_PLATFORM",
    start_node_key=("Dataset", "globalId"),
    end_node_key=("Platform", "globalId"),
)

# Platform -> Dataset. The start and end node keys are swapped here, so the
# triples must be swapped as well; reusing ``batch`` unchanged would look up
# dataset ids on Platform nodes (and vice versa) and match nothing.
reversed_batch = [[end_id, detail, start_id] for start_id, detail, end_id in batch]
merge_relationships(
    graph.auto(),
    reversed_batch,
    "PLATFORM_OF_DATASET",
    start_node_key=("Platform", "globalId"),
    end_node_key=("Dataset", "globalId"),
)

PRECIP_SSMI_F15---->DMSP 5D-3/F15
9d486e2c-9219-51a3-9278-1228ef8eb5b4---->6d790f75-f503-5ddc-9f75-bce5ad3238a4
M2I3NVCHM---->MERRA-2
716349ca-bdde-57e5-8849-06d2a9f0936d---->6c3c1072-2943-5a74-93f6-6e9583b602aa
AIRX3ST8---->Aqua
97f3d0bf-94d4-5cde-9e72-fc31705e2819---->d8e30028-f813-51ec-bbe0-12282466a66a
OMPS_NPP_LP_L1G_EV---->Suomi-NPP
4f129c8d-9a27-559c-89ff-f3719a6f2cf5---->30f86105-41b9-5eca-a06b-ddf77dda1007
ACOS_L2_Lite_SIF---->GOSAT
febad0d4-63a0-565f-a975-90d5180829a5---->3fcd5440-a655-5387-9b42-e31e84707356
ML3DZHOCL---->Aura
89b02953-2713-5b13-95e9-02138f1e9c6c---->e3d2ab6d-15ab-5a41-9dc2-3f90b03f807b
M2T1NXOCN---->MERRA-2
d496cf55-7f6a-5e5b-952a-4247ea5a1b7d---->6c3c1072-2943-5a74-93f6-6e9583b602aa
TRMM_3B42RT_Daily---->TMPA
740523b4-674f-5d62-b120-ac0e0a7f470c---->a7d50dd7-c163-54ff-9c4e-ebb66f9ce02c
GSSTF_F14---->DMSP 5D-2/F14
545d1d27-12e4-5be8-8635-77eab77c6813---->db457f9b-37c4-5d65-8384-090bb4a869f3
SNDRJ1ML2RMS---->NOAA-20
e6cffd14-ac88-57b7-8d27-ca8eb1c12b3d---->24

CAR_CLAMS_L1C---->Convair-580
24b2eb37-219b-5493-8369-2a02df90b394---->890f2da7-cd69-5027-a9c0-ea6cd4f85f08
SBUV2N11L3zm---->NOAA-11
5764da82-9df4-57d9-823f-c521bf376f31---->4b118893-3348-5e0e-81c5-c7d110f8ec83
HIRMLS3IWC---->Aura
38481145-7f7e-55e7-9a04-f7d77f09af0d---->e3d2ab6d-15ab-5a41-9dc2-3f90b03f807b
GPM_2HSLH_TRMM---->TRMM
832e412c-668b-5066-a37d-28b23aaa0596---->26ca51b1-8234-5d29-941d-9f7f65347529
OMHCHOG---->Aura
54619113-79c8-5f38-ac9c-4c5a74332afc---->e3d2ab6d-15ab-5a41-9dc2-3f90b03f807b
S5P_L2__NP_BD6---->Sentinel-5P
8edc3768-9ab2-5071-bb09-8984306105db---->8c3a6a75-f8b6-5ea6-93a7-e435d246a9c6
GPSROZPBLA---->TSX
7c3584bd-4f73-54b1-aef6-de284aed1485---->db8261a9-a788-5b19-b5e0-46bb8eda5932
GPSROZPBLA---->COSMIC/FORMOSAT-3
7c3584bd-4f73-54b1-aef6-de284aed1485---->cf05da86-6dc8-5909-9359-66b2ddbd0c2f
SOR4XPSD_HIGH---->SORCE
b6056c24-6c71-58a8-ba41-a3f119f7dd31---->f1cde523-8b27-58b5-bf8b-b92a8ec30bdb
MATMNXINT---->MERRA
5dfea56b-8c84-579a-a6f8-030d9d066392---->0cc18cf2-deb0-

TRPSDL2NH3CRS1FS---->JPSS-1
216d4991-cfd4-5cf7-a789-065df2ab0d09---->f1db6fd0-460a-5290-bbd0-f9cfbd4bae99
TRPSDL2O3TRPOMIFS---->Sentinel-5P
fd74979c-a5e0-5f5a-ace2-000ee722ea3f---->8c3a6a75-f8b6-5ea6-93a7-e435d246a9c6
SOR3SOLD_MGII---->SORCE
51c3f4fb-ade0-53b5-b875-bfff399d24d4---->f1cde523-8b27-58b5-bf8b-b92a8ec30bdb
ATMOSL1---->Spacelab-3
ede362ff-1864-5f8c-86d4-20ab01e56149---->a44c55a5-0f23-54a9-9d43-e154d835cf90
ATMOSL1---->ATLAS-1
ede362ff-1864-5f8c-86d4-20ab01e56149---->c4a8f121-d059-5a44-b660-d2eb6fbf317b
ATMOSL1---->ATLAS-2
ede362ff-1864-5f8c-86d4-20ab01e56149---->dd5074fa-2f0a-5474-a57c-73c662d36156
ATMOSL1---->ATLAS-3
ede362ff-1864-5f8c-86d4-20ab01e56149---->28829eb4-45a1-5463-85aa-45e4dd8842be
AIRICRAD---->Aqua
77ce4868-d28c-5d40-94e3-1a3f40d1c086---->d8e30028-f813-51ec-bbe0-12282466a66a
SNDRSNML3DRMS---->Suomi-NPP
536f2347-7cad-56ea-9c80-6e859c0fa25b---->30f86105-41b9-5eca-a06b-ddf77dda1007
ML2CH3CN---->Aura
5b41da5d-70ee-5164-b790-4421b613dcbd---->e3d2ab6d-15ab-5a41-9dc2-

OMPS_NPP_NMSO2_PCA_L3_DAILY---->Suomi-NPP
24467f19-921c-5cd2-9b14-fef99b6886ff---->30f86105-41b9-5eca-a06b-ddf77dda1007
HRIRN1L1---->Nimbus-1
761f56bc-24d4-5050-b31b-6a95a42aa682---->59c5a1b6-9ead-58ee-af8a-a744581cdf4d
MA_MON_CONV_OMF---->MERRA
76249618-0137-5f49-a127-1b625d53cfdd---->0cc18cf2-deb0-560f-8856-86186cfbe623
GPM_2AGPROFNOAA19MHS_CLIM---->NOAA-19
a721f0f2-693a-59f5-8d27-29957de72c60---->e97e3dd7-b0af-5ca8-b7af-1545623dc944
GPM_3GCSH_TRMM---->TRMM
ad2313d0-e5f4-5736-bb2d-5fa2e3d6196b---->26ca51b1-8234-5d29-941d-9f7f65347529
GPM_3HSLH_TRMM---->TRMM
c6ae400a-6fab-5300-bb3b-9018f380aa2c---->26ca51b1-8234-5d29-941d-9f7f65347529
GPM_3CMB_TRMM_DAY---->TRMM
7ee4e6da-097e-5763-864d-de022da8b03c---->26ca51b1-8234-5d29-941d-9f7f65347529
NLDAS_FORA0125_H---->Forcing-LSM
abd277e3-d1de-51ff-8916-6385eccf8cc4---->66700c71-7b93-5be3-83e2-8b63888b214c
GPM_3GPROFF16SSMIS_DAY---->DMSP 5D-3/F16
9537e24d-56fd-516b-af19-b947c5719ef9---->a065386d-b923-52d4-a8bb-ead95c4086b7
TOMSEPL3ztoz---->EP-T

VISSRGOES1IMVIS---->GOES-1
22a4bb18-1156-525a-b12b-35e82ec3c731---->8db89d11-f034-55e1-b4c0-818dd7bc2d1d
MA_SSU_TIROSN_OBS---->MERRA
44a0cc1c-6576-572f-a442-075f7c1d2ff5---->0cc18cf2-deb0-560f-8856-86186cfbe623
GPM_BASEGPMGMI_RSS---->GPM
a977e8ca-0133-5a06-b840-f063ef18223f---->d2f686c5-471d-5822-8ec3-073a84f02d5e
CAR_INTEXB_BRDF---->J-31
13856df2-aa5f-58a9-86ad-4e8b4324f6a3---->4b4aee73-7e79-53f2-bd4f-3d952a91ee94
UARWI3AL---->UARS
6dfe7349-27cd-50d9-9ea5-e720d163f888---->e259bb4e-f5df-5a94-a3f5-704fec2a44b1
M2IUNPANA---->MERRA-2
b0c64feb-e8bc-595c-a5e1-0ebb05784a1c---->6c3c1072-2943-5a74-93f6-6e9583b602aa
GPM_2AGPROFNOAA20ATMS_CLIM---->NOAA-20
bea43ef0-2d83-525e-92f3-cd8d76bf5e47---->24625b48-23b9-534f-93ce-95d4856d3337
TRPSDL2ALLCRSMGBEI---->Suomi-NPP
c763c63b-5bf7-5240-8aad-1849134876f1---->30f86105-41b9-5eca-a06b-ddf77dda1007
GPM_3GPROFGPMGMI---->GPM
84feb03b-ba20-525d-8600-589216dad9be---->d2f686c5-471d-5822-8ec3-073a84f02d5e
S5P_L2__HCHO___HiR---->Sentinel-5P
2e8d7b30-a854-5aa5-