# 데이터셋 - 쿼리에 따른 검색

## 작동 방식

In [2]:
# Sample data

# Search queries: one per line of the text file. Each line is stripped of
# surrounding whitespace, and lines that end up empty are discarded.
with open('../data/queries.txt', 'r', encoding='utf-8') as f:
    queries = list(filter(None, map(str.strip, f)))

In [4]:
import os, requests
import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file; override=True lets the .env values take
# precedence over variables already set in the process environment.
load_dotenv(override=True)

# The search API key is read from DATAON_SEARCH_API_KEY. The assertion message
# names that same variable so a missing key points at the right setting.
API_KEY = os.getenv("DATAON_SEARCH_API_KEY")
assert API_KEY and API_KEY.strip(), "환경변수(DATAON_SEARCH_API_KEY)가 비어있어요!"

url = "https://dataon.kisti.re.kr/rest/api/search/dataset/"

# Raw API field name -> column name used in the cleaned table.
# The insertion order here is also the column order of `cleaned_df`.
COLUMN_RENAMES = {
    'svc_id': 'ID',
    'dataset_title_etc_main': 'title',
    'dataset_expl_etc_main': 'description',
    'dataset_pub_dt_pc': 'pubyear',
    'dataset_kywd_etc_main': 'keyword',
    'dataset_creator_etc_main': 'author',
    'dataset_lndgpg': 'URL',
}
# Columns carried into the cleaned table: the renamed API fields plus the
# search term that produced the row.
SOURCE_COLUMNS = [*COLUMN_RENAMES, 'query']

# Collect one frame per query and concatenate once after the loop, instead of
# re-copying an ever-growing DataFrame on every iteration.
frames = []
for query in queries:
    # Request parameters (name / type / required? / meaning):
    #   key   / CHAR / required / API key
    #   query / CHAR / required / search keyword
    #   from  / CHAR / optional / page start position
    #   size  / CHAR / optional / page size
    # Only the first page (up to 20 records) is requested for each query.
    params = {"key": API_KEY, "query": query, "from": 0, "size": 20}

    res = requests.get(url, params=params, timeout=20)
    # Surface HTTP errors (bad key, server failure) here rather than as a
    # confusing JSON decode error or a silently empty result.
    res.raise_for_status()
    data = res.json()

    # Queries without any records are skipped; they contribute no rows.
    records = data.get("records") if isinstance(data, dict) else None
    if not records:
        continue

    tmp = pd.DataFrame(records)
    tmp["query"] = query  # remember which search term produced these rows
    frames.append(tmp)

if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    # No query returned anything: keep the expected columns so the steps
    # below still run and simply produce empty outputs.
    df = pd.DataFrame(columns=SOURCE_COLUMNS)

# A dataset matched by several queries is kept once; drop_duplicates keeps the
# first occurrence, so its `query` value is the first query that returned it.
df = df.drop_duplicates(subset='svc_id')
df.to_csv('../data/search_results_dataset.csv', index=False, encoding='utf-8')
print('\n[total dataset length]\n', len(df))

# Cleaned view: selected fields under short names, tagged with a constant
# `category` column.
cleaned_df = (
    df[SOURCE_COLUMNS]
    .rename(columns=COLUMN_RENAMES)
    .assign(category='dataset')
)

display(cleaned_df.head())


[total dataset length]
 55


Unnamed: 0,ID,title,description,pubyear,keyword,author,URL,query,category
0,21f86dd1deb83eec4fa6dc7fa58e0259,Gravity Core from Antarctic ROSS Sea (RS25-GC06),"2024/2025 Gravity core, Ross Sea (Little Ameri...",2025,EARTH SCIENCE;OCEANS;MARINE SEDIMENTS;SEDIMENT...,[(kangmi@kopri.re.kr)],https://dx.doi.org/doi:10.22663/KOPRI-KPDC-000...,Ross Sea core,dataset
1,bf558835530cb7b39d706b800eeb186e,Gravity Core from Antarctic ROSS Sea (RS25-GC04),"2024/2025 Gravity core, Ross Sea (Little Ameri...",2025,EARTH SCIENCE;OCEANS;MARINE SEDIMENTS;SEDIMENT...,[(kangmi@kopri.re.kr)],https://dx.doi.org/doi:10.22663/KOPRI-KPDC-000...,Ross Sea core,dataset
2,ad0056511f46859c6af080414380a82b,Gravity Core from Antarctic Ross Sea (RS19-GC20),"2018/2019 Gravity core, Ross Sea, Antarctic Cl...",2025,EARTH SCIENCE;OCEANS;MARINE SEDIMENTS;SEDIMENT...,[Hyo Jin Kim(hyojink@kopri.re.kr)],https://dx.doi.org/doi:10.22663/KOPRI-KPDC-000...,Ross Sea core,dataset
3,3164743e90350a466a75553600f3aa93,Gravity Core from Antarctic Ross Sea (RS19-GC17),"2018/2019 Gravity core, Ross Sea, Antarctic Cl...",2025,EARTH SCIENCE;OCEANS;MARINE SEDIMENTS;SEDIMENT...,[Hyo Jin Kim(hyojink@kopri.re.kr)],https://dx.doi.org/doi:10.22663/KOPRI-KPDC-000...,Ross Sea core,dataset
4,a380666c4cb468ef2e3473eb5faa47f5,Gravity Core from Antarctic Ross Sea (RS19-GC19),"2018/2019 Gravity core, Ross Sea, Antarctic Cl...",2025,EARTH SCIENCE;OCEANS;MARINE SEDIMENTS;SEDIMENT...,[Hyo Jin Kim(hyojink@kopri.re.kr)],https://dx.doi.org/doi:10.22663/KOPRI-KPDC-000...,Ross Sea core,dataset
