In [68]:
import requests
import base64
from tqdm import tqdm

# HBase REST API 설정
# base_url = 'http://localhost:9090'
base_url = 'http://j11c207a.p.ssafy.io:9090'
table_name = 'industry_news'

# SALT 범위 및 시간 설정
max_salt = 10
start_row_prefix = 'T00001'
stop_row_prefix = 'T00001'
start_time = '1719801060'
stop_time = '1719850203' # 24/7/1 16시
# stop_time = '1722442203' # 24/7/31

# 결과를 저장할 리스트
results = []

# 스캔 요청을 위한 함수
def scan_hbase(salt):
    start_row = f"{salt:02d}{start_row_prefix}{start_time}"
    stop_row = f"{salt:02d}{stop_row_prefix}{stop_time}"
    
    # HBase REST API URL
    scan_url = f"{base_url}/{table_name}/scanner"
    
    # 요청 본문 생성 시 base64로 인코딩
    scan_body = {
        "startRow": base64.b64encode(start_row.encode()).decode(),
        "endRow": base64.b64encode(stop_row.encode()).decode(),
    }
    
    # 스캔 요청
    headers = {
        "Accept": "application/json",  # JSON 응답을 요청
        "Content-Type": "application/json"  # JSON 데이터 형식으로 전송
    }
    response = requests.post(scan_url, json=scan_body)
    
    if response.status_code == 201:
        # JSON 응답에서 Location 헤더를 통해 scanner_id 추출
        location = response.headers['Location']
        results.append(location)
        print(f"SALT {salt}: Data retrieved.")
    else:
        print(f"Error scanning SALT {salt}: {response.status_code}, {response.text}")

# 스캔 요청 수행
for salt in tqdm(range(max_salt + 1)):
    scan_hbase(salt)




 36%|███▋      | 4/11 [00:00<00:00, 16.55it/s]

SALT 0: Data retrieved.
SALT 1: Data retrieved.
SALT 2: Data retrieved.
SALT 3: Data retrieved.


 82%|████████▏ | 9/11 [00:00<00:00, 18.42it/s]

SALT 4: Data retrieved.
SALT 5: Data retrieved.
SALT 6: Data retrieved.
SALT 7: Data retrieved.
SALT 8: Data retrieved.


100%|██████████| 11/11 [00:00<00:00, 17.67it/s]

SALT 9: Data retrieved.
SALT 10: Data retrieved.





In [69]:
# 각 scanner_id에 대해 GET 요청하여 결과 저장
final_results = []

# JSON 응답을 요청하는 헤더
headers = {
    "Accept": "application/json",  # JSON 응답을 요청
}

for scanner_id in results:
    while True:
        # GET 요청을 통해 데이터 가져오기
        get_url = scanner_id
        response = requests.get(get_url, headers=headers)
        
        if response.status_code == 200:
            # 성공적으로 데이터를 가져온 경우
            data = response.json()  # 응답을 JSON으로 파싱
            final_results.append(data)  # 결과를 저장
            # print(f"Data retrieved from {scanner_id}: {data}")
        elif response.status_code == 204:
            # 스캐너가 모두 소진된 경우
            print(f"No more data for {scanner_id}. Deleting scanner...")
            # DELETE 요청으로 자원 해제
            delete_response = requests.delete(scanner_id)
            if delete_response.status_code == 200:
                print(f"Scanner {scanner_id} deleted successfully.")
            else:
                print(f"Error deleting scanner {scanner_id}: {delete_response.status_code}, {delete_response.text}")
            break
        else:
            # 오류 발생 시 응답 출력
            print(f"Error retrieving data from {scanner_id}: {response.status_code}, {response.text}")
            break

# 최종 결과 출력
print("Final results:")
# print(final_results)


No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309881513509cd9a5. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309881513509cd9a5 deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730988156110905f6f. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730988156110905f6f deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309881652b5494ac. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309881652b5494ac deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273098817106b2d5d8. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273098817106b2d5d8 deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730988176098e6c6d. Deleting scanner...
Scanner htt

In [70]:
for data in final_results:
    print(len(data['Row']))

10
9
8
6
11
10
9
9
9
8


In [71]:
cnt = 0
for data in final_results:
    cnt+= len(data['Row'])
print(cnt)

89


In [72]:
final_results

[{'Row': [{'key': 'MDBUMDAwMDExNzE5ODI0MjgwNDcxNTRlNzVhMzZhM2ZjZjJlNTMxZWI5N2NhYjk3ZTA=',
    'Cell': [{'column': 'Y2Y6YXJ0aWNsZQ==',
      'timestamp': 1727274257216,
      '$': 'PEltYWdlVGFnPmh0dHBzOi8vaW1nMS5kYXVtY2RuLm5ldC90aHVtYi9SNjU4eDAucTcwLz9mbmFtZT1odHRwczovL3QxLmRhdW1jZG4ubmV0L25ld3MvMjAyNDA3LzAxL3lvbmhhcC8yMDI0MDcwMTA4NTgxMTY5MXVkbnouanBnPC9JbWFnZVRhZz4K7Jqw7LK06rWtIOu5hOymiO2VjyDssrTtgazsubTrk5wgW+yasOygleyCrOyXheuzuOu2gCDsoJzqs7UuIOyerO2MkOunpCDrsI8gREIg6riI7KeAXQoK4payIOqzvO2Vmeq4sOyIoOygleuztO2GteyLoOu2gCDsmrDsoJXsgqzsl4Xrs7jrtoDripQg7Jqw7Y64IOyEnOu5hOyKpCDsnbTsmqkg6riI7JWhIDUl66W8IOy6kOyLnOuwseycvOuhnCDso7zripQgJ+yasOyytOq1rSDruYTspojtlY8oQml6Rml0KSDssrTtgazsubTrk5wn66W8IDHsnbwg7Lac7Iuc7ZaI64ukLiDrs7TslYgsIOuwqeyXrSwg64yA7JesIOuTsSDsl4XsooUg7J6Q64+ZIOuCqeu2gCDsi5wgNSUsIOydjOyLneygkMK364yA7ZiV66eI7Yq4wrfsoITthrXsi5zsnqUg7J207JqpIOyLnCAzJSDsupDsi5zrsLHsnbQg7JuUIO2Gte2VqSDtlZzrj4Qg64K07JeQ7IScIOyngOq4ieuQmOupsCDsoITsnpDshLjquIjqs4TsgrDshJwg67Cc7ZaJIOuTsSDshLjrrLQg7KeA7JuQ

# batch

In [73]:
import requests
import base64
from tqdm import tqdm

# HBase REST API 설정
# base_url = 'http://localhost:9090'
base_url = 'http://j11c207a.p.ssafy.io:9090'
table_name = 'industry_news'

# SALT 범위 및 시간 설정
max_salt = 10
start_row_prefix = 'T00001'
stop_row_prefix = 'T00001'
start_time = '1719801060'
stop_time = '1719850203' # 24/7/1 16시
# stop_time = '1722442203' # 24/7/31

# 결과를 저장할 리스트
results = []

# 스캔 요청을 위한 함수
def scan_hbase(salt):
    start_row = f"{salt:02d}{start_row_prefix}{start_time}"
    stop_row = f"{salt:02d}{stop_row_prefix}{stop_time}"
    
    # HBase REST API URL
    scan_url = f"{base_url}/{table_name}/scanner"
    
    # 요청 본문 생성 시 base64로 인코딩
    scan_body = {
        "startRow": base64.b64encode(start_row.encode()).decode(),
        "endRow": base64.b64encode(stop_row.encode()).decode(),
    }
    
    # 스캔 요청
    headers = {
        "Accept": "application/json",  # JSON 응답을 요청
        "Content-Type": "application/json"  # JSON 데이터 형식으로 전송
    }
    response = requests.post(scan_url, json=scan_body)
    
    if response.status_code == 201:
        # JSON 응답에서 Location 헤더를 통해 scanner_id 추출
        location = response.headers['Location']
        results.append(location)
        print(f"SALT {salt}: Data retrieved.")
    else:
        print(f"Error scanning SALT {salt}: {response.status_code}, {response.text}")

# 스캔 요청 수행
for salt in tqdm(range(max_salt + 1)):
    scan_hbase(salt)




 27%|██▋       | 3/11 [00:00<00:00, 20.42it/s]

SALT 0: Data retrieved.
SALT 1: Data retrieved.
SALT 2: Data retrieved.
SALT 3: Data retrieved.


 73%|███████▎  | 8/11 [00:00<00:00, 17.88it/s]

SALT 4: Data retrieved.
SALT 5: Data retrieved.
SALT 6: Data retrieved.
SALT 7: Data retrieved.


100%|██████████| 11/11 [00:00<00:00, 17.58it/s]

SALT 8: Data retrieved.
SALT 9: Data retrieved.
SALT 10: Data retrieved.





In [74]:
# 각 scanner_id에 대해 GET 요청하여 결과 저장
final_results_2 = []

# JSON 응답을 요청하는 헤더
headers = {
    "Accept": "application/json",  # JSON 응답을 요청
}

for scanner_id in results:
    while True:
        # GET 요청을 통해 데이터 가져오기
        get_url = scanner_id
        response = requests.get(get_url, headers=headers)
        
        if response.status_code == 200:
            # 성공적으로 데이터를 가져온 경우
            data = response.json()  # 응답을 JSON으로 파싱
            final_results_2.append(data)  # 결과를 저장
            # print(f"Data retrieved from {scanner_id}: {data}")
        elif response.status_code == 204:
            # 스캐너가 모두 소진된 경우
            print(f"No more data for {scanner_id}. Deleting scanner...")
            # DELETE 요청으로 자원 해제
            delete_response = requests.delete(scanner_id)
            if delete_response.status_code == 200:
                print(f"Scanner {scanner_id} deleted successfully.")
            else:
                print(f"Error deleting scanner {scanner_id}: {delete_response.status_code}, {delete_response.text}")
            break
        else:
            # 오류 발생 시 응답 출력
            print(f"Error retrieving data from {scanner_id}: {response.status_code}, {response.text}")
            break

# 최종 결과 출력
print("Final results:")
# print(final_results)


No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099013523d5693fd. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099013523d5693fd deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099014026d84673d. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099014026d84673d deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099014502efd4bad. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099014502efd4bad deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309901537eb51160. Deleting scanner...
Scanner http://j11c207a.p.ssafy.io:9090/industry_news/scanner/1727309901537eb51160 deleted successfully.
No more data for http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730990158238215b95. Deleting scanner...
Scanner 

In [75]:
final_results_2

[{'Row': [{'key': 'MDBUMDAwMDExNzE5ODI0MjgwNDcxNTRlNzVhMzZhM2ZjZjJlNTMxZWI5N2NhYjk3ZTA=',
    'Cell': [{'column': 'Y2Y6YXJ0aWNsZQ==',
      'timestamp': 1727274257216,
      '$': 'PEltYWdlVGFnPmh0dHBzOi8vaW1nMS5kYXVtY2RuLm5ldC90aHVtYi9SNjU4eDAucTcwLz9mbmFtZT1odHRwczovL3QxLmRhdW1jZG4ubmV0L25ld3MvMjAyNDA3LzAxL3lvbmhhcC8yMDI0MDcwMTA4NTgxMTY5MXVkbnouanBnPC9JbWFnZVRhZz4K7Jqw7LK06rWtIOu5hOymiO2VjyDssrTtgazsubTrk5wgW+yasOygleyCrOyXheuzuOu2gCDsoJzqs7UuIOyerO2MkOunpCDrsI8gREIg6riI7KeAXQoK4payIOqzvO2Vmeq4sOyIoOygleuztO2GteyLoOu2gCDsmrDsoJXsgqzsl4Xrs7jrtoDripQg7Jqw7Y64IOyEnOu5hOyKpCDsnbTsmqkg6riI7JWhIDUl66W8IOy6kOyLnOuwseycvOuhnCDso7zripQgJ+yasOyytOq1rSDruYTspojtlY8oQml6Rml0KSDssrTtgazsubTrk5wn66W8IDHsnbwg7Lac7Iuc7ZaI64ukLiDrs7TslYgsIOuwqeyXrSwg64yA7JesIOuTsSDsl4XsooUg7J6Q64+ZIOuCqeu2gCDsi5wgNSUsIOydjOyLneygkMK364yA7ZiV66eI7Yq4wrfsoITthrXsi5zsnqUg7J207JqpIOyLnCAzJSDsupDsi5zrsLHsnbQg7JuUIO2Gte2VqSDtlZzrj4Qg64K07JeQ7IScIOyngOq4ieuQmOupsCDsoITsnpDshLjquIjqs4TsgrDshJwg67Cc7ZaJIOuTsSDshLjrrLQg7KeA7JuQ

In [76]:
final_results[0]['Row']

[{'key': 'MDBUMDAwMDExNzE5ODI0MjgwNDcxNTRlNzVhMzZhM2ZjZjJlNTMxZWI5N2NhYjk3ZTA=',
  'Cell': [{'column': 'Y2Y6YXJ0aWNsZQ==',
    'timestamp': 1727274257216,
    '$': 'PEltYWdlVGFnPmh0dHBzOi8vaW1nMS5kYXVtY2RuLm5ldC90aHVtYi9SNjU4eDAucTcwLz9mbmFtZT1odHRwczovL3QxLmRhdW1jZG4ubmV0L25ld3MvMjAyNDA3LzAxL3lvbmhhcC8yMDI0MDcwMTA4NTgxMTY5MXVkbnouanBnPC9JbWFnZVRhZz4K7Jqw7LK06rWtIOu5hOymiO2VjyDssrTtgazsubTrk5wgW+yasOygleyCrOyXheuzuOu2gCDsoJzqs7UuIOyerO2MkOunpCDrsI8gREIg6riI7KeAXQoK4payIOqzvO2Vmeq4sOyIoOygleuztO2GteyLoOu2gCDsmrDsoJXsgqzsl4Xrs7jrtoDripQg7Jqw7Y64IOyEnOu5hOyKpCDsnbTsmqkg6riI7JWhIDUl66W8IOy6kOyLnOuwseycvOuhnCDso7zripQgJ+yasOyytOq1rSDruYTspojtlY8oQml6Rml0KSDssrTtgazsubTrk5wn66W8IDHsnbwg7Lac7Iuc7ZaI64ukLiDrs7TslYgsIOuwqeyXrSwg64yA7JesIOuTsSDsl4XsooUg7J6Q64+ZIOuCqeu2gCDsi5wgNSUsIOydjOyLneygkMK364yA7ZiV66eI7Yq4wrfsoITthrXsi5zsnqUg7J207JqpIOyLnCAzJSDsupDsi5zrsLHsnbQg7JuUIO2Gte2VqSDtlZzrj4Qg64K07JeQ7IScIOyngOq4ieuQmOupsCDsoITsnpDshLjquIjqs4TsgrDshJwg67Cc7ZaJIOuTsSDshLjrrLQg7KeA7JuQIOyEnOu5hOyKpOu

In [77]:
final_results_2[0]['Row']

[{'key': 'MDBUMDAwMDExNzE5ODI0MjgwNDcxNTRlNzVhMzZhM2ZjZjJlNTMxZWI5N2NhYjk3ZTA=',
  'Cell': [{'column': 'Y2Y6YXJ0aWNsZQ==',
    'timestamp': 1727274257216,
    '$': 'PEltYWdlVGFnPmh0dHBzOi8vaW1nMS5kYXVtY2RuLm5ldC90aHVtYi9SNjU4eDAucTcwLz9mbmFtZT1odHRwczovL3QxLmRhdW1jZG4ubmV0L25ld3MvMjAyNDA3LzAxL3lvbmhhcC8yMDI0MDcwMTA4NTgxMTY5MXVkbnouanBnPC9JbWFnZVRhZz4K7Jqw7LK06rWtIOu5hOymiO2VjyDssrTtgazsubTrk5wgW+yasOygleyCrOyXheuzuOu2gCDsoJzqs7UuIOyerO2MkOunpCDrsI8gREIg6riI7KeAXQoK4payIOqzvO2Vmeq4sOyIoOygleuztO2GteyLoOu2gCDsmrDsoJXsgqzsl4Xrs7jrtoDripQg7Jqw7Y64IOyEnOu5hOyKpCDsnbTsmqkg6riI7JWhIDUl66W8IOy6kOyLnOuwseycvOuhnCDso7zripQgJ+yasOyytOq1rSDruYTspojtlY8oQml6Rml0KSDssrTtgazsubTrk5wn66W8IDHsnbwg7Lac7Iuc7ZaI64ukLiDrs7TslYgsIOuwqeyXrSwg64yA7JesIOuTsSDsl4XsooUg7J6Q64+ZIOuCqeu2gCDsi5wgNSUsIOydjOyLneygkMK364yA7ZiV66eI7Yq4wrfsoITthrXsi5zsnqUg7J207JqpIOyLnCAzJSDsupDsi5zrsLHsnbQg7JuUIO2Gte2VqSDtlZzrj4Qg64K07JeQ7IScIOyngOq4ieuQmOupsCDsoITsnpDshLjquIjqs4TsgrDshJwg67Cc7ZaJIOuTsSDshLjrrLQg7KeA7JuQIOyEnOu5hOyKpOu

# 멀티스레드

In [78]:
import requests
import concurrent.futures

# 각 scanner_id에 대해 GET 요청하여 결과 저장
final_results = []

# JSON 응답을 요청하는 헤더
headers = {
    "Accept": "application/json",  # JSON 응답을 요청
}

def process_scanner(scanner_id):
    while True:
        # GET 요청을 통해 데이터 가져오기
        get_url = scanner_id
        response = requests.get(get_url, headers=headers)
        
        if response.status_code == 200:
            # 성공적으로 데이터를 가져온 경우
            data = response.json()  # 응답을 JSON으로 파싱
            return data
        elif response.status_code == 204:
            # 스캐너가 모두 소진된 경우
            print(f"No more data for {scanner_id}. Deleting scanner...")
            # DELETE 요청으로 자원 해제
            delete_response = requests.delete(scanner_id)
            if delete_response.status_code == 200:
                print(f"Scanner {scanner_id} deleted successfully.")
            else:
                print(f"Error deleting scanner {scanner_id}: {delete_response.status_code}, {delete_response.text}")
            break
        else:
            # 오류 발생 시 응답 출력
            print(f"Error retrieving data from {scanner_id}: {response.status_code}, {response.text}")
            break
    return None

# 멀티스레드 실행
with concurrent.futures.ThreadPoolExecutor() as executor:
    # 각 scanner_id에 대해 스레드 생성 및 실행
    future_to_scanner = {executor.submit(process_scanner, scanner_id): scanner_id for scanner_id in results}
    
    # 스레드가 완료될 때까지 기다리며 결과 저장
    for future in concurrent.futures.as_completed(future_to_scanner):
        scanner_id = future_to_scanner[future]
        try:
            data = future.result()
            if data:
                final_results.append(data)
        except Exception as exc:
            print(f"Scanner {scanner_id} generated an exception: {exc}")

# 최종 결과 출력
print("종료:")


Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099019073ece202b: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730990163151e2d26a: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099017435d1552a5: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099014026d84673d: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099013523d5693fd: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730990158238215b95: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/172730990168951d4ff4a: 404, Not found

Error retrieving data from http://j11c207a.p.ssafy.io:9090/industry_news/scanner/17273099017992c1e941a: 404, Not found

Error retrieving data from http://j11c20

In [79]:
for data in final_results:
    print(len(data['Row']))

# 뉴스조회

In [82]:
import hashlib  # 해시 생성
import base64  # Base64 인코딩 및 디코딩
import requests  # HTTP 요청을 위한 라이브러리
from datetime import datetime, timezone  # 날짜 및 시간 변환

type_mapping = {
    "finance": "T00001",
    "industry": "T00002",
    "employ": "T00003",
    "autos": "T00004",
    "stock": "T00005",
    "estate": "T00006",
    "consumer": "T00007",
    "worldeconomy": "T00008",
    "coin": "T00009",
    "pension": "T00010",
    "policy": "T00011",
    "startup": "T00012"
}

def convert_to_utc_timestamp(datetime_str: str) -> int:
    """
    Convert the given datetime string to a UTC timestamp.
    :param datetime_str: String in the format '%Y-%m-%d %H:%M:%S'
    :return: UTC timestamp
    """
    naive_datetime = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
    return int(naive_datetime.timestamp())

def hash_news_id(news_id: str) -> str:
    """Generate a 16-byte hash from the news ID."""
    return hashlib.md5(news_id.encode()).hexdigest()

def get_salt(news_id_hash):
    return str(int(news_id_hash, 16) % 10).zfill(2)

def get_mapped_type(news_type):
    """Generate the industry news table key."""
    # Type mapping
    return type_mapping.get(news_type, "UnknownType")

def get_timestamp_by_key(row_key: str):
    """Get a row from HBase by its key."""
    hbase_url = 'http://j11c207a.p.ssafy.io:9090'
    url = f"{hbase_url}/industry_news_keys/{row_key}"
    headers = {
        'Accept': 'application/json'
    }
    
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        # Extracting the base64 encoded value
        base64_value = data['Row'][0]['Cell'][0]['$']
        # Decoding the base64 value => 여기서 우선 datetime str나옴('2024-07-01 15:53:00')
        datetime_str = decode_base64(base64_value)
        timestamps = convert_to_utc_timestamp(datetime_str)
        return timestamps
    else:
        print(f"Error fetching data for row key {row_key}: {response.status_code}, {response.text}")
        return None

def decode_base64(encoded_value: str) -> str:
    """Decode a base64 encoded string."""
    return base64.b64decode(encoded_value).decode('utf-8')

def get_news_by_key(news_type: str, timestamps: str, news_id_hash: str, column_family: str):
    """Get a row from the industry_news table by constructing the row key."""
    salt = get_salt(news_id_hash)
    
    # Constructing the row key
    row_key = f"{salt}{news_type}{timestamps}{news_id_hash}"

    # HBase REST API endpoint
    hbase_url = 'http://j11c207a.p.ssafy.io:9090'
    url = f"{hbase_url}/industry_news/{row_key}"
    headers = {
        'Accept': 'application/json'
    }
    
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        result = {}
        
        if 'Row' in data and len(data['Row']) > 0:
            row_data = data['Row'][0]
            
            for cell in row_data['Cell']:
                column = cell['column']  # Get the column
                decoded_column = base64.b64decode(column).decode('utf-8')  # Decode the column
                value = cell['$']  # Get the value
                decoded_value = base64.b64decode(value).decode('utf-8')  # Decode the value
                
                # Remove 'cf:' from the column name for the result dictionary
                clean_column_name = decoded_column.replace(f"{column_family}:", "")
                result[clean_column_name] = decoded_value

        return result


In [83]:
# Random Access

# Client한테 들어왔다 가정하기
news_id = "20240701112610199"
news_type = 'estate'

# 1. 우선 클라이언트로부터 받은 정보를 가지고 timestamp 추출
news_id_hash = hash_news_id(news_id)
mapped_type = get_mapped_type(news_type)


row_key = news_id_hash + mapped_type

# 2. industry_news_keys 테이블 찌름
timestamps = get_timestamp_by_key(row_key)

# 2. industry_news찌름
column_family = 'cf'
article = get_news_by_key(mapped_type, timestamps, news_id_hash, column_family)
article

{'article': '<ImageTag>https://img4.daumcdn.net/thumb/R658x0.q70/?fname=https://t1.daumcdn.net/news/202407/01/ned/20240701112611630psrx.jpg</ImageTag>\n상도푸르지오클라베뉴 투시도 [대우건설 제공]\n\n분양가가 오르면서 골칫거리로 취급되던 미분양도 순차적으로 계약이 완료되고 있다. 특히 수도권은 고분양가 논란이 일며 오랜 기간 판매가 지지부진했던 단지들도 ‘완판’에 한 걸음 다가서는 모양새다.\n\n1일 분양업계에 따르면 경기 옥정신도시 양주옥정LH엘리프는 지난 5월부터 선착순 계약을 실시했다. 이 단지는 지난해 7월 입주를 시작했지만 선착순 계약을 시작할 당시 전체 70% 상당이 미분양 상태였다. 이에 한국토지주택공사(LH)는 발코니 확장을 옵션을 무상으로 지원하고 잔금 일부에 대해 무이자 금융 혜택을 제공하며 계약률을 높여 왔다. 그러다 최근 상황이 급변했다. 계약 상담이 늘어나면서 총 1409가구 가운데 900가구에 달하는 전용 59㎡가 모두 계약이 완료된 것이다. 이 평형 분양가는 2억7130만~2억9497만원대다. 전용 51㎡ 또한 70여가구만 남았다. LH 관계자는 “이달 계약 문의가 급격하게 늘었고, 상담 예약도 꾸준히 잡히고 있다”면서 “현재 계약 진행 중으로 잔여물량은 향후 더 줄어들 예정”이라고 설명했다.\n\n고분양가 논란이 일며 임의공급 7차까지 간 서울시 동작구 상도동 상도푸르지오클라베뉴도 ‘완판’을 앞두고 있다. 업계에 따르면 시행사 물량까지 총 30여가구만 남은 상황으로 전해진다. 단지는 지난해 9월 700가구 규모로 분양을 시작했고 10월부터 선착순 분양에 돌입했다. 후분양이라 자금 계획이 빠듯하고, 전용 84㎡가 12억원을 웃돌면서 부정적인 시각도 나왔지만, 결국 전 물량 계약에 가까워지고 있는 것이다.\n\n임의공급 12차까지 간 구로구 가리봉동 남구로역 동일 센타시아도 미분양 물량이 한 자릿수(6가구) 대로 