In [99]:
import pandas as pd
import numpy as np

import re
from itertools import chain
from collections import Counter

from google.cloud import bigquery
# from google.oauth2 import service_account

from datetime import datetime, timedelta

import requests
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress InsecureRequestWarning messages
warnings.simplefilter(action='ignore', category=InsecureRequestWarning)

In [3]:
# Directory that holds the credential files used by this notebook.
KEY_PATH = ".config/"
# Key file for APIs other than BigQuery.
servicekey_path = f"{KEY_PATH}serviceKey.json"
# Key file handed to the BigQuery helper functions.
bigquerykey_path = f"{KEY_PATH}mido-project-426906-31b49963ac97.json"

# Silence every warning category from this point on.
warnings.filterwarnings(action="ignore")

In [4]:
# BigQuery client factory
def create_bigquery_client(key_path):
    """Create a BigQuery client authenticated with a service-account key file.

    Args:
        key_path: Path to the service-account JSON key file.

    Returns:
        A ``bigquery.Client`` bound to the credentials in ``key_path``.
    """
    # The `google.oauth2.service_account` import is commented out in this
    # notebook, so referencing `service_account` here raises NameError on a
    # fresh kernel. The client's own factory loads the key file and, per the
    # google-cloud client docs, uses the key file's project id when no
    # project is passed explicitly.
    return bigquery.Client.from_service_account_json(key_path)

In [5]:
def save_dataframe_to_bigquery(df, dataset_id, table_id, key_path,
                               write_disposition="WRITE_TRUNCATE"):
    """Load a DataFrame into a BigQuery table and wait for the job to finish.

    Args:
        df: DataFrame to upload.
        dataset_id: Id of the dataset that contains the destination table.
        table_id: Id of the destination table.
        key_path: Path to the service-account JSON key file.
        write_disposition: Load-job write disposition. The default,
            "WRITE_TRUNCATE", replaces the existing table contents.

    Raises:
        Whatever ``job.result()`` raises if the load job fails.
    """
    client = create_bigquery_client(key_path)

    # `Client.dataset()` is deprecated; build the table reference from an
    # explicit DatasetReference in the client's project instead.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    job_config = bigquery.LoadJobConfig(write_disposition=write_disposition)

    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # block until the load job completes

    print(f"Data inserted into table {table_id} successfully.")

In [6]:
def get_dataframe_from_bigquery(dataset_id, table_id, key_path):
    """Read every row of a BigQuery table into a DataFrame.

    Args:
        dataset_id: Id of the dataset that contains the table.
        table_id: Id of the table to read.
        key_path: Path to the service-account JSON key file.

    Returns:
        The result of ``client.list_rows(...).to_dataframe()`` for the table.
    """
    client = create_bigquery_client(key_path)

    # `Client.dataset()` is deprecated; build the table reference from an
    # explicit DatasetReference in the client's project instead.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    return client.list_rows(table_ref).to_dataframe()

In [7]:
# Date stamps (YYYYMMDD) for today and yesterday.
# Read the clock once: two separate datetime.today() calls can fall on
# either side of midnight, which would make both stamps the same day.
_now = datetime.today()
today = _now.strftime('%Y%m%d')
ytday = (_now - timedelta(days=1)).strftime('%Y%m%d')

In [8]:
# Request the table listing for the g2b dataset; the call returns an iterator.
(
    create_bigquery_client(bigquerykey_path)
    .list_tables("mido-project-426906.g2b")
)

<google.api_core.page_iterator.HTTPIterator at 0x204a1028fd0>

#### 종합쇼핑몰 납품상세내역

In [35]:
# # 전체 종합쇼핑몰 데이터 가져오기
# tables = create_bigquery_client(bigquerykey_path).list_tables(f"{'mido-project-426906'}.{'g2b'}")
# table_list = [table.table_id for table in tables if 'shop_detail_df' in table.table_id]

# shop_df_list = []
# for tb_nm in table_list:
#     data = get_dataframe_from_bigquery('g2b', tb_nm,bigquerykey_path)
#     shop_df_list.extend(data.to_dict('records'))

In [137]:
# Load the `shop_detail_df_all` table of the `g2b` dataset into a DataFrame.
all_shop_df = get_dataframe_from_bigquery(
    dataset_id='g2b',
    table_id='shop_detail_df_all',
    key_path=bigquerykey_path,
)

In [138]:
# Keep the selected columns, order rows by '납품요구접수일자' descending,
# and renumber the index from zero.
all_shop_df_fin = (
    all_shop_df[[
        '납품요구접수일자', '수요기관명', '납품요구건명', '업체명',
        '단가', '단위', '수량', '금액',
        '수요기관구분', '수요기관지역명', '납품요구지청명',
    ]]
    .sort_values(by='납품요구접수일자', ascending=False)
    .reset_index(drop=True)
)

In [139]:
# Helper that blanks out special characters
def replace_special_characters(text):
    """Replace each character that is not an ASCII letter, an ASCII digit,
    a Hangul syllable (가-힣) or whitespace with a single space."""
    disallowed = re.compile(r'[^A-Za-z0-9가-힣\s]')
    return disallowed.sub(' ', text)

# Helper that blanks out digits and English letters
def remove_numbers_and_english(text):
    """Replace every ASCII letter and ASCII digit in ``text`` with a space."""
    return re.sub(pattern=r'[A-Za-z0-9]', repl=' ', string=text)

# Helper that collapses runs of whitespace
def replace_multiple_spaces_with_single(text):
    """Replace every run of one or more whitespace characters with one space.

    Leading and trailing whitespace is collapsed too, not removed.
    """
    whitespace_run = re.compile(r'\s+')
    collapsed = whitespace_run.sub(' ', text)
    return collapsed

In [140]:
# Build the cleaned title column: blank out special characters, then digits
# and English letters, collapse whitespace runs, and trim both ends.
all_shop_df_fin['납품요구건명_re'] = (
    all_shop_df_fin['납품요구건명']
    .apply(replace_special_characters)
    .apply(remove_numbers_and_english)
    .apply(replace_multiple_spaces_with_single)
    .str.strip()
)

#### 지자체 세부사업별 예산서

In [10]:
# Load the budget tables stamped with today's and yesterday's dates from BigQuery.
budget_df_today = get_dataframe_from_bigquery(
    'budget', f'budget_df_0{today}', bigquerykey_path
)
budget_df_ytday = get_dataframe_from_bigquery(
    'budget', f'budget_df_0{ytday}', bigquerykey_path
)

In [155]:
# Filtering: split every '납품요구건명' value on single spaces, flatten the
# pieces into one token list, and count how often each token occurs.
# chain.from_iterable walks the per-row lists lazily instead of unpacking the
# whole column into call arguments.
mapping_keywd_all = list(chain.from_iterable(all_shop_df_fin['납품요구건명'].str.split(' ')))
mapping_keywd_all_filter_cnt = pd.Series(Counter(mapping_keywd_all))

In [160]:
# Tokens that occur five times or fewer. The per-token counts live in
# `mapping_keywd_all_filter_cnt`; no `mapping_keywd_all_filter` is defined in
# this notebook, so that name fails on a fresh kernel.
mapping_keywd_all_filter_cnt[mapping_keywd_all_filter_cnt <= 5].index

Index(['두호', '희망봉', '재정비사업-인조잔디', '옥산생활체육공원', '교체공사(인조잔디)', '본원', '구매설치',
       '소계체육공원', '시행', 'T35mm)',
       ...
       '함박공원', '파주중', '세경고등학교', '내항2통', '만경중학교', '건국고등학교', '인조잔디조성공사', '원곡중학교',
       '재(조성)사업을', '배정미래고등학교'],
      dtype='object', length=1472)

In [148]:
# Build one alternation pattern from the distinct, non-empty keywords.
# - re.escape: tokens such as 'T35mm)' contain regex metacharacters and would
#   otherwise raise re.error or match unintended text.
# - `if keywd`: an empty token would add an empty alternative that matches
#   every row.
# - dict.fromkeys: drops duplicate tokens while keeping first-seen order.
budget_keywd_pattern = '|'.join(
    re.escape(keywd) for keywd in dict.fromkeys(mapping_keywd_all) if keywd
)
# na=False treats missing '세부사업명' values as non-matches so the boolean
# mask never contains NaN.
budget_df_today[
    budget_df_today['세부사업명'].str.contains(budget_keywd_pattern, regex=True, na=False)
]

Unnamed: 0,회계연도,지역명,자치단체명,회계구분명,세부사업코드,세부사업명,집행일자,예산현액,지출액,편성액,분야명,부문명,행정자치단체코드
0,2024,충북,충북증평군,일반회계,5570000201730065,초등학교 CCTV 연계사업,20240628,139432000,49596860,139432000,공공질서및안전,경찰,5570000
1,2024,서울,서울본청,소방특별회계,611000020223029E,소방학교 사이버콘텐츠 개발 추진,20240628,130900000,88410000,130900000,공공질서및안전,소방,6110000
2,2024,서울,서울본청,소방특별회계,61100002024300EC,소방학교 실화재 훈련장 건립,20240628,6111608000,41500000,6111608000,공공질서및안전,소방,6110000
3,2024,충남,충남본청,소방특별회계,6440000202130153,소방학교 업무추진 지원,20240628,129600000,61233700,129600000,공공질서및안전,소방,6440000
4,2024,전남,전남본청,소방특별회계,6460000201730067,소방학교 시설 확충,20240628,5714154000,1074532360,5670990000,공공질서및안전,소방,6460000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3760,2024,제주,제주본청,일반회계,6500000202430051,장기미집행 도시계획시설(평대공원) 조성사업,20240628,180000000,75600000,180000000,국토및지역개발,지역및도시,6500000
3761,2024,제주,제주본청,일반회계,6500000202430052,장기미집행 도시계획시설(두모공원) 조성사업,20240628,60000000,36101100,60000000,국토및지역개발,지역및도시,6500000
3762,2024,제주,제주본청,일반회계,6500000202430053,장기미집행 도시계획시설(고산공원) 조성사업,20240628,60000000,29500000,60000000,국토및지역개발,지역및도시,6500000
3763,2024,제주,제주본청,일반회계,6500000202430080,장기미집행 도시계획시설(새섬공원) 조성사업,20240628,200000000,182100670,200000000,국토및지역개발,지역및도시,6500000
