In [1]:
import pandas as pd
import numpy as np

import re
from itertools import chain
from collections import Counter

from google.cloud import bigquery
from google.oauth2 import service_account

from datetime import datetime, timedelta

import requests
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress urllib3's InsecureRequestWarning for the whole session.
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

In [3]:
# Directory prefix under which the credential files are looked up.
KEY_PATH = ".config/"

# Key file for APIs other than BigQuery.
servicekey_path = f"{KEY_PATH}serviceKey.json"

# Service-account key file handed to the BigQuery helpers.
bigquerykey_path = f"{KEY_PATH}mido-project-426906-31b49963ac97.json"

# Ignore every warning category from here on.
warnings.simplefilter("ignore")

In [4]:
def create_bigquery_client(key_path):
    """Build a BigQuery client from a service-account key file.

    Args:
        key_path: Path to the service-account JSON key file.

    Returns:
        A ``bigquery.Client`` that uses the credentials loaded from
        ``key_path`` and the project id stored in those credentials.
    """
    creds = service_account.Credentials.from_service_account_file(key_path)
    return bigquery.Client(project=creds.project_id, credentials=creds)

In [5]:
def save_dataframe_to_bigquery(df, dataset_id, table_id, key_path):
    """Load a DataFrame into a BigQuery table, replacing the table's contents.

    Args:
        df: DataFrame to upload.
        dataset_id: Id of the destination dataset (in the client's project).
        table_id: Id of the destination table.
        key_path: Path to the service-account JSON key used to build the client.

    Returns:
        None. Prints a confirmation line once the load job has finished.

    Any exception raised by the load job propagates from ``job.result()``.
    """
    client = create_bigquery_client(key_path)

    # Client.dataset() is deprecated; build the reference explicitly against
    # the client's project, which is what Client.dataset() defaulted to.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    # WRITE_TRUNCATE: existing table contents are removed before the insert.
    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )

    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # block until the load job completes

    print(f"Data inserted into table {table_id} successfully.")

In [6]:
def get_dataframe_from_bigquery(dataset_id, table_id, key_path):
    """Read an entire BigQuery table into a pandas DataFrame.

    Args:
        dataset_id: Id of the dataset (in the client's project).
        table_id: Id of the table to read.
        key_path: Path to the service-account JSON key used to build the client.

    Returns:
        A DataFrame holding all rows returned by ``list_rows`` for the table.
    """
    client = create_bigquery_client(key_path)

    # Client.dataset() is deprecated; build the reference explicitly against
    # the client's project, which is what Client.dataset() defaulted to.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    return client.list_rows(table_ref).to_dataframe()

In [7]:
def _shift_weekend_to_friday(moment):
    """Return `moment` as-is on Mon-Fri, or moved back to Friday on Sat/Sun."""
    days_past_friday = moment.weekday() - 4  # Sat -> 1, Sun -> 2, Mon-Fri -> <= 0
    if days_past_friday > 0:
        return moment - timedelta(days=days_past_friday)
    return moment


# Today and yesterday; a date landing on Saturday or Sunday is replaced by
# the Friday just before it.
today = _shift_weekend_to_friday(datetime.today())
ytday = _shift_weekend_to_friday(datetime.today() - timedelta(days=1))

# Convert both to 'YYYYMMDD' strings.
ytday = ytday.strftime('%Y%m%d')
today = today.strftime('%Y%m%d')

#### 법정동코드

In [8]:
# Load the legal-dong code table (CP949-encoded CSV).
# The path is a raw string because it contains Windows backslashes: in a
# normal literal '\p', '\m' and '\d' are invalid escape sequences that only
# happen to be kept verbatim (and trigger a warning on newer Pythons).
dist_code = pd.read_csv(r'C:\py_src\midoproject\data/법정동코드 전체자료.csv', encoding='cp949')

In [9]:
# Collect, per word position in '법정동명', the distinct words whose last
# character is one of the suffixes below, then flatten everything into one list.
name_tokens = dist_code['법정동명'].str.split()
emd_suffixes = ['읍','면','동','리','가','로']

dist_code_list = []
for i in range(name_tokens.apply(len).max()):
    token_at_i = name_tokens.str[i]
    # isin() yields False for missing tokens, so the mask never contains NaN.
    ends_with_suffix = token_at_i.str[-1].isin(emd_suffixes)
    dist_code_temp = dist_code[ends_with_suffix]
    dist_code_list.append([token_at_i[ends_with_suffix].dropna().unique()])

# Two levels of nesting (list of one-element lists of arrays) -> flat list.
# Values repeated at different word positions are not de-duplicated.
emd_code_list = list(chain.from_iterable(chain.from_iterable(dist_code_list)))

#### 인포21

In [10]:
def remove_and_join(region):
    """Drop every '전국' entry from a comma-separated region string.

    Args:
        region: Comma-separated string of region names.

    Returns:
        The remaining entries joined back with ',' in their original order.
        Entries are compared exactly (no whitespace stripping); the result is
        an empty string when nothing remains.
    """
    return ','.join(part for part in region.split(',') if part != '전국')

In [11]:
# Load the three bid tables from BigQuery, keyed by bid type.
# Table names are 'bid_<type>_df_0' followed by characters 4-5 of `today`
# (the two-digit month of the 'YYYYMMDD' string).
bid_info_dict = {}
bid_info_dict_fin = {}
for bid in ('con', 'ser', 'pur'):
    bid_info_dict[bid] = get_dataframe_from_bigquery('info21', f'bid_{bid}_df_0{today[4:6]}', bigquerykey_path)

TypeError: get_dataframe_from_bigquery() takes 2 positional arguments but 3 were given

In [48]:
# Extract the rows of interest from each bid table.
#
# The keyword patterns below are regex alternations matched against the
# announcement title ('공고명'). They do not change between iterations, so
# they are defined once, ahead of the loop.

# Required keyword group 1
need_kwd1 = '학교|초등학교|중학교|고등학교|대학교|다목적|운동장|종합운동장|공원|체육공원|체육시설|체육센터|스포츠센터|연병장|구장|축구장|족구장|야구장|풋살장|경기장|배드민턴장|테니스장|게이트볼장|하키장|관급'

# Required keyword group 2
need_kwd2 = '개선|교체|보수|공사|구매|구입|설치|정비|조성'

# Required keyword 3: a title containing it is selected unconditionally
need_kwd3 = '인조잔디'

# Exclusion keywords: veto a title that matched groups 1 and 2
except_kwd = 'CCTV|cctv|LED|led|가로등|가속기|간접등|간판|개방|검진|경관|계단|골프장|관리|광고|교량|교실|교육|교차로|균열|그늘막|그림|급수관|급식|난방기|내진|냉방기|네트워크|노상|노점|농기계|단열|데이터|도로개설|도로방음벽|도로정비|도로확장|도서관|디자인|뚜껑|막구조물|맨홀|모노레일|모니터링|미끄럼|미세먼지|미술|방송|방수|방음|방음벽|배면도로|배수|버스|법률자문|벤치|벽화|보건|보도블럭|보상|보일러|보행자도로|복지|볼라드|분묘|불법|빗물받이|사용료|사워실|살포|생육환경|석면|선홈통|소프트웨어|수도|수로|수목표찰|수영장|스크린|스프링클러|승강기|시계탑|시스템|식수대|신호등|안심벨|안전|약수터|억제제|역량|연결도로|예방|예술|오염|옹벽|요양기관|운영|울타리|위생|위험수목|유아|육교|음성안내|음악|의자|인양기|인터넷|입학|자료|자전거|저장고|저장장치|전광판|전기|전신주|전통시장|정화|정화조|조도|조명|조형물|조화기|주차장|줄눈|진로진학|진입도로|창고|창출|처우|철거|취업|치안|카메라|카약|통학로|파고라|펜스|폐기물|폐열|포충기|풀장|풍차|하수|하수관로|하수도|하천|학습|행사|화장|화장실|활성화|횡단보도|휀스|흡연'

# Today's date as 'yy.mm.dd' text; '투찰마감' is compared against it as a string.
deadline_cutoff = datetime.today().strftime('%y.%m.%d')

for bid in bid_info_dict.keys():
    source_df = bid_info_dict[bid]

    # Industry filter: keep rows whose '업종' mentions one of the listed
    # terms, then drop rows whose '업종' mentions '폐기물'.
    filtered_bid_df = source_df[source_df['업종'].fillna('').str.contains('건축|건설|ENG|엔지니어링')].reset_index(drop=True)
    filtered_bid_df = filtered_bid_df[~filtered_bid_df['업종'].fillna('').str.contains('폐기물')].reset_index(drop=True)

    # Date filter: keep only rows whose bid deadline is later than today.
    filtered_bid_df = filtered_bid_df[filtered_bid_df['투찰마감'] > deadline_cutoff].reset_index(drop=True)

    # NOTE(review): filtered_bid_df is never read below -- the keyword
    # selection runs on the unfiltered table, so the industry and deadline
    # filters do not affect what gets saved. If they are meant to apply,
    # select from filtered_bid_df instead of source_df here. Left unchanged
    # so the rows written downstream stay the same until this is confirmed.
    title = source_df['공고명']

    # na=False: a missing title counts as "no match". Without it the masks
    # contain NaN, which breaks `~` and boolean indexing.
    has_kwd1 = title.str.contains(need_kwd1, na=False)
    has_kwd2 = title.str.contains(need_kwd2, na=False)
    is_excluded = title.str.contains(except_kwd, na=False)
    has_kwd3 = title.str.contains(need_kwd3, na=False)

    # Selected: (group 1 AND group 2 AND NOT excluded) OR keyword 3.
    bid_info_df = source_df[(has_kwd1 & has_kwd2 & ~is_excluded) | has_kwd3].reset_index(drop=True)

    # Placeholder column, left empty here.
    bid_info_df['구분'] = None

    if bid != 'pur':
        # rename() returns a new frame, so the column assignments below do
        # not write into a slice of bid_info_df.
        bid_info_df_fin = (
            bid_info_df[['입력일','지역','발주기관','구분','공고명','업종','분류']]
            .rename(columns={'입력일': '공고일', '발주기관': '발주처'})
        )

        # 'yy.mm.dd' text -> 'YYYY-MM-DD' text.
        bid_info_df_fin['공고일'] = pd.to_datetime(bid_info_df_fin['공고일'], format='%y.%m.%d').dt.strftime('%Y-%m-%d')
        bid_info_df_fin['지역'] = bid_info_df_fin['지역'].apply(remove_and_join)
    else:
        # The 'pur' table keeps a different, smaller set of columns.
        bid_info_df_fin = bid_info_df[['개찰일','공고명','업종','분류']].copy()

    bid_info_dict_fin[bid] = bid_info_df_fin

In [53]:
# Write each finished frame to its 'bid_<type>_df_listup' table in BigQuery.
for bid in ('con', 'ser', 'pur'):
    save_dataframe_to_bigquery(bid_info_dict_fin[bid], 'info21', f'bid_{bid}_df_listup', bigquerykey_path)

Data inserted into table bid_con_df_listup successfully.
Data inserted into table bid_ser_df_listup successfully.
Data inserted into table bid_pur_df_listup successfully.
