<a href="https://colab.research.google.com/github/kty0307/Blog/blob/main/GPTs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. 환경설정

In [18]:
import requests
import json
import pandas as pd
import urllib.parse
import urllib3
import ssl
from urllib.parse import unquote
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context
from google.colab import files

In [3]:
class TLSAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that hands its pool manager a custom SSL context.

    The context is a default client context whose cipher list is set to
    ``AES128-SHA256``.
    """

    def init_poolmanager(self, *args, **kwargs):
        """Build the pool manager, overriding any ``ssl_context`` argument."""
        tls_context = ssl.create_default_context()
        # NOTE(review): presumably the target servers need this cipher -- confirm.
        tls_context.set_ciphers("AES128-SHA256")
        kwargs.update(ssl_context=tls_context)
        return super().init_poolmanager(*args, **kwargs)

In [None]:
class NewTLSAdapter(HTTPAdapter):
    """``HTTPAdapter`` variant that supplies its own SSL context to the pool."""

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with ``ssl_context`` replaced by ours."""
        context = ssl.create_default_context()
        context.set_ciphers("AES128-SHA256")  # cipher suite to use
        # Our context wins over any ssl_context the caller passed in.
        merged_kwargs = {**kwargs, "ssl_context": context}
        return super().init_poolmanager(*args, **merged_kwargs)

In [4]:
def subtract_month(df, col_name, new_col_name):
    """Store the month preceding ``col_name`` in ``new_col_name``.

    Values in ``col_name`` are parsed with the ``%Y%m`` format, shifted back
    by one calendar month, and written as integers in the same ``YYYYMM``
    layout.

    Args:
        df: DataFrame to extend; it is modified in place.
        col_name: Name of the column holding ``YYYYMM`` values.
        new_col_name: Name of the column that receives the shifted values.

    Returns:
        The same DataFrame object that was passed in.
    """
    one_month_earlier = (
        pd.to_datetime(df[col_name], format='%Y%m') - pd.DateOffset(months=1)
    )
    df[new_col_name] = one_month_earlier.dt.strftime('%Y%m').astype(int)
    return df

# 1. 저축은행

## 1.1. 신용점수별 금리

In [10]:
def savings_cs(year, month):
    """Fetch savings-bank loan rates per credit-score band for one month.

    Posts the disclosure month to the fsb.or.kr endpoint, keeps the bank
    name, disclosure month and rate columns from the ``REC`` list of the
    JSON reply, renames them to Korean labels, and adds the sector label and
    a ``연월`` column (the month before the disclosure month).

    Args:
        year: Disclosure year as an int (formatted with ``04d``).
        month: Disclosure month as an int (formatted with ``02d``).

    Returns:
        pandas.DataFrame with the columns 사명, 연월, 업권, 평균금리, 900점대,
        800점대, 700점대, 600점대 and 공시연월.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://www.fsb.or.kr/ratloanconf_0200.jct"
    payload = {
        "_JSON_": json.dumps({
            "SORT_COLUMN": "",
            "SORT": "",
            "PRE_MONTH_MONEY": "",
            "SUBMIT_MONTH": f"{year:04d}{month:02d}"
        })
    }

    response = requests.post(url, data=payload)
    # Fail on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    json_data = json.loads(response.text)

    # Extract the records and build the DataFrame.
    df = pd.DataFrame(json_data['REC'])
    df = df[['BANK_NAME', 'SUBMIT_MONTH', 'A_RATE1_3', 'A_RATE1', 'A_RATE2',
             'A_RATE3', 'A_RATE_AVE']]

    # Positional rename: the order must match the selection above.
    df.columns = ['사명', '공시연월', '900점대', '800점대', '700점대', '600점대', '평균금리']

    df['업권'] = '저축은행업권'

    df = subtract_month(df, '공시연월', '연월')

    df = df[['사명', '연월', '업권', '평균금리', '900점대', '800점대', '700점대', '600점대', '공시연월']]

    return df

## 1.2. 금리대별 취급비중

In [None]:
def savings_int(year, month):
    """Fetch savings-bank handling weights per interest-rate band.

    Posts the disclosure month to the fsb.or.kr endpoint, keeps the bank
    name, the submit month and six ``HANDING_WEIGHT_*`` columns from the
    ``REC`` list of the JSON reply, and renames them to Korean labels.

    Args:
        year: Disclosure year.
        month: Disclosure month as an int (zero-padded to two digits).

    Returns:
        pandas.DataFrame with the renamed columns plus the ``업권`` label.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = "https://www.fsb.or.kr/ratloanconf_0300.jct"
    query = json.dumps({
        "SORT": "",
        "SUBMIT_MONTH": f"{year}{month:02}"
    })

    reply = requests.post(endpoint, data={"_JSON_": query})
    reply.raise_for_status()  # raise on HTTP errors

    # Source column -> output label, in output order.
    labels = {
        "BANK_NAME": '사명',
        "SUBMIT_MONTH": '제출연월',
        "HANDING_WEIGHT_10": '10%이하',
        "HANDING_WEIGHT_12": '12%이하',
        "HANDING_WEIGHT_14": '14%이하',
        "HANDING_WEIGHT_16": '16%이하',
        "HANDING_WEIGHT_18": '18%이하',
        "HANDING_WEIGHT_20": '20%이하',
    }

    records = json.loads(reply.text)["REC"]
    table = pd.DataFrame(records)[list(labels)].rename(columns=labels)
    table['업권'] = '저축은행업권'

    return table

# 2. 신용카드

## 2.1. 카드론_신용점수별 금리

In [12]:
def card_loan_cs(year, month):
    """Fetch card-loan rates per credit-score band for one disclosure month.

    Two requests go to the crefia.or.kr endpoint: the first lists the
    disclosure months of ``year`` (``configListMm``) so the ``cgcSeq`` id of
    ``month`` can be looked up; the second fetches ``resultList`` for that id.

    Args:
        year: Disclosure year.
        month: Disclosure month as an int; compared against ``cgcquarter``.

    Returns:
        pandas.DataFrame with the columns 사명, 연월, 업권, 평균금리, 900점대,
        800점대, 700점대, 600점대 and 공시연월.

    Raises:
        requests.HTTPError: If either request returns an error status.
        ValueError: If the month list has no entry for ``month``.
    """
    url = "https://gongsi.crefia.or.kr/portal/creditcard/creditcardDisclosureDetail25Ajax"

    def fetch_json(params):
        # One short-lived session per request, with the custom TLS adapter.
        with requests.session() as s:
            s.mount("https://", TLSAdapter())
            response = s.get(url, params=params)
            response.raise_for_status()  # raise on HTTP errors
        return json.loads(response.text)

    # Step 1: look up the cgcSeq id that belongs to the requested month.
    # NOTE(review): 1458 is a fixed seed id -- confirm any valid id works here.
    pick_month = fetch_json({
        "cgcSeq": 1458,
        "cgcMode": 25,
        "cgcYyyy": year,
        "mcSeq": []
    })

    for item in pick_month['configListMm']:
        if item['cgcquarter'] == month:
            cgc_seq = item['cgcSeq']
            break  # the first match is enough
    else:
        # Without this, cgc_seq would be unbound further down.
        raise ValueError(f"No card-loan disclosure found for {year}-{month:02d}")

    # Step 2: fetch the actual data for that id.
    data = fetch_json({
        "cgcSeq": cgc_seq,
        "cgcMode": 25,
        "cgcYyyy": year,
        "mcSeq": [31, 96, 1, 106, 14, 13, 12, 98, 502, 108, 619, 11, 97, 105, 103, 22]
    })

    df = pd.DataFrame(data["resultList"])

    # Keep only the columns of interest.
    df = df[["mcCompany", "cgcSeq", "cgCardPoint1", "cgCardPoint2", "cgCardPoint3",
             "cgCardPoint4", "cgCardPointAvg"]]

    # Replace the internal id with the YYYYMM disclosure month.
    df["cgcSeq"] = f"{year}{month:02}"

    # Positional rename: the order must match the selection above.
    df.columns = ['사명', '공시연월', '900점대', '800점대', '700점대', '600점대', '평균금리']

    df['업권'] = '신용카드업권(카드론)'

    df = subtract_month(df, '공시연월', '연월')

    df = df[['사명', '연월', '업권', '평균금리', '900점대', '800점대', '700점대', '600점대', '공시연월']]

    return df

## 2.2. 현금서비스_신용점수별 금리

In [13]:
def card_cash_cs(year, month):
    """Fetch cash-advance rates per credit-score band for one disclosure month.

    Two requests go to the crefia.or.kr endpoint: the first lists the
    disclosure months of ``year`` (``configListMm``) so the ``cgcSeq`` id of
    ``month`` can be looked up; the second fetches ``resultList`` for that id.

    Args:
        year: Disclosure year.
        month: Disclosure month as an int; compared against ``cgcquarter``.

    Returns:
        pandas.DataFrame with the columns 사명, 연월, 업권, 평균금리, 900점대,
        800점대, 700점대, 600점대 and 공시연월.

    Raises:
        requests.HTTPError: If either request returns an error status.
        ValueError: If the month list has no entry for ``month``.
    """
    url = "https://gongsi.crefia.or.kr/portal/creditcard/creditcardDisclosureDetail20Ajax"

    def fetch_json(params):
        # One short-lived session per request, with the custom TLS adapter.
        with requests.session() as s:
            s.mount("https://", TLSAdapter())
            response = s.get(url, params=params)
            response.raise_for_status()  # raise on HTTP errors
        return json.loads(response.text)

    # Step 1: look up the cgcSeq id that belongs to the requested month.
    # NOTE(review): 1460 is a fixed seed id -- confirm any valid id works here.
    pick_month = fetch_json({
        "cgcSeq": 1460,
        "cgcMode": 20,
        "cgcYyyy": year,
        "mcSeq": []
    })

    for item in pick_month['configListMm']:
        if item['cgcquarter'] == month:
            cgc_seq = item['cgcSeq']
            break  # the first match is enough
    else:
        # Without this, cgc_seq would be unbound further down.
        raise ValueError(f"No cash-advance disclosure found for {year}-{month:02d}")

    # Step 2: fetch the actual data for that id.
    data = fetch_json({
        "cgcSeq": cgc_seq,
        "cgcMode": 20,
        "cgcYyyy": year,
        "mcSeq": [31, 96, 1, 106, 14, 13, 12, 98, 502, 108, 619, 11, 97, 105, 103, 22]
    })

    df = pd.DataFrame(data["resultList"])

    # Keep only the columns of interest.
    df = df[["mcCompany", "cgcSeq", "cgMoneyPoint1", "cgMoneyPoint2", "cgMoneyPoint3",
             "cgMoneyPoint4", "cgMoneyPointAvg"]]

    # Replace the internal id with the YYYYMM disclosure month.
    df["cgcSeq"] = f"{year}{month:02}"

    # Positional rename: the order must match the selection above.
    df.columns = ['사명', '공시연월', '900점대', '800점대', '700점대', '600점대', '평균금리']

    df['업권'] = '신용카드업권(현금서비스)'

    df = subtract_month(df, '공시연월', '연월')

    df = df[['사명', '연월', '업권', '평균금리', '900점대', '800점대', '700점대', '600점대', '공시연월']]

    return df

# 3. 캐피탈

## 3.1. 신용점수별 금리

In [14]:
def capital_cs(year, month):
    """Fetch capital-company loan rates per credit-score band for one month.

    Two requests go to the crefia.or.kr endpoint: the first lists the
    disclosure months of ``year`` (``configListMm``) so the ``clgcSeq`` id of
    ``month`` can be looked up; the second fetches ``resultList`` for that id.

    Args:
        year: Disclosure year.
        month: Disclosure month as an int; compared against ``clgcquarter``.

    Returns:
        pandas.DataFrame with the columns 사명, 연월, 업권, 평균금리, 900점대,
        800점대, 700점대, 600점대 and 공시연월.

    Raises:
        requests.HTTPError: If either request returns an error status.
        ValueError: If the month list has no entry for ``month``.
    """
    url = "https://gongsi.crefia.or.kr/portal/creditloan/creditloanDisclosureDetail11/ajax"
    card_item = "134,39,40,623,130,41,25,156,6,55,32,58,52,61,57,64"

    def fetch_json(params):
        # One short-lived session per request, with the custom TLS adapter.
        with requests.session() as s:
            s.mount("https://", TLSAdapter())
            response = s.get(url, params=params)
            response.raise_for_status()  # raise on HTTP errors
        return json.loads(response.text)

    # Step 1: look up the clgcSeq id that belongs to the requested month.
    # The month list is requested for `year` itself, so the id found belongs
    # to the same year as the data request below.
    # NOTE(review): 521 is a fixed seed id -- confirm any valid id works here.
    pick_month = fetch_json({
        "clgcMode": 11,
        "cardItem": card_item,
        "clgcSeq": 521,
        "clgcYyyy": year
    })

    for item in pick_month['configListMm']:
        if item['clgcquarter'] == month:
            clgc_seq = item['clgcSeq']
            break  # the first match is enough
    else:
        # Without this, clgc_seq would be unbound further down.
        raise ValueError(f"No capital disclosure found for {year}-{month:02d}")

    # Step 2: fetch the actual data for that id.
    data = fetch_json({
        "clgcMode": 11,
        "cardItem": card_item,
        "clgcSeq": clgc_seq,
        "clgcYyyy": year
    })

    df = pd.DataFrame(data["resultList"])

    # Keep only the columns of interest.
    df = df[["mcCompany", "clgcSeq", "clgPoint1", "clgPoint2", "clgPoint3",
             "clgPoint4", "clgPointAvg"]]

    # Replace the internal id with the YYYYMM disclosure month.
    df["clgcSeq"] = f"{year}{month:02}"

    # Positional rename: the order must match the selection above.
    df.columns = ['사명', '공시연월', '900점대', '800점대', '700점대', '600점대', '평균금리']

    df['업권'] = '캐피탈업권'

    df = subtract_month(df, '공시연월', '연월')

    df = df[['사명', '연월', '업권', '평균금리', '900점대', '800점대', '700점대', '600점대', '공시연월']]

    return df


In [9]:
capital_cs(2025, 1)

Unnamed: 0,사명,연월,업권,평균금리,900점대,800점대,700점대,600점대,공시연월
0,아이엠캐피탈,202412,캐피탈업권,14.8,13.78,14.2,14.64,15.32,202501
1,한국캐피탈,202412,캐피탈업권,18.71,17.76,18.34,18.79,19.0,202501
2,한국투자캐피탈,202412,캐피탈업권,14.12,12.2,12.86,13.58,15.07,202501
3,BNK캐피탈,202412,캐피탈업권,13.74,12.52,13.14,13.58,15.09,202501
4,KB캐피탈,202412,캐피탈업권,14.35,12.32,13.93,15.88,17.06,202501
5,롯데캐피탈,202412,캐피탈업권,15.71,12.76,14.17,15.9,17.38,202501
6,메리츠캐피탈,202412,캐피탈업권,19.59,0.0,19.23,19.69,19.56,202501
7,우리금융캐피탈,202412,캐피탈업권,15.23,12.17,14.93,17.01,17.75,202501
8,하나캐피탈,202412,캐피탈업권,14.0,13.5,13.9,14.32,14.72,202501
9,현대캐피탈,202412,캐피탈업권,17.31,14.35,15.85,17.37,18.18,202501


# 4. 은행

## 4.1. 신용점수별 금리

은행권 데이터는 비동기 크롤링이 필요하므로 추후 업데이트할 예정입니다.

In [None]:
import requests
from urllib.parse import urlencode

def crawl_kfb_loan_data(year, month, opt_1, detail, str_value, select_new_balance):
    """Post a household-loan comparison query to portal.kfb.or.kr.

    NOTE(review): the site is presumably the Korea Federation of Banks
    consumer portal -- confirm.

    Args:
        year (str): Year sent as the ``year`` form field.
        month (str): Month sent as the ``month`` form field.
        opt_1 (str): Value sent as the ``opt_1`` form field.
        detail (str): Value sent as the ``detail`` form field.
        str_value (str): Pipe-separated bank names sent as the ``str`` field.
        select_new_balance (str): Value sent as ``select_new_balance``.

    Returns:
        str: The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """

    url = "https://portal.kfb.or.kr/compare/loan_household_new.php"
    headers = {
        "Accept": "*/*",
        # Only advertise encodings that are always decodable; "br"/"zstd"
        # need optional packages and would otherwise yield an undecoded body.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # Must be replaced with a real Cookie value.
        # NOTE(review): a session cookie is hard-coded here -- consider removing.
        "Cookie": "_fwb=212unpbFH4gShFqwzNvDsjk.1721955719107; _ga=GA1.3.1753461068.1722214698; _ga_0HWLZE2375=GS1.3.1722214698.1.1.1722214698.0.0.0; PHPSESSID=c1ek78m0dlg8oce33ud2t9fg74",
        "Host": "portal.kfb.or.kr",
        "Origin": "https://portal.kfb.or.kr",
        "Referer": "https://portal.kfb.or.kr/compare/loan_household_new.php",
        "Sec-Ch-Ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }

    payload = {
        "year": year,
        "month": month,
        "opt_1": opt_1,
        "detail": detail,
        "str": str_value,
        "select_new_balance": select_new_balance
    }

    with requests.session() as s:
        s.mount("https://", TLSAdapter())
        response = s.post(url, headers=headers, data=payload)
        response.raise_for_status()
    return response.text

# 사용 예시
# Usage example: query one month for the listed banks and print the raw reply.
year, month = "2025", "01"
opt_1, detail, select_new_balance = "3", "0", "1"
str_value = "KDB산업은행|NH농협은행|신한은행|우리은행|SC제일은행|하나은행|IBK기업은행|KB국민은행|한국씨티은행|Sh수협은행|iM뱅크(구 대구은행)|BNK부산은행|광주은행|제주은행|전북은행|BNK경남은행|케이뱅크|카카오뱅크|토스뱅크"

result = crawl_kfb_loan_data(
    year=year,
    month=month,
    opt_1=opt_1,
    detail=detail,
    str_value=str_value,
    select_new_balance=select_new_balance,
)
print(result)

In [None]:
import asyncio
import requests
from urllib.parse import urlencode
from requests_html import AsyncHTMLSession
from requests.adapters import HTTPAdapter
import ssl
import nest_asyncio
nest_asyncio.apply()

class TLSAdapter(HTTPAdapter):
    """``HTTPAdapter`` that builds its pool manager with a custom SSL context."""

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager, replacing any ``ssl_context`` argument."""
        custom_context = ssl.create_default_context()
        # A more secure cipher suite could be specified here instead.
        custom_context.set_ciphers("AES128-SHA256")
        kwargs.update(ssl_context=custom_context)
        return super().init_poolmanager(*args, **kwargs)

async def crawl_kfb_loan_data(year, month, opt_1, detail, str_value, select_new_balance):
    """Post a household-loan query to portal.kfb.or.kr and render the reply.

    The arguments are sent as the form fields ``year``, ``month``, ``opt_1``,
    ``detail``, ``str`` and ``select_new_balance``.

    Returns:
        The rendered page HTML (``r.html.html``) on success, or ``None`` when
        any exception is raised (a message is printed instead).
    """
    url = "https://portal.kfb.or.kr/compare/loan_household_new.php"
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "_fwb=212unpbFH4gShFqwzNvDsjk.1721955719107; _ga=GA1.3.1753461068.1722214698; _ga_0HWLZE2375=GS1.3.1722214698.1.1.1722214698.0.0.0; PHPSESSID=c1ek78m0dlg8oce33ud2t9fg74",  # must be replaced with a real Cookie value
        "Host": "portal.kfb.or.kr",
        "Origin": "https://portal.kfb.or.kr",
        "Referer": "https://portal.kfb.or.kr/compare/loan_household_new.php",
        "Sec-Ch-Ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }

    payload = {
        "year": year,
        "month": month,
        "opt_1": opt_1,
        "detail": detail,
        "str": str_value,
        "select_new_balance": select_new_balance
    }

    try:
        session = AsyncHTMLSession()
        # NOTE(review): this is a synchronous `with` on an async session --
        # confirm it actually closes the session/browser on exit.
        with session as s:
            s.mount("https://", TLSAdapter())
            r = await s.post(url, headers=headers, data=payload)
            r.raise_for_status()

            # 1. Render with explicit timeout and sleep parameters.
            await r.html.arender(timeout=20, sleep=5)  # adjust timeout/sleep as needed

            # Second render pass that runs a script reading one title element.
            # `result` is assigned here but not used afterwards.
            result = await r.html.arender(script="""
                return document.querySelector('.Resultitle ul.info li.leftArea span.title').textContent;
            """)

            # 4. Check whether a specific text appears (e.g. the text "대출금리")
            # if "대출금리" in r.html.html:
            #     print("Data loaded based on text check")
            #     # r.html.html can be parsed now

            # 5. Run JavaScript code (advanced)
            # await r.html.arender(script="""
            #     const element = document.querySelector('#someElement');
            #     if (element && element.textContent === 'Expected Value') {
            #         return true;
            #     }
            #     return false;
            # """, timeout=20, sleep=5)

            return r.html.html

    except requests.exceptions.RequestException as e:
        # HTTP/transport failures: report and signal failure with None.
        print(f"Error during request: {e}")
        return None
    except Exception as e:
        # Anything else (e.g. rendering errors) is also reported, not raised.
        print(f"An unexpected error occurred: {e}")
        return None

async def main():  # defined as a coroutine so the crawler can be awaited
    """Run one sample query and print the HTML, or a failure message."""
    year, month = "2025", "01"
    opt_1, detail, select_new_balance = "3", "0", "1"
    # NOTE(review): this value is already percent-encoded; confirm it is not
    # encoded a second time when it is sent as form data.
    str_value = "KDB%BB%EA%BE%F7%C0%BA%C7%E0|NH%B3%F3%C7%F9%C0%BA%C7%E0|%BD%C5%C7%D1%C0%BA%C7%E0|%BF%EC%B8%AE%C0%BA%C7%E0|SC%C1%A6%C0%CF%C0%BA%C7%E0|%C7%CF%B3%AA%C0%BA%C7%E0|IBK%B1%E2%BE%F7%C0%BA%C7%E0|KB%B1%B9%B9%CE%C0%BA%C7%E0|%C7%D1%B1%B9%BE%BE%C6%BC%C0%BA%C7%E0|Sh%BC%F6%C7%F9%C0%BA%C7%E0|iM%B9%F0%C5%A9%28%B1%B8+%B4%EB%B1%B8%C0%BA%C7%E0%29|BNK%BA%CE%BB%EA%C0%BA%C7%E0|%B1%A4%C1%D6%C0%BA%C7%E0|%C1%A6%C1%D6%C0%BA%C7%E0|%C0%FC%BA%CF%C0%BA%C7%E0|BNK%B0%E6%B3%B2%C0%BA%C7%E0|%C4%C9%C0%CC%B9%F0%C5%A9|%C4%AB%C4%AB%BF%C0%B9%F0%C5%A9|%C5%E4%BD%BA%B9%F0%C5%A9"

    # The crawler is a coroutine, so it has to be awaited.
    result = await crawl_kfb_loan_data(
        year, month, opt_1, detail, str_value, select_new_balance
    )

    # A falsy result (None or empty HTML) counts as a failed crawl.
    print(result if result else "크롤링 실패")

asyncio.run(main())  # run the coroutine via asyncio.run()

# 5. 데이터 처리 및 다운로드

In [20]:
# Disclosure period to collect, and the CSV file name derived from it.
year, month = 2025, 2
file_names = f'SP_INTRATE_{year}{month:02d}.csv'

In [21]:
# Collect the credit-score rate table of each sector for the selected period.
df_1 = savings_cs(year, month)
df_2 = card_loan_cs(year, month)
df_3 = card_cash_cs(year, month)
df_4 = capital_cs(year, month)

# Stack the four tables with a fresh index and write them to CSV without it.
df = pd.concat([df_1, df_2, df_3, df_4], ignore_index=True)
df.to_csv(file_names, index=False)

In [22]:
# Hand the CSV written above to google.colab's `files.download`.
files.download(file_names)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>