In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_symptoms_from_template(article_title):
    """Return the "증상" (symptoms) infobox entry of a Korean Wikipedia article.

    Args:
        article_title: Title of the article. Spaces are converted to
            underscores to build the URL. Non-string values (for example the
            float NaN pandas yields for an empty CSV cell) and blank strings
            are treated as an unfetchable page instead of raising.

    Returns:
        str: The stripped text of the first ``<td>`` following the "증상"
        header, or one of the Korean status messages below when the page,
        the infobox, or the symptom row cannot be obtained.
    """
    fetch_failed = "위키백과 페이지를 가져올 수 없습니다."
    symptom_missing = "증상 정보를 찾을 수 없습니다."

    # Guard before touching the network: calling .replace() on NaN/None would
    # raise AttributeError and abort the caller's whole loop.
    if not isinstance(article_title, str) or not article_title.strip():
        return fetch_failed

    base_url = "https://ko.wikipedia.org/wiki/"
    full_url = base_url + article_title.strip().replace(" ", "_")

    try:
        response = requests.get(full_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        # RequestException is the base class of ConnectionError, Timeout and
        # HTTPError, and also covers e.g. TooManyRedirects.
        return fetch_failed

    soup = BeautifulSoup(response.content, "html.parser")

    template_section = soup.find("table", class_="infobox")
    if not template_section:
        return "틀 정보를 찾을 수 없습니다."

    symptom_header = template_section.find("th", string="증상")
    if not symptom_header:
        return symptom_missing

    # find_next() returns None when no <td> follows the header.
    symptom_cell = symptom_header.find_next("td")
    if symptom_cell is None:
        return symptom_missing

    return symptom_cell.get_text().strip()

def main(csv_file_path="C:/Users/cause/Downloads/진단명리스트(번역)1.csv",
         output_excel_file="output1.xlsx"):
    """Look up symptoms for every diagnosis in a CSV and save the result as Excel.

    Args:
        csv_file_path: CSV file containing a "진단명" column. Defaults to the
            path the notebook originally hard-coded.
        output_excel_file: Destination ``.xlsx`` path. Defaults to the
            originally hard-coded file name.

    The function reads the CSV, adds a "증상명" column filled by calling
    ``extract_symptoms_from_template`` once per row, writes the frame to
    Excel without the index, and prints a completion message.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but also strips a leading BOM;
    # with plain "utf-8" a BOM stays attached to the first column name.
    symptom = pd.read_csv(csv_file_path, encoding="utf-8-sig")

    diagnosis_column_name = "진단명"
    symptom_column_name = "증상명"

    # One lookup per row, assigned as a whole column instead of cell by cell.
    symptom[symptom_column_name] = [
        extract_symptoms_from_template(diagnosis)
        for diagnosis in symptom[diagnosis_column_name]
    ]

    symptom.to_excel(output_excel_file, index=False)
    print("작업이 완료되었습니다.")

if __name__ == "__main__":
    main()

KeyboardInterrupt: 

In [4]:
pip install cachetools

Collecting cachetoolsNote: you may need to restart the kernel to use updated packages.

  Downloading cachetools-5.3.1-py3-none-any.whl (9.3 kB)
Installing collected packages: cachetools
Successfully installed cachetools-5.3.1


In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from cachetools import cached, TTLCache

# Cache of up to 100 lookups, each kept for one hour (ttl is in seconds).
cache = TTLCache(maxsize=100, ttl=3600)

@cached(cache)
def extract_symptoms_from_template(article_title):
    """Return the "Symptoms" infobox entry of an English Wikipedia article.

    Results are memoised per ``article_title`` in ``cache``.
    NOTE: status messages are cached like any other return value, so a
    transient fetch failure is repeated until the cache entry expires.

    Args:
        article_title: Title of the article. Spaces are converted to
            underscores to build the URL. Non-string values (for example the
            float NaN pandas yields for an empty CSV cell) and blank strings
            are treated as an unfetchable page instead of raising.

    Returns:
        str: The stripped text of the first ``<td>`` following the
        "Symptoms" header, or one of the Korean status messages below when
        the page, the infobox, or the symptom row cannot be obtained.
    """
    fetch_failed = "위키백과 페이지를 가져올 수 없습니다."
    symptom_missing = "증상 정보를 찾을 수 없습니다."

    # Guard before touching the network: calling .replace() on NaN/None would
    # raise AttributeError and abort the caller's whole loop.
    if not isinstance(article_title, str) or not article_title.strip():
        return fetch_failed

    base_url = "https://en.wikipedia.org/wiki/"
    full_url = base_url + article_title.strip().replace(" ", "_")

    try:
        response = requests.get(full_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        # RequestException is the base class of ConnectionError, Timeout and
        # HTTPError, and also covers e.g. TooManyRedirects.
        return fetch_failed

    soup = BeautifulSoup(response.content, "html.parser")

    template_section = soup.find("table", class_="infobox")
    if not template_section:
        return "틀 정보를 찾을 수 없습니다."

    # The page is fetched from en.wikipedia.org, so the infobox header is the
    # English "Symptoms"; the Korean label "증상" never appears there.
    symptom_header = template_section.find("th", string="Symptoms")
    if not symptom_header:
        return symptom_missing

    # find_next() returns None when no <td> follows the header.
    symptom_cell = symptom_header.find_next("td")
    if symptom_cell is None:
        return symptom_missing

    return symptom_cell.get_text().strip()

def main(csv_file_path="C:/Users/cause/Downloads/진단명리스트(번역)1.csv",
         output_excel_file="output_with_caching.xlsx"):
    """Look up symptoms for every English diagnosis name in a CSV and save as Excel.

    Args:
        csv_file_path: CSV file containing a "진단명(영어)" column. Defaults to
            the path the notebook originally hard-coded.
        output_excel_file: Destination ``.xlsx`` path. Defaults to the
            originally hard-coded file name.

    The function reads the CSV, adds a "증상명" column filled by calling
    ``extract_symptoms_from_template`` once per row, writes the frame to
    Excel without the index, and prints a completion message.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but also strips a leading BOM;
    # with plain "utf-8" a BOM stays attached to the first column name.
    symptom = pd.read_csv(csv_file_path, encoding="utf-8-sig")

    diagnosis_column_name = "진단명(영어)"
    symptom_column_name = "증상명"

    # One lookup per row, assigned as a whole column instead of cell by cell.
    symptom[symptom_column_name] = [
        extract_symptoms_from_template(diagnosis)
        for diagnosis in symptom[diagnosis_column_name]
    ]

    symptom.to_excel(output_excel_file, index=False)
    print("작업이 완료되었습니다.")

if __name__ == "__main__":
    main()

KeyboardInterrupt: 

In [11]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from cachetools import cached, TTLCache

# Cache settings: at most 100 items, each kept for one hour (ttl is in seconds).
cache = TTLCache(maxsize=100, ttl=3600)

@cached(cache)
def extract_symptoms_from_template(article_title):
    """Return the "Symptoms" infobox entry of an English Wikipedia article.

    Results are memoised per ``article_title`` in ``cache``.
    NOTE: status messages are cached like any other return value, so a
    transient fetch failure is repeated until the cache entry expires.

    Args:
        article_title: Title of the article. Spaces are converted to
            underscores to build the URL. Non-string values (for example the
            float NaN pandas yields for an empty CSV cell) and blank strings
            are reported as a failed fetch instead of raising.

    Returns:
        str: The stripped text of the first ``<td>`` following the
        "Symptoms" header, or an English status message when the page, the
        infobox, or the symptom row cannot be obtained.
    """
    fetch_failed = "Failed to fetch Wikipedia page."
    symptom_missing = "Symptom information not found."

    # Guard before touching the network: calling .replace() on NaN/None would
    # raise AttributeError and abort the caller's whole loop.
    if not isinstance(article_title, str) or not article_title.strip():
        return fetch_failed

    base_url = "https://en.wikipedia.org/wiki/"
    full_url = base_url + article_title.strip().replace(" ", "_")

    try:
        # Without a timeout requests may wait indefinitely on a stalled server.
        response = requests.get(full_url, timeout=10)
    except requests.RequestException:
        # Connection errors and timeouts become a status message so that one
        # bad lookup does not abort the whole batch.
        return fetch_failed

    if response.status_code != 200:
        return fetch_failed

    soup = BeautifulSoup(response.content, "html.parser")

    # Find the section containing the template information
    template_section = soup.find("table", {"class": "infobox"})
    if not template_section:
        return "Template information not found."

    # Find the specific symptom information within the template
    symptom_header = template_section.find("th", string="Symptoms")
    if not symptom_header:
        return symptom_missing

    # find_next() returns None when no <td> follows the header.
    symptom_cell = symptom_header.find_next("td")
    if symptom_cell is None:
        return symptom_missing

    return symptom_cell.get_text().strip()

# Input CSV to read and output CSV to write
input_csv_file = "C:/Users/cause/Downloads/진단명리스트(번역)1.csv"
output_csv_file = "진단명리스트(번역)1 설명추가.csv"

# Column holding the diagnosis names, and the column that receives the results
diagnosis_column_name = "진단명(영어)"
symptom_column_name = "증상"

# Load the diagnosis list
data = pd.read_csv(input_csv_file, engine='python', encoding='utf-8-sig')

# Start with an empty result column, then fill it one diagnosis at a time
data[symptom_column_name] = ""
for index, diagnosis in data[diagnosis_column_name].items():
    data.at[index, symptom_column_name] = extract_symptoms_from_template(diagnosis)

# Write the augmented table to a new CSV file and report completion
data.to_csv(output_csv_file, index=False, encoding='utf-8-sig')
print("작업이 완료되었습니다.")

작업이 완료되었습니다.
