In [None]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
from datetime import datetime

# List of cooperation codes to scrape
cooperation_codes = [
    '09106', '09297', '09922', '09570', '09157',
    '09250', '09247', '09113', '09114', '04350'
]

# Base URL
base_url = 'https://card.kbcard.com/CRD/DVIEW/HCAMCXPRICAC0076'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# Tunables for the scraping loop.
REQUEST_TIMEOUT_SECONDS = 15   # without a timeout, requests may wait forever on a stalled connection
REQUEST_DELAY_SECONDS = 1      # pause between consecutive requests to be respectful to the server
MIN_BENEFIT_TEXT_LENGTH = 10   # text of this length or shorter is treated as noise
MAX_BENEFITS_PER_CARD = 5      # number of benefit snippets kept per card

# CSS selectors tried in order; the first one yielding non-empty text wins.
CARD_NAME_SELECTORS = [
    'h1', 'h2', 'h3',
    '.card-name', '.card-title',
    '#mainForm h3',
    '.contentArea h3'
]

# CSS selectors whose matches are all collected as benefit/feature text.
BENEFIT_SELECTORS = [
    '.benefit', '.feature', '.txtBox1_hide div',
    '.contentArea div', '.txtBox div'
]


def _timestamp():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _extract_card_name(soup):
    """Return the text of the first CARD_NAME_SELECTORS match that has text, else 'Not found'."""
    for selector in CARD_NAME_SELECTORS:
        elem = soup.select_one(selector)
        if elem:
            text = elem.get_text(strip=True)
            if text:
                return text
    return 'Not found'


def _extract_benefits(soup):
    """Collect distinct benefit texts, in document/selector order.

    The selectors overlap (one element can match several of them), so texts
    already collected are skipped instead of being appended again.
    """
    seen = set()
    benefits = []
    for selector in BENEFIT_SELECTORS:
        for elem in soup.select(selector):
            text = elem.get_text(strip=True)
            if len(text) > MIN_BENEFIT_TEXT_LENGTH and text not in seen:
                seen.add(text)
                benefits.append(text)
    return benefits


# Store results: one dict per cooperation code, same keys for success and failure
results = []

for index, code in enumerate(cooperation_codes):
    # Delay before every request except the first, so the pause also applies
    # after a failed request and no time is wasted after the last one.
    if index:
        time.sleep(REQUEST_DELAY_SECONDS)

    params = {
        'mainCC': 'a',
        'cooperationcode': code
    }

    try:
        print(f"Scraping cooperation code: {code}")
        response = requests.get(
            base_url,
            params=params,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Timeouts, connection errors and 4xx/5xx responses all end up here.
        print(f"✗ Error scraping {code}: {e}")
        results.append({
            'cooperation_code': code,
            'url': f"{base_url}?mainCC=a&cooperationcode={code}",
            'status_code': 'Error',
            'page_title': 'Error',
            'card_name': 'Error',
            'benefits': f'Error: {str(e)}',
            'timestamp': _timestamp()
        })
        continue

    # requests falls back to ISO-8859-1 for text responses that declare no
    # charset, which garbles non-Latin text; use the content-based guess then.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract page title
    title = soup.find('title')
    page_title = title.get_text(strip=True) if title else "No title found"

    benefits = _extract_benefits(soup)

    results.append({
        'cooperation_code': code,
        'url': response.url,
        'status_code': response.status_code,
        'page_title': page_title,
        'card_name': _extract_card_name(soup),
        'benefits': ' | '.join(benefits[:MAX_BENEFITS_PER_CARD]) if benefits else 'No benefits found',
        'timestamp': _timestamp()
    })
    print(f"✓ Successfully scraped {code}")

print(f"\nScraping completed! Processed {len(results)} URLs.")

In [None]:
# Turn the collected records into a table and print it in full as plain text
df = pd.DataFrame(results)
print("Scraping Results:", "=" * 80, df.to_string(index=False), sep="\n")

# Write a timestamped CSV snapshot of the same table
# (the 'utf-8-sig' codec writes a BOM at the start of the file)
run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
csv_filename = f"kb_card_data_{run_stamp}.csv"
df.to_csv(csv_filename, index=False, encoding='utf-8-sig')
print(f"\nData saved to: {csv_filename}")

In [None]:
# Detailed analysis of successful and failed scrapes.
#
# The scraping cell records the numeric HTTP status for every request that
# passed raise_for_status() and the string 'Error' for every request that
# raised. Classify rows on that distinction rather than on `== 200`, so a
# successful non-200 response is not reported as a failure with its benefits
# text shown as the error message.
request_succeeded = pd.to_numeric(df['status_code'], errors='coerce').notna()
successful_scrapes = df[request_succeeded]
failed_scrapes = df[~request_succeeded]

# Longest benefits text printed in full; longer text is cut and suffixed with "..."
BENEFITS_PREVIEW_CHARS = 200

print(f"Successful scrapes: {len(successful_scrapes)}")
print(f"Failed scrapes: {len(failed_scrapes)}")
print("\n" + "="*50)

if len(successful_scrapes) > 0:
    print("\nSuccessful Scrapes:")
    for _, row in successful_scrapes.iterrows():
        benefits_text = row['benefits']
        if len(benefits_text) > BENEFITS_PREVIEW_CHARS:
            benefits_text = f"{benefits_text[:BENEFITS_PREVIEW_CHARS]}..."

        print(f"\nCooperation Code: {row['cooperation_code']}")
        print(f"Card Name: {row['card_name']}")
        print(f"URL: {row['url']}")
        print(f"Benefits: {benefits_text}")
        print("-" * 40)

if len(failed_scrapes) > 0:
    print("\nFailed Scrapes:")
    for _, row in failed_scrapes.iterrows():
        # For failed rows the 'benefits' column holds the error message
        print(f"Cooperation Code: {row['cooperation_code']} - {row['benefits']}")