import libraries

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build
import re
import json


Build service

In [None]:
# Notebook-wide setup: authenticate with a service account and build the
# Content API client that the example cells below use via `service` and
# `merchant_id`.

# Path to the uploaded service account key file in Colab
SERVICE_ACCOUNT_FILE = '/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json'

# Define the required API scope
SCOPES = ['https://www.googleapis.com/auth/content']

# Authenticate using the service account
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES
)

# Build the Google Content API client
service = build('content', 'v2.1', credentials=credentials)

# Merchant Center account ID (replace with your actual ID)
# NOTE(review): later cells define the same value again as MERCHANT_ID;
# keep the two in sync when changing accounts.
merchant_id = '5411908926'


# Examples

In [None]:

# Example: Get details of the Merchant Center account (non-MCA)
# merchantId and accountId are both set to the same account ID here.
try:
    request = service.accounts().get(merchantId=merchant_id, accountId=merchant_id)
    response = request.execute()
    print("Account details:")
    print(json.dumps(response, indent=4))
except Exception as e:
    # Any failure (auth, network, API error) is reported, not raised.
    print(f"An error occurred: {e}")



In [None]:
# List products in the Merchant Center account
# Only a single list request is issued; no page token is followed, so the
# printed response is whatever that one call returns.
try:
    request = service.products().list(merchantId=merchant_id)
    response = request.execute()
    print("Product list:")
    print(json.dumps(response, indent=4))
except Exception as e:
    # Any failure is reported, not raised.
    print(f"An error occurred: {e}")


In [None]:
# Example: Get details of the Merchant Center account (non-MCA)
# This cell issues the same accounts().get call as the earlier account example.
try:
    # Use the same merchantId and accountId for non-MCA accounts
    request = service.accounts().get(merchantId=merchant_id, accountId=merchant_id)
    response = request.execute()
    print("Account details:")
    print(json.dumps(response, indent=4))
except Exception as e:
    # Any failure is reported, not raised.
    print(f"An error occurred: {e}")



In [None]:
# Get shipping settings for the Merchant Center account
# merchantId and accountId are both set to the same account ID here.
try:
    request = service.shippingsettings().get(merchantId=merchant_id, accountId=merchant_id)
    response = request.execute()
    print("Shipping settings:")
    print(json.dumps(response, indent=4))
except Exception as e:
    # Any failure is reported, not raised.
    print(f"An error occurred: {e}")


In [None]:
# List account statuses for the Merchant Center account.
# Unlike the other example cells, the response is printed as a raw Python
# object rather than as indented JSON.
try:
    request = service.accountstatuses().list(merchantId=merchant_id)
    response = request.execute()
    print("Account status:")
    print(response)
except Exception as e:
    # Any failure is reported, not raised.
    print(f"An error occurred: {e}")


In [None]:
# Fetch one product by its full ID (channel:language:feed label:offer id).
product_id = 'online:en:US:09780062441720'
# Not used in this cell. NOTE(review): it also lacks the "online:en:US:"
# prefix the ID above has — confirm before passing it to products().get.
product_id2 = '09798886633368'


try:
    # Retrieve product data
    request = service.products().get(merchantId=merchant_id, productId=product_id)
    response = request.execute()
    print("Product details:")
    print(json.dumps(response, indent=4))
except Exception as e:
    # Any failure is reported, not raised.
    print(f"An error occurred: {e}")


# Scraping Publication Pages (front end) from a US IP and updating the pricing for single-issue products

1. Prepare your CSV (e.g., merchant_center_products.csv) with the required columns.
2. Update `csv_file_path` in the cell below if your CSV is stored somewhere else.
3. Run script

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build
import re


# Setup for the price-update cell: API client, input CSV and scraping headers.

# Path to service account key (update as needed)
SERVICE_ACCOUNT_FILE = '/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json'

# API scope
SCOPES = ['https://www.googleapis.com/auth/content']

# Authenticate using the service account
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES
)

# Build the Google Content API client
service = build('content', 'v2.1', credentials=credentials)

# Merchant Center account ID (update if needed)
MERCHANT_ID = '5411908926'

# Read CSV with product data
csv_file_path = '/content/products.csv'  # Update path if needed
# Read every column as text: product IDs such as '09780062441720' would
# otherwise be parsed as integers and lose their leading zero, producing
# product IDs that do not exist in Merchant Center.
df = pd.read_csv(csv_file_path, dtype=str)

# Headers for web scraping
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
    # NOTE(review): this header only suggests a client IP to the server; the
    # request still originates from the notebook's real IP — confirm the site
    # honours it before relying on US pricing.
    "X-Forwarded-For": "34.85.12.34"  # Simulated US IP
}

# Needs feed label, language, ID
def build_product_id(row):
    """Return the product ID ``online:<language>:<feed label>:<id>`` for a CSV row.

    ``row`` must support item access for the keys ``language``,
    ``feed label`` and ``id``.
    """
    segments = ("online", row['language'], row['feed label'], row['id'])
    return ":".join(str(segment) for segment in segments)

def get_product_info(product_id):
    """Look up a product through the Content API.

    Returns the API response for ``product_id``, or None (after printing
    the error) when the lookup raises.
    """
    try:
        return service.products().get(
            merchantId=MERCHANT_ID, productId=product_id
        ).execute()
    except Exception as err:
        print(f"Error fetching product {product_id}: {err}")
        return None

def _parse_price_text(price_text):
    """Return the first number in ``price_text`` as a float, or None.

    Accepts '.' or ',' as the decimal mark. When both appear, the one that
    occurs last is the decimal mark and the other is a thousands separator
    ('1,234.56' and '1.234,56' both give 1234.56). A separator that repeats
    ('1,234,567') is treated as a thousands separator.
    """
    match = re.search(r'\d[\d.,]*', price_text)
    if not match:
        return None
    token = match.group().rstrip('.,')
    last_dot, last_comma = token.rfind('.'), token.rfind(',')
    if last_dot != -1 and last_comma != -1:
        decimal_sep = '.' if last_dot > last_comma else ','
        thousands_sep = ',' if decimal_sep == '.' else '.'
        token = token.replace(thousands_sep, '').replace(decimal_sep, '.')
    elif token.count(',') > 1 or token.count('.') > 1:
        token = token.replace(',', '').replace('.', '')
    else:
        # A single separator is read as the decimal mark ('1,99' -> 1.99).
        token = token.replace(',', '.')
    return float(token)


def extract_price(url):
    """Scrape the single-issue price from a product page.

    Fetches ``url`` with the module-level ``HEADERS`` and parses the text of
    the element matched by ``[data-testid='singleIssueProduct'] .btn-title``.

    Args:
        url: Product page URL.

    Returns:
        The price as a float, or None when the request fails, the element is
        missing, or its text contains no parsable number.
    """
    try:
        # The timeout keeps one unresponsive page from stalling the batch.
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the price element (update selector if needed)
        price_element = soup.select_one("[data-testid='singleIssueProduct'] .btn-title")
        if price_element:
            price = _parse_price_text(price_element.text.strip())
            if price is not None:
                return price

        print(f"Skipping {url}: No valid price found.")
        return None
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

def update_product_price(product_id, new_price):
    """Set a product's price (in USD) in Merchant Center.

    The product is looked up first; when that lookup returns nothing the
    update is skipped. Returns the update response on success and None
    otherwise; errors are printed, not raised.
    """
    try:
        if not get_product_info(product_id):
            return

        price_patch = {"price": {"value": str(new_price), "currency": "USD"}}
        result = service.products().update(
            merchantId=MERCHANT_ID,
            productId=product_id,
            body=price_patch,
        ).execute()
        print(f"Updated {product_id} to ${new_price}")
        return result
    except Exception as err:
        print(f"Error updating product {product_id}: {err}")

# Process each product
# For every CSV row: look the product up, scrape the price from its landing
# page, and push that price back to Merchant Center.
for _, row in df.iterrows():
    product_id = build_product_id(row)
    product_info = get_product_info(product_id)

    # Without a landing-page link there is nothing to scrape.
    if not (product_info and 'link' in product_info):
        print(f"Skipping {product_id}: No product link found.")
        continue

    product_url = product_info['link']
    correct_price = extract_price(product_url)
    if not correct_price:
        print(f"Skipping update for {product_id}: Price not found.")
        continue

    update_product_price(product_id, correct_price)

print("✅ Price updates completed.")


# Scraping Publication Pages (schema) from a US IP and updating the images for single-issue products

1. Prepare your CSV (e.g., merchant_center_products.csv) with the required columns.
2. Update CSV_FILE_PATH if it’s different.
3. Run script



In [None]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Setup for the image-update cell: API client, scraping headers and input CSV.

# Google Merchant Center API Setup
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
service = build("content", "v2.1", credentials=credentials)

# Your Merchant Center ID
MERCHANT_ID = "5411908926"

# Headers for scraping
# Only a User-Agent is sent here; no X-Forwarded-For header is set in this cell.
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)"
}

# CSV file with products
CSV_FILE_PATH = "/content/products_2025-02-25_15_05_17.csv"  # Update this path as needed

# Your provided functions
def build_product_id(row):
    """Join the row's language, feed label and id into ``online:<language>:<feed label>:<id>``."""
    language, feed_label, item_id = row['language'], row['feed label'], row['id']
    return "online:{}:{}:{}".format(language, feed_label, item_id)

def get_product_info(product_id):
    """Return the Content API record for ``product_id``.

    Any exception raised by the lookup is printed and turned into None.
    """
    try:
        lookup = service.products().get(merchantId=MERCHANT_ID, productId=product_id)
        return lookup.execute()
    except Exception as err:
        print(f"Error fetching product {product_id}: {err}")
        return None

# Extract schema.org JSON-LD, selecting only Product type
def extract_schema(url):
    """Fetch ``url`` and return its schema.org ``Product`` JSON-LD object.

    Every ``<script type="application/ld+json">`` tag is parsed in page
    order. A script may hold a single object, a list of objects, or an
    object with an ``@graph`` list; the first dict whose ``@type`` is exactly
    ``"Product"`` is returned.

    Args:
        url: Page URL to scrape.

    Returns:
        The Product schema as a dict, or None when the request fails, the
        page has no JSON-LD scripts, or none of them describes a Product.
    """
    print(f"Step 1: Scraping URL: {url}")
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        # Fail on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        # response.text is already decoded, so from_encoding must not be
        # passed: BeautifulSoup ignores it for str input and warns.
        soup = BeautifulSoup(response.text, "html.parser")
        script_tags = soup.find_all("script", type="application/ld+json")
        if not script_tags:
            print("Step 2: No JSON-LD scripts found")
            return None

        for i, script in enumerate(script_tags, 1):
            # .string is None for an empty tag; skip it rather than letting
            # the error abort the scan of the remaining scripts.
            script_text = script.string
            if not script_text or not script_text.strip():
                print(f"Step 2: Schema {i} is empty")
                continue
            try:
                data = json.loads(script_text)
            except json.JSONDecodeError:
                print(f"Step 2: Schema {i} is invalid JSON")
                continue
            print(f"Step 2: Found schema {i}: {json.dumps(data, indent=2, ensure_ascii=False)}")

            # Normalise the supported JSON-LD layouts to a flat candidate list.
            if isinstance(data, list):
                candidates = data
            elif isinstance(data, dict):
                graph = data.get("@graph")
                candidates = [data] + (graph if isinstance(graph, list) else [])
            else:
                candidates = []

            for candidate in candidates:
                if isinstance(candidate, dict) and candidate.get("@type") == "Product":
                    print(f"Step 2: Selected Product schema {i}")
                    return candidate
        print("Step 2: No Product schema found among scripts")
        return None
    except Exception as e:
        print(f"Step 2: Could not scrape {url}: {e}")
        return None

# Get the updated image from schema
def get_updated_image(schema):
    """Return the image URL from a Product schema, or None.

    schema.org allows ``image`` to be a URL string, a list of images, or an
    ImageObject dict. Because the result is sent as ``imageLink``, lists are
    reduced to their first entry and dicts to their ``url`` (falling back to
    ``contentUrl``).

    Args:
        schema: Parsed JSON-LD dict, or None.

    Returns:
        The image URL, or None when ``schema`` is not a Product or carries
        no usable image.
    """
    if not schema or schema.get("@type") != "Product":
        print("Step 3: No valid Product schema for image update")
        return None

    image = schema.get("image")
    if isinstance(image, (list, tuple)):
        image = image[0] if image else None
    if isinstance(image, dict):
        image = image.get("url") or image.get("contentUrl")

    if image:
        print(f"Step 3: Updated image found: {image}")
        return image
    print("Step 3: No image found in schema")
    return None

# Update product image in Merchant Center
def update_product_image(product_id, new_image):
    """Set ``imageLink`` on a Merchant Center product.

    The product is fetched first and the update is skipped when that lookup
    returns nothing. The update request carries ``updateMask="imageLink"``.
    Failures are printed, not raised; the function always returns None.
    """
    print(f"Step 4: Updating image for product {product_id}")
    try:
        if not get_product_info(product_id):
            print(f"Step 4: Skipping update - could not fetch product {product_id}")
            return

        service.products().update(
            merchantId=MERCHANT_ID,
            productId=product_id,
            body={"imageLink": new_image},
            updateMask="imageLink",
        ).execute()
        print(f"Step 5: Successfully updated image for {product_id} to {new_image}")
    except Exception as err:
        print(f"Step 5: Failed to update image for {product_id}: {err}")

# Main function
def main():
    """Update the Merchant Center image of every product listed in the CSV.

    Each row must provide ``language``, ``feed label`` and ``id`` (used to
    build the product ID) and a ``link`` column with the page to scrape.
    Rows without a link are skipped.
    """
    # Read every column as text so numeric-looking IDs keep leading zeros.
    df = pd.read_csv(CSV_FILE_PATH, dtype=str)
    total = len(df)
    print(f"Loaded {total} products from CSV")

    print("\nStarting product updates...\n")
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"--- Processing Product {position} of {total} ---")

        # Build product ID
        product_id = build_product_id(row)
        print(f"Product ID: {product_id}")

        # Get URL from CSV (assuming 'link' column exists)
        url = row.get("link")
        # An empty CSV field is read as NaN, which is truthy, so a plain
        # `not url` check would let it through.
        if pd.isna(url) or not str(url).strip():
            print("No URL found in CSV row, skipping")
            continue

        # Scrape schema and get new image
        schema = extract_schema(url)
        new_image = get_updated_image(schema)

        # Update product if new image is found
        if new_image:
            update_product_image(product_id, new_image)
        else:
            print(f"No new image to update for {product_id}")

        print(f"--- Finished Product {position} ---\n")

if __name__ == "__main__":
    main()

Loaded 15 products from CSV

Starting product updates...

--- Processing Product 1 of 15 ---
Product ID: online:en:US:vidas
Step 1: Scraping URL: https://www.pressreader.com/newspapers/n/vidas




Step 2: Found schema 1: {
  "@context": "https://schema.org",
  "@type": "BreadcrumbList",
  "itemListElement": [
    {
      "@type": "ListItem",
      "position": 1,
      "name": "Catalog",
      "item": "https://www.pressreader.com/catalog"
    },
    {
      "@type": "ListItem",
      "position": 2,
      "name": "Newspapers",
      "item": "https://www.pressreader.com/newspapers"
    },
    {
      "@type": "ListItem",
      "position": 3,
      "name": "Vidas",
      "item": "https://www.pressreader.com/newspapers/n/vidas"
    }
  ]
}
Step 2: Found schema 2: {
  "@context": "https://schema.org",
  "@type": "Product",
  "name": "Vidas Newspaper (Digital)",
  "description": "Vidas, published in Portuguese, is a newspaper from Portugal. Access Vidas online on PressReader or download issues to read later. Browse Vidas back issues in the <a href=\"https://www.pressreader.com/newspapers/n/vidas/issues\">archive</a>.",
  "image": "https://t.prcdn.co/img?file=9wah2025022200000000001001&

# Extracting Product Schema and Uploading it to a new data source on GMC (Single Issue)

1. Upload CSV with URLs
2. You can change feed name
3. Run *script*



In [None]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Setup for the single-issue upload cell: API client, scraping headers and
# the CSV of page URLs.

# Google Merchant Center API Setup
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
service = build("content", "v2.1", credentials=credentials)

# Your Merchant Center ID
merchant_id = "5411908926"

# Headers for scraping
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)"
}

# CSV file with URLs
# main() in this cell reads the "url" column of this file.
CSV_FILE_PATH = "/content/products_2025-02-28_16_53_07.csv"

# Fetch all supported countries
def get_all_supported_countries():
    """Return the region codes to target, falling back to ``["US"]``.

    Lists the account's regions and keeps the ``regionCode`` of every entry
    whose ``status`` is ``"active"`` and that has no ``postalCodeArea`` key.
    ``["US"]`` is returned when nothing qualifies or the call fails.

    NOTE(review): the run recorded in this notebook printed "Fetched 0
    supported countries"; the keys read here (``resources``, ``status``,
    ``regionCode``) may not match what regions().list returns — confirm
    against the API reference.
    """
    try:
        listing = service.regions().list(merchantId=merchant_id).execute()
        countries = []
        for region in listing.get('resources', []):
            if region.get('status') != 'active' or 'postalCodeArea' in region:
                continue
            countries.append(region['regionCode'])
        print(f"Fetched {len(countries)} supported countries")
        if not countries:
            return ["US"]  # Fallback to US if empty
        return countries
    except Exception as err:
        print(f"Failed to fetch countries: {err}")
        return ["US"]

# Extract schema.org JSON-LD, selecting only Product type
def extract_schema(url):
    """Fetch ``url`` and return its schema.org ``Product`` JSON-LD object.

    Every ``<script type="application/ld+json">`` tag is parsed in page
    order. A script may hold a single object, a list of objects, or an
    object with an ``@graph`` list; the first dict whose ``@type`` is exactly
    ``"Product"`` is returned.

    Args:
        url: Page URL to scrape.

    Returns:
        The Product schema as a dict, or None when the request fails, the
        page has no JSON-LD scripts, or none of them describes a Product.
    """
    print(f"Step 1: Scraping URL: {url}")
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        # Fail on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        # response.text is already decoded, so from_encoding must not be
        # passed: BeautifulSoup ignores it for str input and warns.
        soup = BeautifulSoup(response.text, "html.parser")
        script_tags = soup.find_all("script", type="application/ld+json")
        if not script_tags:
            print("Step 2: No JSON-LD scripts found")
            return None

        for i, script in enumerate(script_tags, 1):
            # .string is None for an empty tag; skip it rather than letting
            # the error abort the scan of the remaining scripts.
            script_text = script.string
            if not script_text or not script_text.strip():
                print(f"Step 2: Schema {i} is empty")
                continue
            try:
                data = json.loads(script_text)
            except json.JSONDecodeError:
                print(f"Step 2: Schema {i} is invalid JSON")
                continue
            print(f"Step 2: Found schema {i}: {json.dumps(data, indent=2, ensure_ascii=False)}")

            # Normalise the supported JSON-LD layouts to a flat candidate list.
            if isinstance(data, list):
                candidates = data
            elif isinstance(data, dict):
                graph = data.get("@graph")
                candidates = [data] + (graph if isinstance(graph, list) else [])
            else:
                candidates = []

            for candidate in candidates:
                if isinstance(candidate, dict) and candidate.get("@type") == "Product":
                    print(f"Step 2: Selected Product schema {i}")
                    return candidate
        print("Step 2: No Product schema found among scripts")
        return None
    except Exception as e:
        print(f"Step 2: Could not scrape {url}: {e}")
        return None

# Process schema to get product data (first USD offer)
def _offer_id_from_url(url, suffix=""):
    """Build an offerId from an offer URL: ``[<locale>-]<last segment><suffix>``.

    The locale is the path segment that directly follows the pressreader.com
    host and directly precedes a known content type. Trailing slashes are
    ignored so the last segment is never empty.
    """
    content_types = ("newspapers", "magazines")  # Known content types
    url_parts = url.rstrip("/").split("/")
    last_segment = url_parts[-1]  # e.g., "the-boston-globe"
    locale_code = ""
    # Start at 1 and stop one short of the end so both neighbours exist.
    for i in range(1, len(url_parts) - 1):
        if ("pressreader.com" in url_parts[i - 1]
                and url_parts[i] not in content_types
                and url_parts[i + 1] in content_types):
            locale_code = url_parts[i]
            break
    base = f"{locale_code}-{last_segment}" if locale_code else last_segment
    return f"{base}{suffix}"


def process_product_data(schema):
    """Turn a Product schema into a Merchant Center product body (single issue).

    The first offer priced in USD is taken as the single-issue product.

    Args:
        schema: Parsed schema.org Product dict, or None.

    Returns:
        A dict ready for ``products().insert``, or None when ``schema`` is
        missing or not a Product, has no USD offer, or its first USD offer
        lacks a ``url`` or ``price``.
    """
    if not schema:
        print("Step 3: No schema provided")
        return None
    print(f"Step 3: Processing schema: {json.dumps(schema, indent=2, ensure_ascii=False)}")

    if schema.get("@type") != "Product":
        print(f"Step 3: Invalid type: {schema.get('@type')}")
        return None

    # "offers" may be absent, null, a single object or a list.
    offers = schema.get("offers") or []
    if isinstance(offers, dict):
        offers = [offers]
    print(f"Step 3: Offers: {offers}")

    # Pick the first USD offer as the single-issue product
    for offer in offers:
        if not isinstance(offer, dict):
            continue
        offer_name = offer.get("name", "")
        offer_currency = offer.get("priceCurrency", "")
        print(f"Step 3: Checking offer - Name: {offer_name}, Currency: {offer_currency}")
        if offer_currency != "USD":
            continue

        offer_url = offer.get("url")
        offer_price = offer.get("price")
        # Without these the body would carry an empty offerId or the literal
        # price "None"; skip rather than build such a body.
        if not offer_url or offer_price is None:
            print("Step 3: First USD offer has no url or price, skipping")
            return None

        product_data = {
            # e.g., "de-the-boston-globe" or "the-boston-globe" (no "-sub" for single issue)
            "offerId": _offer_id_from_url(offer_url),
            "title": offer_name,
            "description": schema.get("description", ""),
            "imageLink": schema.get("image", "https://via.placeholder.com/150"),
            "link": offer_url,
            "contentLanguage": "en",
            "targetCountry": "US",
            "channel": "online",
            "availability": "in stock",
            "price": {"value": str(offer_price), "currency": "USD"},
            "customLabel0": "Newspapers - Single Issue - EN"
        }
        print(f"Step 3: Processed product data - Title: {product_data['title']}")
        return product_data
    print("Step 3: No USD offer found")
    return None

# Upload product to Google Merchant Center
def upload_product(product_data):
    """Insert ``product_data`` into Merchant Center and print the outcome.

    ``product_data`` must contain a ``title`` key, which is used in the
    progress messages. API errors are printed, not raised.
    """
    title = product_data['title']
    print(f"Step 4: Starting upload for {title}")
    try:
        insert_call = service.products().insert(merchantId=merchant_id, body=product_data)
        insert_call.execute()
        print(f"Step 5: Successfully uploaded: {title}")
    except Exception as err:
        print(f"Step 5: Failed to upload {title}: {err}")

# Create a feed for all countries with a generic name
def create_data_source():
    """Create the "PressReader Feed" datafeed unless one with that name exists.

    The new feed gets one English "Shopping" target per country returned by
    ``get_all_supported_countries``. Errors are printed, not raised.
    """
    feed_name = "PressReader Feed"  # Change this name if needed
    print("Creating feed...")
    try:
        feed_listing = service.datafeeds().list(merchantId=merchant_id).execute()
        for feed in feed_listing.get("resources", []):
            if feed["name"] == feed_name:
                print(f"Feed '{feed_name}' already exists")
                return

        countries = get_all_supported_countries()

        targets = []
        for country in countries:
            targets.append(
                {"country": country, "language": "en", "includedDestinations": ["Shopping"]}
            )

        feed_body = {
            "name": feed_name,
            "contentType": "products",
            "fileName": "Newspapers - Single Issue - EN.csv",
            "targets": targets,
        }
        service.datafeeds().insert(merchantId=merchant_id, body=feed_body).execute()
        print(f"Created feed: {feed_name} targeting {len(countries)} countries")
    except Exception as err:
        print(f"Failed to create feed: {err}")

# Main function
def main():
    """Create the feed, then scrape and upload a product for each CSV URL.

    URLs come from the ``url`` column of ``CSV_FILE_PATH``; a URL whose
    schema yields no product data is skipped.
    """
    df = pd.read_csv(CSV_FILE_PATH)
    create_data_source()

    print("\nStarting URL processing...\n")
    urls = df["url"]
    total = len(urls)
    for position, url in enumerate(urls, start=1):
        print(f"--- Processing URL {position} of {total} ---")
        product_data = process_product_data(extract_schema(url))
        if product_data:
            upload_product(product_data)
        print(f"--- Finished URL {position} ---\n")

if __name__ == "__main__":
    main()

Creating feed...
Fetched 0 supported countries
Created feed: PressReader Feed targeting 1 countries

Starting URL processing...

--- Processing URL 1 of 23 ---
Step 1: Scraping URL: https://www.pressreader.com/newspapers/n/welt-am-sonntag




Step 2: Found schema 1: {
  "@context": "https://schema.org",
  "@type": "BreadcrumbList",
  "itemListElement": [
    {
      "@type": "ListItem",
      "position": 1,
      "name": "Catalog",
      "item": "https://www.pressreader.com/catalog"
    },
    {
      "@type": "ListItem",
      "position": 2,
      "name": "Newspapers",
      "item": "https://www.pressreader.com/newspapers"
    },
    {
      "@type": "ListItem",
      "position": 3,
      "name": "Welt am Sonntag",
      "item": "https://www.pressreader.com/newspapers/n/welt-am-sonntag"
    }
  ]
}
Step 2: Found schema 2: {
  "@context": "https://schema.org",
  "@type": "Product",
  "name": "Welt am Sonntag Newspaper (Digital)",
  "description": "WELT AM SONNTAG stands for outstanding journalistic expertise and offers strong investigative stories from business and politics.",
  "image": "https://t.prcdn.co/img?file=30862025022300000000001001&page=1&width=269&retina=2",
  "releaseDate": "2025-02-23T00:00:00.000Z",
  "gtin13

# Extracting Product Schema and Uploading it to a new data source on GMC (Subscription)

1. Upload CSV with URLs
2. You can change feed name
3. Run *script*

In [None]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Setup for the subscription upload cell: API client, scraping headers and
# the CSV of page URLs.

# Google Merchant Center API Setup
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
service = build("content", "v2.1", credentials=credentials)

# Your Merchant Center ID
merchant_id = "5411908926"

# Headers for scraping
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)"
}

# CSV file with URLs
# main() in this cell reads the "url" column of this file.
CSV_FILE_PATH = "/content/newspapers.csv"

# Fetch all supported countries
def get_all_supported_countries():
    """Return the region codes to target, or ``["US"]`` as a fallback.

    Keeps the ``regionCode`` of each listed region whose ``status`` is
    ``"active"`` and that has no ``postalCodeArea`` key. An empty result or
    a failed call yields ``["US"]``.

    NOTE(review): confirm that regions().list really returns ``resources``
    entries with ``status`` and ``regionCode`` fields.
    """
    try:
        listing = service.regions().list(merchantId=merchant_id).execute()
        countries = []
        for region in listing.get('resources', []):
            is_active = region.get('status') == 'active'
            if is_active and 'postalCodeArea' not in region:
                countries.append(region['regionCode'])
        print(f"Fetched {len(countries)} supported countries")
        return countries or ["US"]
    except Exception as err:
        print(f"Failed to fetch countries: {err}")
        return ["US"]

# Extract schema.org JSON-LD, selecting only Product type
def extract_schema(url):
    """Fetch ``url`` and return its schema.org ``Product`` JSON-LD object.

    Every ``<script type="application/ld+json">`` tag is parsed in page
    order. A script may hold a single object, a list of objects, or an
    object with an ``@graph`` list; the first dict whose ``@type`` is exactly
    ``"Product"`` is returned.

    Args:
        url: Page URL to scrape.

    Returns:
        The Product schema as a dict, or None when the request fails, the
        page has no JSON-LD scripts, or none of them describes a Product.
    """
    print(f"Step 1: Scraping URL: {url}")
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        # Fail on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        # response.text is already decoded, so from_encoding must not be
        # passed: BeautifulSoup ignores it for str input and warns.
        soup = BeautifulSoup(response.text, "html.parser")
        script_tags = soup.find_all("script", type="application/ld+json")
        if not script_tags:
            print("Step 2: No JSON-LD scripts found")
            return None

        for i, script in enumerate(script_tags, 1):
            # .string is None for an empty tag; skip it rather than letting
            # the error abort the scan of the remaining scripts.
            script_text = script.string
            if not script_text or not script_text.strip():
                print(f"Step 2: Schema {i} is empty")
                continue
            try:
                data = json.loads(script_text)
            except json.JSONDecodeError:
                print(f"Step 2: Schema {i} is invalid JSON")
                continue
            print(f"Step 2: Found schema {i}: {json.dumps(data, indent=2, ensure_ascii=False)}")

            # Normalise the supported JSON-LD layouts to a flat candidate list.
            if isinstance(data, list):
                candidates = data
            elif isinstance(data, dict):
                graph = data.get("@graph")
                candidates = [data] + (graph if isinstance(graph, list) else [])
            else:
                candidates = []

            for candidate in candidates:
                if isinstance(candidate, dict) and candidate.get("@type") == "Product":
                    print(f"Step 2: Selected Product schema {i}")
                    return candidate
        print("Step 2: No Product schema found among scripts")
        return None
    except Exception as e:
        print(f"Step 2: Could not scrape {url}: {e}")
        return None

# Process schema to get product data (second USD offer)
def _offer_id_from_url(url, suffix=""):
    """Build an offerId from an offer URL: ``[<locale>-]<last segment><suffix>``.

    The locale is the path segment that directly follows the pressreader.com
    host and directly precedes a known content type. Trailing slashes are
    ignored so the last segment is never empty.
    """
    content_types = ("newspapers", "magazines")  # Known content types
    url_parts = url.rstrip("/").split("/")
    last_segment = url_parts[-1]  # e.g., "the-boston-globe"
    locale_code = ""
    # Start at 1 and stop one short of the end so both neighbours exist.
    for i in range(1, len(url_parts) - 1):
        if ("pressreader.com" in url_parts[i - 1]
                and url_parts[i] not in content_types
                and url_parts[i + 1] in content_types):
            locale_code = url_parts[i]
            break
    base = f"{locale_code}-{last_segment}" if locale_code else last_segment
    return f"{base}{suffix}"


def process_product_data(schema):
    """Turn a Product schema into a Merchant Center product body (subscription).

    The second offer priced in USD is taken as the subscription product; its
    offerId carries a ``-sub`` suffix.

    Args:
        schema: Parsed schema.org Product dict, or None.

    Returns:
        A dict ready for ``products().insert``, or None when ``schema`` is
        missing or not a Product, has fewer than two USD offers, or the
        second USD offer lacks a ``url`` or ``price``.
    """
    if not schema:
        print("Step 3: No schema provided")
        return None
    print(f"Step 3: Processing schema: {json.dumps(schema, indent=2, ensure_ascii=False)}")

    if schema.get("@type") != "Product":
        print(f"Step 3: Invalid type: {schema.get('@type')}")
        return None

    # "offers" may be absent, null, a single object or a list.
    offers = schema.get("offers") or []
    if isinstance(offers, dict):
        offers = [offers]
    print(f"Step 3: Offers: {offers}")

    # Collect all USD offers
    usd_offers = []
    for offer in offers:
        if not isinstance(offer, dict):
            continue
        offer_name = offer.get("name", "")
        offer_currency = offer.get("priceCurrency", "")
        print(f"Step 3: Checking offer - Name: {offer_name}, Currency: {offer_currency}")
        if offer_currency == "USD":
            usd_offers.append(offer)

    if not usd_offers:
        print("Step 3: No USD offers found")
        return None
    if len(usd_offers) < 2:
        print("Step 3: Only one USD offer found, skipping (wanted second offer)")
        return None

    # Select the second USD offer (index 1)
    offer = usd_offers[1]
    offer_url = offer.get("url")
    offer_price = offer.get("price")
    # Without these the body would carry an empty offerId or the literal
    # price "None"; skip rather than build such a body.
    if not offer_url or offer_price is None:
        print("Step 3: Second USD offer has no url or price, skipping")
        return None

    product_data = {
        # e.g., "pt-br-the-boston-globe-sub" or "the-boston-globe-sub"
        "offerId": _offer_id_from_url(offer_url, "-sub"),
        "title": offer.get("name", ""),
        "description": schema.get("description", ""),
        "imageLink": schema.get("image", "https://via.placeholder.com/150"),
        "link": offer_url,
        "contentLanguage": "en",
        "targetCountry": "US",
        "channel": "online",
        "availability": "in stock",
        "price": {"value": str(offer_price), "currency": "USD"},
        "customLabel0": "newspapers - subscription - en"
    }
    print(f"Step 3: Processed product data (second USD offer) - Title: {product_data['title']}")
    return product_data

# Upload product to Google Merchant Center
def upload_product(product_data):
    """Insert ``product_data`` into Merchant Center, printing success or failure.

    The ``title`` entry of ``product_data`` is used in the messages. API
    errors are printed, not raised.
    """
    title = product_data['title']
    print(f"Step 4: Starting upload for {title}")
    try:
        insert_call = service.products().insert(merchantId=merchant_id, body=product_data)
        insert_call.execute()
        print(f"Step 5: Successfully uploaded: {title}")
    except Exception as err:
        print(f"Step 5: Failed to upload {title}: {err}")

# Create a new feed for all countries
def create_data_source():
    """Create the "PressReader Feed 2" datafeed unless one with that name exists.

    The new feed gets one English "Shopping" target per country returned by
    ``get_all_supported_countries``. Errors are printed, not raised.
    """
    feed_name = "PressReader Feed 2"  # New feed name
    print("Creating feed...")
    try:
        feed_listing = service.datafeeds().list(merchantId=merchant_id).execute()
        for feed in feed_listing.get("resources", []):
            if feed["name"] == feed_name:
                print(f"Feed '{feed_name}' already exists")
                return

        countries = get_all_supported_countries()

        targets = []
        for country in countries:
            targets.append(
                {"country": country, "language": "en", "includedDestinations": ["Shopping"]}
            )

        feed_body = {
            "name": feed_name,
            "contentType": "products",
            "fileName": "pressreader_products_2.csv",
            "targets": targets,
        }
        service.datafeeds().insert(merchantId=merchant_id, body=feed_body).execute()
        print(f"Created feed: {feed_name} targeting {len(countries)} countries")
    except Exception as err:
        print(f"Failed to create feed: {err}")

# Main function
def main():
    """Create the feed, then scrape and upload a product for each CSV URL.

    URLs come from the ``url`` column of ``CSV_FILE_PATH``; a URL whose
    schema yields no product data is skipped.
    """
    df = pd.read_csv(CSV_FILE_PATH)
    create_data_source()

    print("\nStarting URL processing...\n")
    urls = df["url"]
    total = len(urls)
    for position, url in enumerate(urls, start=1):
        print(f"--- Processing URL {position} of {total} ---")
        product_data = process_product_data(extract_schema(url))
        if product_data:
            upload_product(product_data)
        print(f"--- Finished URL {position} ---\n")

if __name__ == "__main__":
    main()

Creating feed...
Feed 'PressReader Feed 2' already exists

Starting URL processing...

--- Processing URL 1 of 2453 ---
Step 1: Scraping URL: https://www.pressreader.com/newspapers/n/yuma-sun-this-week




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
--- Processing URL 2395 of 2453 ---
Step 1: Scraping URL: https://www.pressreader.com/newspapers/n/the-guardian-usa
Step 2: Found schema 1: {
  "@context": "https://schema.org",
  "@type": "BreadcrumbList",
  "itemListElement": [
    {
      "@type": "ListItem",
      "position": 1,
      "name": "Catalog",
      "item": "https://www.pressreader.com/catalog"
    },
    {
      "@type": "ListItem",
      "position": 2,
      "name": "Newspapers",
      "item": "https://www.pressreader.com/newspapers"
    },
    {
      "@type": "ListItem",
      "position": 3,
      "name": "The Guardian (USA)",
      "item": "https://www.pressreader.com/newspapers/n/the-guardian-usa"
    }
  ]
}
Step 2: Found schema 2: {
  "@context": "https://schema.org",
  "@type": "Product",
  "name": "The Guardian (USA) Newspaper (Digital)",
  "description": "The Guardian (USA), published in English, is a newspaper from United States. Access The Guard

# Delete Products

1. Upload CSV with required columns
2. Run script

In [None]:
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Setup for the delete cell: credentials, account ID, input CSV and API client.

# Google Merchant Center API Setup
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
MERCHANT_ID = "5411908926"

# CSV file with products to delete
# main() in this cell requires the columns 'language', 'feed label' and 'id'.
CSV_FILE_PATH = "/content/1.csv"  # Update this path

# Authenticate and build the service
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
service = build("content", "v2.1", credentials=credentials)

# Your provided functions
def build_product_id(row):
    """Compose ``online:<language>:<feed label>:<id>`` from a CSV row."""
    fields = [row[column] for column in ('language', 'feed label', 'id')]
    return ":".join(["online"] + [str(value) for value in fields])

def get_product_info(product_id):
    """Fetch ``product_id`` from the Content API.

    Returns the response, or None after printing the error when the call
    raises.
    """
    try:
        products_api = service.products()
        return products_api.get(merchantId=MERCHANT_ID, productId=product_id).execute()
    except Exception as err:
        print(f"Error fetching product {product_id}: {err}")
        return None

# Delete a product by productId
def delete_product(product_id):
    """Delete ``product_id`` from Merchant Center and print the outcome.

    Errors from the API call are printed, not raised.
    """
    try:
        delete_call = service.products().delete(merchantId=MERCHANT_ID, productId=product_id)
        delete_call.execute()
        print(f"Successfully deleted product: {product_id}")
    except Exception as err:
        print(f"Failed to delete product {product_id}: {err}")

# Main function to process CSV and delete products
def main():
    """Delete every product listed in the CSV at ``CSV_FILE_PATH``.

    Stops early (after printing a message) when the CSV cannot be read or
    lacks one of the 'language', 'feed label' or 'id' columns. For each
    row the product is looked up first, purely for logging, and then
    deleted whether or not the lookup succeeded.
    """
    try:
        df = pd.read_csv(CSV_FILE_PATH)
        print(f"Loaded CSV with {len(df)} products to delete")
    except Exception as exc:
        print(f"Failed to load CSV: {exc}")
        return

    # Bail out unless every column needed to build a product ID is present.
    required_columns = ['language', 'feed label', 'id']
    absent = [name for name in required_columns if name not in df.columns]
    if absent:
        print(f"CSV must contain these columns: {required_columns}")
        return

    total = len(df)
    for index, row in df.iterrows():
        product_id = build_product_id(row)
        print(f"\nProcessing row {index + 1} of {total}: {product_id}")

        # Optional existence check; the deletion below runs either way.
        product_info = get_product_info(product_id)
        if not product_info:
            print("Product not found or error, attempting deletion anyway")
        else:
            print(f"Found product: {product_info.get('title', 'Unknown title')}")

        delete_product(product_id)


if __name__ == "__main__":
    main()

Loaded CSV with 23 products to delete

Processing row 1 of 23: online:en:US:newspapers-n-rheinische-post-geldern-an-kevelaer
Found product: Rheinische Post - Geldern an Kevelaer Newspaper - 1 - month Subscription (Digital)
Successfully deleted product: online:en:US:newspapers-n-rheinische-post-geldern-an-kevelaer

Processing row 2 of 23: online:en:US:newspapers-n-united-daily
Found product: United Daily News (Taiwan) Newspaper - 12 - months Subscription (Digital)
Successfully deleted product: online:en:US:newspapers-n-united-daily

Processing row 3 of 23: online:en:US:newspapers-n-sowetan
Found product: Sowetan Newspaper - 1 - month Subscription (Digital)
Successfully deleted product: online:en:US:newspapers-n-sowetan

Processing row 4 of 23: online:en:US:newspapers-n-mint-hyderabad
Found product: Mint Hyderabad Newspaper - 6 - months Subscription (Digital)
Successfully deleted product: online:en:US:newspapers-n-mint-hyderabad

Processing row 5 of 23: online:en:US:newspapers-n-aviation

# New Schema (single issue)

In [None]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Google Merchant Center API Setup
# Service-account key file (on the mounted Google Drive) and the OAuth scope requested for it.
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
# NOTE: authentication happens at cell execution time and reads the key file from disk.
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
# "content" v2.1 client shared by every function in this cell.
service = build("content", "v2.1", credentials=credentials)

# Your Merchant Center ID
# Passed as merchantId to every API call below.
merchant_id = "5411908926"

# Headers for scraping
# NOTE(review): this User-Agent presents the scraper as DuckDuckBot — confirm that is intended.
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)"
}

# CSV file with URLs
# main() reads the page URLs from this file's "url" column.
CSV_FILE_PATH = "/content/newspapers.csv"

# Fetch all supported countries
def get_all_supported_countries():
    """Return the region codes reported by ``service.regions().list``.

    Only entries whose ``status`` is ``'active'`` and that carry no
    ``postalCodeArea`` key are kept. Falls back to ``["US"]`` when nothing
    qualifies or when the request raises.

    NOTE(review): confirm that the regions.list response really exposes
    ``resources``, ``regionCode`` and ``status``; if it does not, this
    function always returns the ``["US"]`` fallback.
    """
    try:
        response = service.regions().list(merchantId=merchant_id).execute()
        countries = []
        for region in response.get('resources', []):
            if region.get('status') != 'active' or 'postalCodeArea' in region:
                continue
            countries.append(region['regionCode'])
        print(f"Fetched {len(countries)} supported countries")
        return countries or ["US"]
    except Exception as exc:
        print(f"Failed to fetch countries: {exc}")
        return ["US"]

# Extract schema.org JSON-LD, selecting only Product type
def _jsonld_nodes(data):
    """Return the dict objects found in one parsed JSON-LD document.

    Handles a single top-level object, a top-level array of objects, and an
    object carrying an ``@graph`` array. Anything that is not a dict is
    dropped so callers can safely use ``.get``.
    """
    if isinstance(data, list):
        nodes = data
    elif isinstance(data, dict):
        graph = data.get("@graph")
        nodes = [data, *graph] if isinstance(graph, list) else [data]
    else:
        nodes = []
    return [node for node in nodes if isinstance(node, dict)]


def extract_schema(url):
    """Fetch ``url`` and return its first JSON-LD object of type ``Product``.

    Every ``<script type="application/ld+json">`` tag is parsed in document
    order. Scripts that are empty or contain invalid JSON are reported and
    skipped instead of aborting the whole page.

    Args:
        url: Page to scrape (requested with ``HEADERS`` and a 10 s timeout).

    Returns:
        The Product schema as a dict, or ``None`` when the page has no
        JSON-LD, no Product object, or the request/parsing fails.
    """
    print(f"Step 1: Scraping URL: {url}")
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.encoding = 'utf-8'
        # response.text is already decoded; from_encoding only applies to
        # byte input and triggers a warning when given alongside str markup.
        soup = BeautifulSoup(response.text, "html.parser")
        script_tags = soup.find_all("script", type="application/ld+json")
        if not script_tags:
            print("Step 2: No JSON-LD scripts found")
            return None

        for i, script in enumerate(script_tags, 1):
            try:
                # .string is None for an empty tag; "" is then reported as
                # invalid JSON rather than raising AttributeError.
                data = json.loads(script.string or "")
            except json.JSONDecodeError:
                print(f"Step 2: Schema {i} is invalid JSON")
                continue
            print(f"Step 2: Found schema {i}: {json.dumps(data, indent=2, ensure_ascii=False)}")
            for node in _jsonld_nodes(data):
                if node.get("@type") == "Product":
                    print(f"Step 2: Selected Product schema {i}")
                    return node
        print("Step 2: No Product schema found among scripts")
        return None
    except Exception as e:
        print(f"Step 2: Could not scrape {url}: {e}")
        return None

# Process schema to get product data (first single-issue USD offer)
def _first_image_url(image, default="https://via.placeholder.com/150"):
    """Reduce a schema.org ``image`` value (str, list or object) to one URL string."""
    if isinstance(image, list):
        image = image[0] if image else None
    if isinstance(image, dict):
        image = image.get("url") or image.get("contentUrl")
    return image if isinstance(image, str) and image else default


def _build_offer_id(url, suffix=""):
    """Derive the offerId ``[<locale>-]<last path segment><suffix>`` from an offer URL."""
    # Drop a trailing slash so the last segment is never the empty string.
    url_parts = url.rstrip("/").split("/")
    last_segment = url_parts[-1]  # e.g., "visi"
    content_types = ("newspapers", "magazines")  # Known content types
    locale_code = ""
    # The locale sits right after the domain and right before the content
    # type. Start at 1 so url_parts[i - 1] never wraps to the last element.
    for i in range(1, len(url_parts) - 1):
        part = url_parts[i]
        if ("pressreader.com" in url_parts[i - 1]
                and part not in content_types
                and url_parts[i + 1] in content_types):
            locale_code = part
            break
    base = f"{locale_code}-{last_segment}" if locale_code else last_segment
    return f"{base}{suffix}"


def process_product_data(schema):
    """Build the product body for the first USD single-issue offer.

    An offer qualifies when its ``priceCurrency`` is ``"USD"`` and its
    ``offerType`` is not ``"Subscription"``. Qualifying offers that lack a
    ``url`` or ``price`` are skipped, because they would otherwise produce
    an empty offerId or the literal price ``"None"``.

    Args:
        schema: Parsed JSON-LD Product object, or ``None``.

    Returns:
        A dict ready to pass as the ``products().insert`` body, or ``None``
        when the schema is missing, is not a Product, or has no usable offer.
    """
    if not schema:
        print("Step 3: No schema provided")
        return None
    print(f"Step 3: Processing schema: {json.dumps(schema, indent=2, ensure_ascii=False)}")

    if schema.get("@type") != "Product":
        print(f"Step 3: Invalid type: {schema.get('@type')}")
        return None

    # "offers" may be absent, null, a single object or a list.
    offers = schema.get("offers") or []
    if isinstance(offers, dict):
        offers = [offers]
    print(f"Step 3: Offers: {offers}")

    # Pick the first USD single-issue offer
    for offer in offers:
        if not isinstance(offer, dict):
            continue
        offer_name = offer.get("name", "")
        offer_currency = offer.get("priceCurrency", "")
        offer_type = offer.get("offerType", "")  # Check for subscription
        print(f"Step 3: Checking offer - Name: {offer_name}, Currency: {offer_currency}, Type: {offer_type}")
        # Filter for single-issue (no "Subscription" offerType) and USD
        if offer_currency != "USD" or offer_type == "Subscription":  # Change Currency Accordingly
            continue

        offer_url = offer.get("url")
        offer_price = offer.get("price")
        if not offer_url or offer_price is None:
            print("Step 3: Skipping offer without url or price")
            continue

        product_data = {
            # No "-sub" suffix for single issues, e.g. "de-visi" or "visi"
            "offerId": _build_offer_id(offer_url),
            "title": offer_name,
            "description": schema.get("description", ""),
            "imageLink": _first_image_url(schema.get("image")),
            "link": offer_url,
            "contentLanguage": "en",
            "targetCountry": "US",
            "channel": "online",
            "availability": "in stock",
            "price": {"value": str(offer_price), "currency": offer_currency},
            "customLabel0": "Newspapers - Single Issue - EN"
        }
        print(f"Step 3: Processed product data - Title: {product_data['title']}")
        return product_data
    print("Step 3: No USD single-issue offer found")
    return None

# Upload product to Google Merchant Center
def upload_product(product_data):
    """Insert ``product_data`` via ``service.products().insert`` and print the result.

    Failures are caught and reported; nothing is returned or re-raised.
    """
    title = product_data['title']
    print(f"Step 4: Starting upload for {title}")
    try:
        service.products().insert(merchantId=merchant_id, body=product_data).execute()
    except Exception as exc:
        print(f"Step 5: Failed to upload {title}: {exc}")
    else:
        print(f"Step 5: Successfully uploaded: {title}")

# Create a feed for all countries with a generic name
def create_data_source():
    """Create the "PressReader Feed" datafeed unless one with that name exists.

    The feed gets one English "Shopping" target per country returned by
    get_all_supported_countries(). Any exception is printed, not raised.
    """
    feed_name = "PressReader Feed"  # Change this name if needed
    print("Creating feed...")
    try:
        listing = service.datafeeds().list(merchantId=merchant_id).execute()
        for feed in listing.get("resources", []):
            if feed["name"] == feed_name:
                print(f"Feed '{feed_name}' already exists")
                return

        # One target entry per supported country.
        countries = get_all_supported_countries()
        targets = []
        for country in countries:
            targets.append(
                {"country": country, "language": "en", "includedDestinations": ["Shopping"]}
            )

        feed_body = {
            "name": feed_name,
            "contentType": "products",
            "fileName": "Newspapers - Single Issue - EN.csv",
            "targets": targets,
        }
        service.datafeeds().insert(merchantId=merchant_id, body=feed_body).execute()
        print(f"Created feed: {feed_name} targeting {len(countries)} countries")
    except Exception as exc:
        print(f"Failed to create feed: {exc}")

# Main function
def main():
    """Scrape every URL in the CSV's "url" column and upload the resulting product.

    The feed is created (or found) first; each URL is then scraped,
    converted to product data and uploaded when a product was produced.
    """
    df = pd.read_csv(CSV_FILE_PATH)
    create_data_source()

    print("\nStarting URL processing...\n")
    urls = df["url"]
    total = len(urls)
    for position, url in enumerate(urls, start=1):
        print(f"--- Processing URL {position} of {total} ---")
        product_data = process_product_data(extract_schema(url))
        if product_data:
            upload_product(product_data)
        print(f"--- Finished URL {position} ---\n")


if __name__ == "__main__":
    main()

In [None]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Google Merchant Center API Setup
# Service-account key file (on the mounted Google Drive) and the OAuth scope requested for it.
SERVICE_ACCOUNT_FILE = "/content/drive/MyDrive/Colab Notebooks/Merchant Center API/service_account_key.json"
SCOPES = ["https://www.googleapis.com/auth/content"]
# NOTE: authentication happens at cell execution time and reads the key file from disk.
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
# "content" v2.1 client shared by every function in this cell.
service = build("content", "v2.1", credentials=credentials)

# Your Merchant Center ID
# Passed as merchantId to every API call below.
merchant_id = "5411908926"

# Headers for scraping
# NOTE(review): this User-Agent presents the scraper as DuckDuckBot — confirm that is intended.
HEADERS = {
    "User-Agent": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)"
}

# CSV file with URLs
# main() reads the page URLs from this file's "url" column.
CSV_FILE_PATH = "/content/newspapers.csv"

# Fetch all supported countries
def get_all_supported_countries():
    """Return the region codes reported by ``service.regions().list``.

    Keeps entries whose ``status`` is ``'active'`` and that have no
    ``postalCodeArea`` key; returns ``["US"]`` when the filtered list is
    empty or when the request raises.

    NOTE(review): verify that the regions.list response actually contains
    ``resources``, ``regionCode`` and ``status``; otherwise only the
    ``["US"]`` fallback is ever returned.
    """
    try:
        response = service.regions().list(merchantId=merchant_id).execute()
        countries = []
        for region in response.get('resources', []):
            is_active = region.get('status') == 'active'
            if is_active and 'postalCodeArea' not in region:
                countries.append(region['regionCode'])
        print(f"Fetched {len(countries)} supported countries")
        if not countries:
            return ["US"]
        return countries
    except Exception as exc:
        print(f"Failed to fetch countries: {exc}")
        return ["US"]

# Extract schema.org JSON-LD, selecting only Product type
def _jsonld_nodes(data):
    """Return the dict objects found in one parsed JSON-LD document.

    Handles a single top-level object, a top-level array of objects, and an
    object carrying an ``@graph`` array. Anything that is not a dict is
    dropped so callers can safely use ``.get``.
    """
    if isinstance(data, list):
        nodes = data
    elif isinstance(data, dict):
        graph = data.get("@graph")
        nodes = [data, *graph] if isinstance(graph, list) else [data]
    else:
        nodes = []
    return [node for node in nodes if isinstance(node, dict)]


def extract_schema(url):
    """Fetch ``url`` and return its first JSON-LD object of type ``Product``.

    Every ``<script type="application/ld+json">`` tag is parsed in document
    order. Scripts that are empty or contain invalid JSON are reported and
    skipped instead of aborting the whole page.

    Args:
        url: Page to scrape (requested with ``HEADERS`` and a 10 s timeout).

    Returns:
        The Product schema as a dict, or ``None`` when the page has no
        JSON-LD, no Product object, or the request/parsing fails.
    """
    print(f"Step 1: Scraping URL: {url}")
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.encoding = 'utf-8'
        # response.text is already decoded; from_encoding only applies to
        # byte input and triggers a warning when given alongside str markup.
        soup = BeautifulSoup(response.text, "html.parser")
        script_tags = soup.find_all("script", type="application/ld+json")
        if not script_tags:
            print("Step 2: No JSON-LD scripts found")
            return None

        for i, script in enumerate(script_tags, 1):
            try:
                # .string is None for an empty tag; "" is then reported as
                # invalid JSON rather than raising AttributeError.
                data = json.loads(script.string or "")
            except json.JSONDecodeError:
                print(f"Step 2: Schema {i} is invalid JSON")
                continue
            print(f"Step 2: Found schema {i}: {json.dumps(data, indent=2, ensure_ascii=False)}")
            for node in _jsonld_nodes(data):
                if node.get("@type") == "Product":
                    print(f"Step 2: Selected Product schema {i}")
                    return node
        print("Step 2: No Product schema found among scripts")
        return None
    except Exception as e:
        print(f"Step 2: Could not scrape {url}: {e}")
        return None

# Process schema to get product data (second USD subscription offer)
def _first_image_url(image, default="https://via.placeholder.com/150"):
    """Reduce a schema.org ``image`` value (str, list or object) to one URL string."""
    if isinstance(image, list):
        image = image[0] if image else None
    if isinstance(image, dict):
        image = image.get("url") or image.get("contentUrl")
    return image if isinstance(image, str) and image else default


def _build_offer_id(url, suffix=""):
    """Derive the offerId ``[<locale>-]<last path segment><suffix>`` from an offer URL."""
    # Drop a trailing slash so the last segment is never the empty string.
    url_parts = url.rstrip("/").split("/")
    last_segment = url_parts[-1]  # e.g., "visi"
    content_types = ("newspapers", "magazines")  # Known content types
    locale_code = ""
    # The locale sits right after the domain and right before the content
    # type. Start at 1 so url_parts[i - 1] never wraps to the last element.
    for i in range(1, len(url_parts) - 1):
        part = url_parts[i]
        if ("pressreader.com" in url_parts[i - 1]
                and part not in content_types
                and url_parts[i + 1] in content_types):
            locale_code = part
            break
    base = f"{locale_code}-{last_segment}" if locale_code else last_segment
    return f"{base}{suffix}"


def process_product_data(schema):
    """Build the product body for the second USD subscription offer.

    Offers whose ``priceCurrency`` is ``"USD"`` and whose ``offerType`` is
    ``"Subscription"`` are collected in order; the second one (index 1) is
    used. The price currency is read from the selected offer itself rather
    than from whichever offer happened to be iterated last.

    Args:
        schema: Parsed JSON-LD Product object, or ``None``.

    Returns:
        A dict ready to pass as the ``products().insert`` body, or ``None``
        when the schema is missing, is not a Product, has fewer than two
        USD subscription offers, or the chosen offer lacks a url or price.
    """
    if not schema:
        print("Step 3: No schema provided")
        return None
    print(f"Step 3: Processing schema: {json.dumps(schema, indent=2, ensure_ascii=False)}")

    if schema.get("@type") != "Product":
        print(f"Step 3: Invalid type: {schema.get('@type')}")
        return None

    # "offers" may be absent, null, a single object or a list.
    offers = schema.get("offers") or []
    if isinstance(offers, dict):
        offers = [offers]
    print(f"Step 3: Offers: {offers}")

    # Collect all USD subscription offers
    usd_subscription_offers = []
    for candidate in offers:
        if not isinstance(candidate, dict):
            continue
        candidate_name = candidate.get("name", "")
        candidate_currency = candidate.get("priceCurrency", "")
        candidate_type = candidate.get("offerType", "")  # Check for subscription
        print(f"Step 3: Checking offer - Name: {candidate_name}, Currency: {candidate_currency}, Type: {candidate_type}")
        if candidate_currency == "USD" and candidate_type == "Subscription":
            usd_subscription_offers.append(candidate)

    if not usd_subscription_offers:
        print("Step 3: No USD subscription offers found")
        return None
    if len(usd_subscription_offers) < 2:
        print("Step 3: Only one USD subscription offer found, skipping (wanted second offer)")
        return None

    # Select the second USD subscription offer (index 1)
    offer = usd_subscription_offers[1]
    offer_url = offer.get("url")
    offer_price = offer.get("price")
    if not offer_url or offer_price is None:
        # Would otherwise upload an empty offerId or the literal price "None".
        print("Step 3: Second USD subscription offer has no url or price, skipping")
        return None

    product_data = {
        "offerId": _build_offer_id(offer_url, "-sub"),  # e.g., "de-visi-sub" or "visi-sub"
        "title": offer.get("name", ""),
        "description": schema.get("description", ""),
        "imageLink": _first_image_url(schema.get("image")),
        "link": offer_url,
        "contentLanguage": "en",
        "targetCountry": "US",
        "channel": "online",
        "availability": "in stock",
        # Currency of the selected offer, not of the last offer iterated.
        "price": {"value": str(offer_price), "currency": offer["priceCurrency"]},
        "customLabel0": "magazines - subscription - en"
    }
    print(f"Step 3: Processed product data (second USD subscription offer) - Title: {product_data['title']}")
    return product_data

# Upload product to Google Merchant Center
def upload_product(product_data):
    """Send ``product_data`` to ``service.products().insert`` and log the outcome.

    Exceptions are caught and printed; the function returns nothing.
    """
    label = product_data['title']
    print(f"Step 4: Starting upload for {label}")
    try:
        insert_request = service.products().insert(merchantId=merchant_id, body=product_data)
        insert_request.execute()
    except Exception as exc:
        print(f"Step 5: Failed to upload {label}: {exc}")
    else:
        print(f"Step 5: Successfully uploaded: {label}")

# Create a new feed for all countries
def create_data_source():
    """Create the "PressReader Feed 2" datafeed unless one with that name exists.

    The feed gets one English "Shopping" target per country returned by
    get_all_supported_countries(). Any exception is printed, not raised.
    """
    feed_name = "PressReader Feed 2"  # New feed name
    print("Creating feed...")
    try:
        listing = service.datafeeds().list(merchantId=merchant_id).execute()
        for feed in listing.get("resources", []):
            if feed["name"] == feed_name:
                print(f"Feed '{feed_name}' already exists")
                return

        # One target entry per supported country.
        countries = get_all_supported_countries()
        targets = []
        for country in countries:
            targets.append(
                {"country": country, "language": "en", "includedDestinations": ["Shopping"]}
            )

        feed_body = {
            "name": feed_name,
            "contentType": "products",
            "fileName": "pressreader_products_2.csv",
            "targets": targets,
        }
        service.datafeeds().insert(merchantId=merchant_id, body=feed_body).execute()
        print(f"Created feed: {feed_name} targeting {len(countries)} countries")
    except Exception as exc:
        print(f"Failed to create feed: {exc}")

# Main function
def main():
    """Scrape every URL in the CSV's "url" column and upload the resulting product.

    The feed is created (or found) first. Each URL is then scraped,
    converted to product data and uploaded when a product was produced,
    with a one-second pause between URLs.
    """
    # Imported here because this cell's import list does not include `time`;
    # without it the time.sleep() call below raises NameError when the cell
    # is run on its own.
    import time

    df = pd.read_csv(CSV_FILE_PATH)
    create_data_source()

    print("\nStarting URL processing...\n")
    for i, url in enumerate(df["url"], 1):
        print(f"--- Processing URL {i} of {len(df['url'])} ---")
        schema = extract_schema(url)
        product_data = process_product_data(schema)
        if product_data:
            upload_product(product_data)
        print(f"--- Finished URL {i} ---\n")
        # Pause between pages to avoid hammering the scraped site.
        time.sleep(1)

if __name__ == "__main__":
    main()