In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import urllib3
import html
import re
import json
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from office365.sharepoint.lists.list import List
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.client_context import UserCredential
from office365.sharepoint.files.file import File


In [5]:
# API Query with pagination control (ability to iterate through all pages/records)

import requests
import time

# API base URL and static parameters
api_url = "https://my.intelligence2day.com/components/api/search.cfc"
params = {
    "method": "query",
    "APIid": "I2DE_4880557FFC6ABA165C916880849F9CAC",
    "authKey": "c51e7492-ab7f-46d8-9d10-edd4e434d2c1",
    "customerGUID": "b6150206-d9b1-4963-8907-22b7695c0477",
    "accessGroups": "8329",
    "queryString": "*:*",  # Query for all records
    "returnFields": "title,summary,attachmenturl",  # Fields to return
    "maxRows": 100,  # Limit to 10 results
    "sort": "uid desc",  # Sort by UID in descending order
}

# Pagination control
cursor = "*"  # Start with an empty cursor for the first request
has_more = True
total_articles = 0
page = 1
all_articles = []  # To store all article data

while has_more:
    time.sleep(2)
    print(f"\n--- Fetching Page {page} ---")

    # Update the cursor in the request parameters for pagination
    params["cursorMark"] = cursor

    # Make the request
    response = requests.get(api_url, params=params, verify=False)

    # Print the status code
    print(f"Status Code: {response.status_code}")
    
    if response.status_code == 200:
        try:
            data = response.json()  # Parse the response as JSON
            print("Returned Data:")
            print(data)  # Print the raw JSON response
            
            articles = data.get("docs", [])
            next_cursor = data.get("nextCursormark", None)

            if not articles:
                print("No more articles returned.")
                break

            print(f"Retrieved {len(articles)} articles on page {page}.")

            # Print the articles' title, summary, and URL
            for i, article in enumerate(articles, 1):
                title = article.get("headline", "No title")
                summary = article.get("summary", "No summary")
                url = article.get("url", "No URL")

                all_articles.append({"title": title, "summary": summary, "url": url})

                print(f"\nArticle {total_articles + i}")
                print(f"Title   : {title}")
                print(f"Summary : {summary}")
                print(f"URL     : {url}")

            total_articles += len(articles)
            page += 1

            # Prepare for the next iteration with the nextCursormark
            if next_cursor:
                cursor = next_cursor  # Update the cursor for the next request
            else:
                has_more = False  # No more pages, end the loop

        except ValueError:
            print("Error: Response is not valid JSON.")
            break
    else:
        print(f"Request failed with status code {response.status_code}")
        break

print(f"\n✅ Total articles fetched: {total_articles}")

# Save to CSV
#with open("articles.csv", mode="w", newline="", encoding="utf-8") as file:
#    writer = csv.DictWriter(file, fieldnames=["title", "summary", "url"])
#    writer.writeheader()
#    writer.writerows(all_articles)



--- Fetching Page 1 ---
Status Code: 200
Returned Data:
Retrieved 100 articles on page 1.

Article 1
Title   : No title
Summary : 
URL     : No URL

Article 2
Title   : No title
Summary : 
URL     : No URL

Article 3
Title   : No title
Summary : 
URL     : No URL

Article 4
Title   : No title
Summary : 
URL     : No URL

Article 5
Title   : No title
Summary : I am happy to announce that the Vertiv\u2122 CoolLoop Trim Cooler has been crowned "Hardware Product of the Year" at the Network Computing Awards 2025! \ud83c\udfc5<br><br>This award reflects the passion, expertise, and collaborative spirit of our incredible teams at Vertiv. Together, we\u2019re shaping the future of AI-ready data centers, delivering next-generation free cooling solutions that are eco-compliant, flexible, and scalable\u2014designed to meet tomorrow\u2019s tech challenges, today!<br><br>Thank you to everyone who contributed to this s
URL     : No URL

Article 6
Title   : No title
Summary : \ud83c\udf89 We\u2019re 

KeyboardInterrupt: 

In [7]:
# API Query to test functionality (without pagination)

date_constraint = "2025-04-02T06:04:11Z"

# API base URL and static parameters
api_url = "https://my.intelligence2day.com/components/api/search.cfc"
params = {
    "method": "query",
    "APIid": "I2DE_4880557FFC6ABA165C916880849F9CAC",
    "authKey": "c51e7492-ab7f-46d8-9d10-edd4e434d2c1",
    "customerGUID": "b6150206-d9b1-4963-8907-22b7695c0477",
    "accessGroups": "8329",
    "returnFields": "*",
    "queryString": "dateline:[NOW-14DAYS TO NOW] AND topicId:135576",  # Query for all records within time range
    #"queryString": "dateline:[NOW-1MONTH TO NOW]",  # Query for all records within time range
    #"queryString": f"dateline:[{last_run_date} TO NOW] AND topicId:135576", # Query for all records between the last run date (max run date in excel) and now & on topic ID
    "maxRows": 5,  # Limit to x results
    "sort": "dateline desc",  # Sort by 

}

total_articles = 0
all_articles = []  # To store all article data


# Make the request
response = requests.get(api_url, params=params, verify=False)

# Print the status code
print(f"Status Code: {response.status_code}")
    
if response.status_code == 200:
    try:
        data = response.json()  # Parse the response as JSON
        print("Returned Data:")
        print(data)  # Print the raw JSON response
            
        articles = data.get("docs", [])


        if not articles:
            print("No more articles returned.")
            

        print(f"Retrieved {len(articles)} articles")

        # Print the articles' title, summary, and URL
        for i, article in enumerate(articles, 1):
            title = article.get("headline", "No title")
            summary = article.get("summary", "No summary")
            url = article.get("url", "No URL")
            date = article.get("dateline", "No date")

            all_articles.append({"title": title, "summary": summary, "url": url})

            print(f"\nArticle {total_articles + i}")
            print(f"Title   : {title}")
            print(f"Summary : {summary}")
            print(f"URL     : {url}")
            print(f"Date    : {date}")


    except ValueError:
        print("Error: Response is not valid JSON.")
        
else:
    print(f"Request failed with status code {response.status_code}")
    
total_articles = len(all_articles)
print(f"\n✅ Total articles fetched: {total_articles}")



Status Code: 200
Returned Data:
{'numFound': 54, 'start': 0, 'docs': [{'uid': '34987378', 'uid_int': 34987378, 'customerGUID': 'b6150206-d9b1-4963-8907-22b7695c0477', 'url': 'https://my.intelligence2day.com/cc/view/article/?a=564be6c18962ffc3be034c90e38befe2', 'modified': "{ts '2025-05-30 09:41:1748598116'}", 'docHash': '564be6c18962ffc3be034c90e38befe2', 'AI_category': ['/Business & Industrial/Industrial Materials & Equipment'], 'AI_entity_EVENT': ['interview', 'interview'], 'AI_entity_LOCATION': ['Rayong', 'Thailand'], 'AI_entity_ORGANIZATION': ['Copeland', 'compressors', 'compressors', 'HVACR'], 'AI_entity_OTHER': ['production systems', 'heat pumps', 'heat pumps', 'leadership', 'sector', 'nnRead', 'https://ow.ly/YzTm50W0VL9'], 'AI_entity_PERSON': ['Veerachartyanukul', 'Ong-art'], 'AI_sentiment': 0.300000011921, 'AI_meta': ['{"Copeland": {"link": "", "mid": "/g/11c4bhyqns"}, "production systems": {"link": "", "mid": ""}, "compressors": {"link": "", "mid": ""}, "heat pumps": {"link": 