# In [None]:
# Jupyter lab is a browser-based development environment

# Library for HTTP-requests
import requests
# Library for csv-files
import csv
# Library for extracting and curating news articles
from newspaper import Article
from newspaper import Config
from newspaper.article import ArticleException
# Google's generative AI library
from google import genai

# Connect to Google's gen AI client (the API key is intentionally left blank here).
client = genai.Client(api_key='')

# Browser user agent the newspaper library presents when it makes HTTP requests to fetch articles.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
config = Config()
config.browser_user_agent = user_agent

# News API endpoint that is queried for relevant articles.
articles = 'https://newsapi.org/v2/everything'
# The query parameters are passed separately so that requests URL-encodes the
# quotes, spaces and parentheses of the search expression instead of relying
# on a hand-assembled query string.
query_params = {
    'q': '("supply chain risk" OR "supply chain disruption" OR "supply chain threat" OR "supply chain disturbance" OR "supply chain delays") AND NOT "software supply chain"',
    'from': '2025-09-20',
    'sortBy': 'popularity',
    'apiKey': '',
}
# An explicit timeout keeps the script from waiting indefinitely on a stalled connection.
response = requests.get(articles, params=query_params, timeout=30)
# Fail with a clear HTTP error (e.g. invalid API key) instead of a KeyError on 'articles' below.
response.raise_for_status()
print(len(response.json()['articles']))
# Fields (header row) of the CSV file
fields = ['Risk category','Description of risk','Cause of risk','Supply chain entities related to risk','Geographical locations of risk','Business sectors related to risk','News article url']
filename = 'Supply_chain_risks.txt'

# The prompt consists of three fixed fragments with the article text and the article url
# inserted between them. The fragments are plain strings (not f-strings) because the risk
# categorization itself contains literal '{' and '}' characters.

# Fragment 1: role of the LLM and the categorization of risks with example causes and effects.
_PROMPT_INTRO = (
    "You are a supply chain expert trying to detect supply chain risks from news articles. "
    "The following is categorization of supply chain risks. "
    "Risks are listed in the following formula: "
    "{name_of_risk,[examples_of_cause_of_risk],[examples_of_effects_risk_has_on_supply_chain]} "
    "and ; separating the different risks. "
    "{Political Risk,[Terrorism, trade wars, export bans, nationalization of industries, unstable government policies],"
    "[Border closures delaying goods, regulatory uncertainty, increased tariffs, disrupted sourcing, cost increases due to rerouting]};"
    "{Environmental Risk,[Floods, wildfires, droughts, earthquakes, hurricanes, extreme weather affecting transport or raw materials],"
    "[Delayed shipments, damaged inventory, reduced material availability, shutdowns of manufacturing hubs, increased procurement costs]};"
    "{Financial Risk,[Bankruptcy of suppliers, currency volatility, economic recession, delayed payments, loss of investor confidence],"
    "[Cash flow disruption, halted supplier operations, increased borrowing costs, renegotiation of contracts, withdrawal from markets]};"
    "{Supply Risk,[Supplier monopoly, poor quality control, single-source dependency],"
    "[Production delays, recalls, increased sourcing costs, customer dissatisfaction, emergency supplier onboarding]};"
    "{Demand Risk,[Forecast errors, unexpected market shifts, new competitor products, changing customer preferences],"
    "[Overstock or stockouts, lost sales, increased holding costs, inefficiencies in upstream planning, resource misallocation]};"
    "{Logistics Risk,[Inefficiencies in logistics operations, customs delays, container shortages, infrastructure failures],"
    "[Delivery delays, increased shipping costs, production halts, order backlogs, use of expensive express transport]};"
    "{System Risk,[IT integration issues, outdated ERP systems, software bugs],"
    "[Order mismanagement, data inaccuracy, downtime in planning systems, reduced operational visibility, manual workarounds]};"
    "{Operational Risk,[Worker strikes, Human errors, equipment breakdown, poor process design, safety incidents, labor shortage],"
    "[Production downtime, quality issues, increased cost per unit, missed deadlines, shipment errors]};"
    "{Social Risk,[Unsafe labor conditions, community opposition, legal disputes, reputational scandals],"
    "[Factory closures, supplier termination, loss of contracts, boycotts, increased compliance costs]};"
    "{Cybersecurity Risk,[Data breaches, ransomware, phishing, weak security protocols in SCM software],"
    "[Halted operations, data theft, regulatory fines, compromised customer trust, disrupted digital communication]};"
    "{Behavioral Risk,[Poor cross-functional coordination, hidden agendas in supplier relationships, unbalanced information exchange between suppliers],"
    "[Forecast inaccuracies, misaligned goals, delayed decision-making, inefficiency, unaddressed vulnerabilities, bullwhip effect]};"
    "{Cultural Risk,[Language barriers, different work ethics, resistance to digital tools, misinterpretation in global teams],"
    "[Poor collaboration, delays in negotiations, reduced operational consistency, conflict in joint ventures, lowered efficiency]}. "
    "The risks are better identified based on the causes than the actual effects it has on the supply chain "
    "(e.g. risk caused by severe weather is an environmental risk or risk caused by strikes is an operational risk). "
    "Please try and detect these kind of supply chain risks and information related to them from this news article "
)
# Fragment 2: the wanted result format for a single risk, up to the opening '<' of the url field.
_PROMPT_FORMAT = (
    ". If you detect a supply chain risk, please list information about the risk in the following string format: "
    "<risk_category>,<description_of_risk>,<cause_of_risk>,<supply_chain_entities_present_in_risk_separated_by_;>,"
    "<geographical_locations_separated_by_;>,<business_sectors_affected_separated_by_;>,<"
)
# Fragment 3: rules on commas, missing information, entity identification and the list format.
_PROMPT_RULES = (
    ">. Don't use any extra commas, other than the ones in the aforementioned format. "
    "Don't generate any extra ',' characters, as the result is used to create a csv-file. "
    "If name of geographical location includes a ',' character (e.g. county, state), replace it with a space. "
    "If you don't recognize some information, replace it with 'No information'. "
    "Only include risks if the entities related to the risks are clearly, uniquely and explicitly disclosed in the news article and extract those entities. "
    "Don't generate only general-level entities, but try to generate more in detail identification for all identities (e.g. company name). "
    "Only generate unique identification of entities that are explicitly mentioned in the news article. "
    "Include the events in a string form list in the following format: <risk_1>&<risk_2>&<risk_n>. "
    "Generate an answer that only contains the list."
)


def _build_prompt(article_text, url):
    """Return the full LLM prompt for one article.

    Args:
        article_text: Parsed body text of the news article.
        url: Url of the article; it is placed in the last field of the wanted row format.
    """
    return _PROMPT_INTRO + article_text + _PROMPT_FORMAT + url + _PROMPT_RULES


def _parse_risk_rows(response_text):
    """Split an LLM answer of the form '<risk_1>&<risk_2>&<risk_n>' into CSV rows.

    Each risk is split on ',' into its field values. Surrounding whitespace is
    stripped from risks and values, and empty risks (e.g. an empty answer or a
    trailing '&') are skipped so that no blank rows are produced.

    Args:
        response_text: The text generated by the LLM.

    Returns:
        A list of rows, each row being a list of field values.
    """
    rows = []
    for risk in response_text.split('&'):
        risk = risk.strip()
        if risk:
            rows.append([value.strip() for value in risk.split(',')])
    return rows


# Opens the CSV file for appending. newline='' lets the csv module control the line
# endings, and the explicit encoding avoids failures on non-ASCII article content
# when the platform default encoding is not UTF-8.
with open(filename, 'a', newline='', encoding='utf-8') as f:
    csvwriter = csv.writer(f)
    # Append mode starts at the end of the file, so position 0 means the file is new
    # or empty. The fields are written only then, so reruns don't repeat the header.
    if f.tell() == 0:
        csvwriter.writerow(fields)
    # Goes through all the articles returned by the News API
    for news_item in response.json()['articles']:
        url = news_item['url']
        print(url)
        # Fetches the article based on its url.
        article = Article(url, config=config)
        try:
            # Using the newspaper library, the article is downloaded and parsed to fetch the news article in a string format.
            article.download()
            article.parse()
        # If the download of the article fails, the error is caught and the article is skipped.
        except ArticleException:
            print('Article download failed.')
            continue
        # Only articles with some text content are sent to the LLM.
        if not article.text:
            continue
        # NOTE: the raw article text is deliberately not written to the file; free text
        # with commas and line breaks would corrupt the CSV rows.
        message = _build_prompt(article.text, url)
        # A gemini 2.5 flash LLM is prompted with the prompt.
        geminiresponse = client.models.generate_content(
            model='gemini-2.5-flash-preview-04-17', contents=message)
        print(geminiresponse.text)
        # The generated response might be of 'NoneType'
        if geminiresponse.text is None:
            continue
        # The generated response is split to a list of rows, one per extracted risk.
        rows = _parse_risk_rows(geminiresponse.text)
        print(rows)
        # Writing the rows to the CSV-file
        csvwriter.writerows(rows)