# 1. Scrape tweets from @dexpose_io

**Code reference:** https://github.com/bocchilorenzo/ntscraper

In [1]:
from ntscraper import Nitter

scraper = Nitter(0)

Testing instances: 100%|███████████████████████████████████████████████████████████████| 77/77 [01:30<00:00,  1.18s/it]


In [2]:
from dateutil import parser
from io import StringIO
import pandas as pd
import pytz
import time
import datetime

def parse_date_custom(date_str):
    # Assume new data might still use the local time format without explicit timezone
    format_new_data = "%b %d, %Y · %I:%M %p UTC"  # For new data scraped in a different format

    # Directly parse the ISO 8601 format including timezone
    try:
        date_parsed = pd.to_datetime(date_str, errors='coerce')
        if not pd.isna(date_parsed):
            # If the date was parsed successfully, ensure it's in UTC
            return date_parsed.tz_convert(pytz.utc) if date_parsed.tzinfo else date_parsed.tz_localize(pytz.utc)
    except ValueError:
        date_parsed = pd.NaT

    # Try parsing any new data format
    if pd.isna(date_parsed):
        try:
            date_parsed = pd.to_datetime(date_str, format=format_new_data, errors='coerce')
            if not pd.isna(date_parsed):
                # New data format assumed to be in UTC, so localize without conversion
                return date_parsed.tz_localize(pytz.utc)
        except ValueError:
            return pd.NaT

    return date_parsed


def decode(name):
    try:
        with open(f'{name}.csv', 'rb') as file:
            content = file.read().decode('utf-8', errors='replace')
        data_io = StringIO(content)
        df_existing = pd.read_csv(data_io)
        # Apply custom parsing immediately after loading
        df_existing['Date (UTC)'] = df_existing['Date (UTC)'].apply(parse_date_custom)
    except FileNotFoundError:
        df_existing = pd.DataFrame()
    return df_existing

def get_tweets(name, modes, no):
    retry_delay = 0  # Initialize delay
    max_retries = 100  # Set maximum number of retries

    df_existing = decode(name)  # Load and parse existing data
    
    for retry in range(max_retries):
        tweets = scraper.get_tweets(name, mode=modes, number=no)
        if not tweets['tweets']:
            print(f'Retrying in {retry_delay} second{"s" if retry_delay > 1 else ""}...')
            time.sleep(retry_delay)
            retry_delay += 0  # Increment delay for each retry
            continue
        else:
            profile = scraper.get_profile_info(name)
            current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f'Successfully scraped {no} tweets as at {current_time} from @{name}!')
            if not profile is None: 
                print(f'Profile Info of @{username}\n{"="*40}')
                print(f"Name: {profile['name']}")
                print(f"Bio: {profile['bio']}")
                print(f"Location: {profile['location']}")
                print(f"Website: {profile['website']}")
                print(f"Joined: {profile['joined']}")
            #print(tweets)
            final_tweets = [ 
                [x['date'], x['link'], x['pictures'][0] if x['pictures'] else None, x['text']] 
                for x in tweets['tweets'] 
            ]
            dat = pd.DataFrame(final_tweets, columns=['Date (UTC)', 'URL', 'Image', 'Text'])
            dat['Date (UTC)'] = dat['Date (UTC)'].apply(parse_date_custom)  # Parse new data
            
            # Add a "Scrape Date" column and set it to the current date for the newly scraped data
            dat['Scrape Date'] = pd.to_datetime('today').strftime('%Y-%m-%d %H:%M:%S+00:00')

            # Concatenate existing and newly scraped data
            df_all = pd.concat([df_existing, dat])
            
            # Drop duplicates based on "Date (UTC)" while keeping the first occurrence of non-empty "Scrape Date"
            df_all = df_all.sort_values(by='Scrape Date', ascending=False).drop_duplicates(subset=['Date (UTC)'], keep='last')

            # Sort by "Date (UTC)"
            df_all.sort_values(by='Date (UTC)', ascending=False, inplace=True)
            
            df_all.to_csv(f'{name}.csv', index=False, encoding='utf-8')
            return df_all

    print("Failed to scrape tweets after maximum retries.")
    return pd.DataFrame()

In [3]:
username = "dexpose_io"

profile = scraper.get_profile_info(username)
tweets = get_tweets(username, 'user', 50)

01-Jul-24 10:20:11 - No instance specified, using random instance https://nitter.privacydev.net
01-Jul-24 10:20:17 - No instance specified, using random instance https://nitter.privacydev.net
01-Jul-24 10:20:23 - Current stats for dexpose_io: 20 tweets, 0 threads...
01-Jul-24 10:20:27 - Current stats for dexpose_io: 40 tweets, 0 threads...
01-Jul-24 10:20:31 - Current stats for dexpose_io: 50 tweets, 0 threads...
01-Jul-24 10:20:31 - No instance specified, using random instance https://nitter.privacydev.net
Successfully scraped 50 tweets as at 2024-07-01 10:20:36 from @dexpose_io!
Profile Info of @dexpose_io
Name: DeXpose
Bio: Finding the needles in the cyber haystack.
Location: 
Website: http://dexpose.io
Joined: 11:59 AM - 3 Oct 2022


# 2. Extract useful information from tweets

Each tweet from [@dexpose_io](https://twitter.com/dexpose_io) is a cyber threat alert.

We make use of LLM and AI vision to extract details from each tweet.

We will employ the GPT-3.5 Turbo model through Azure OpenAI API to analyze the tweet text, and from which to identify the victimized organization and country.

We will employ **GPT-4 vision model** through Azure OpenAI API to analyze the tweet imgage, and from which to identify the victimized organization and country from.

The reason of employing both techniques together is to provide a second output just in case the first one is incorrect or missing.

In [6]:
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
import os
import json
import pandas as pd
from PIL import Image
from io import BytesIO
import requests
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from langchain.schema import HumanMessage, SystemMessage

# Load API keys and configurations from .env file as environment variables
load_dotenv() 

# Initialize Azure LLM (GPT-3.5 Turbo) for simple text processing
azure_gpt35turbo = AzureChatOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    openai_api_version=os.environ.get("AZURE_OPENAI_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT_GPT35TURBO"),
    temperature=0
)

# Initialize Azure LLM (GPT-4) for more more complex agentic tasks
azure_gpt4 = AzureChatOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    openai_api_version=os.environ.get("AZURE_OPENAI_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT_GPT4"),
    temperature=0
)

# Initialize Azure LLM (GPT-4o) for more more complex agentic tasks
azure_gpt4o = AzureChatOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT_GPT4O"),
    api_key=os.environ.get("AZURE_OPENAI_KEY_GPT4O"),
    openai_api_version=os.environ.get("AZURE_OPENAI_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT_GPT4O"),
    temperature=0
)

# Initialize Azure GPT4 Vision model for image processing
azure_gpt4vision = AzureChatOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_VISION_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_VISION_KEY"),
    openai_api_version=os.environ.get("AZURE_OPENAI_VERSION"),
    azure_deployment=os.environ.get("AZURE_OPENAI_VISION_DEPLOYMENT"),
    temperature=0
)

# Initialize a BlobServiceClient for storage of images
connect_str = os.environ.get("AZURE_STORAGE") 
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name = 'capstone-images'

# Define the columns
columns = [
    'Alert Type',
    'Ransomware',
    'Victimized Entity', 
    'Hashtagged country', 
    'Entity in image', 
    'Country',
    'Industry'
]

unknown = 'UNK'  # A placeholder string for whenever an error occurred

# Function to upload image to Azure
def upload_to_azure(image_data, image_id):
    img_byte_arr = BytesIO()
    image_data.save(img_byte_arr, format='PNG')
    img_byte_arr = img_byte_arr.getvalue()
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=f"{image_id}.PNG")
    blob_client.upload_blob(img_byte_arr, overwrite=True)
    return blob_client.url
    
# Function to crop and upload image (to reduce chances of triggering Azure OpenAI's content management policy, eg. image containing a human face like graphics
def crop_and_upload_image(url):
    try:
        response = requests.get(url)
        image = Image.open(BytesIO(response.content))
        width, height = image.size
        top = height * 0.60  # Starting from 60% height from the top
        bottom = height - (height * 0.10)  # Ending at 10% height from the bottom
        right = width - (width * 0.40)  # Ending at 40% width from the right
        cropped_image = image.crop((0, top, right, bottom))
        image_id = url.split('/')[-1].rsplit('.', 1)[0]
        image_url = upload_to_azure(cropped_image, image_id)
        return image_url
    except Exception as e:
        print(f"Failed to process or upload image: {e}")
        return None

# Function to read and process tweet
def read_tweet(tweet):
    
    print(f"Analyzing tweet: {tweet[:100]}")
    
    query = f"""
    Read the below cyber security update tweet, and provide an output as instructed. 
    ===TWEET START===
    {tweet}
    ===TWEET END===
    Complete the following tasks:
    If the topic says 'RANSOMEWARE ALERT' or it names a ransomware, give it a classification label: 'Ransomware'; 
    else if the topic says 'DATA BREACH ALERT', give it a classification label: 'Data Breach'.
    Identify the name of the Ransomeware, if applicable.
    Identify the name of the affected organization, if available.
    Identify the country being hashtagged, if available.  
    If for any of the above the information is unknown or not available, state '{unknown}'.
    Provide in JSON format containing the following keys in exact order: {columns[0]}, {columns[1]}, {columns[2]}, {columns[3]},
    with corresponding values being the classification, name of ransomware, affected organization, and hashtagged country.
    """
    response = azure_gpt35turbo.invoke(
        [
            SystemMessage(
                content="You are a cyber security analyst who monitors and analyzes cyber threat trends and data."
            ),
            HumanMessage(content=query),
        ])
    json_content = response.content
    print(json_content)  # For debugging purposes
    return json_content

# Function to analyze image
def analyze_image(url):
    print(f'Analyzing image: {url}')  # For debugging purposes
    if url is None:
        return unknown
    try:
        response = azure_gpt4vision.invoke(
            [
                SystemMessage(
                    content="The provided image likely contains an organization name, a country name and its flag."
                ),
                HumanMessage(
                    content=[
                        {
                            "type": "text", 
                            "text": f"""
                                Identify the organization and country; if it is not a country but a state of the USA, state 'USA'; if it is Taiwan, state 'Taiwan, China'.
                                If for any of the above the information is unknown or not available, state '{unknown}'.
                                """
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": url,
                                "detail": "auto",
                            },
                        },
                    ]
                )
            ]
        )
        #print(response.content) # For debugging purposes
        response = azure_gpt35turbo.invoke(
            [
                SystemMessage(
                    content="You are a helpful assistant."
                ),
                HumanMessage(content=f"""
                    Reformat the following into JSON format:
                    {response.content}
                """),
            ])
        json_content = response.content
        print(json_content)  # For debugging purposes
        return json_content
    except Exception as e:
        print(f"Error processing image {url}: {e}")
        return unknown  # Return a placeholder string indicating an error occurred

We make use of agentic AI to automate the classification of the victimized organization into an industry.

Two agents are created.  The first agent **(GPT-3.5-turbo)** will search the web for information on the victim.  

The second agent **(GPT-4o)** will receive information gathered by the first agent, and then classify the victim into an industry.

The industry list is from MSCI (there are 25 industry groups) plus an additional 'Government and public services'.

In [7]:
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool
from crewai.tasks.task_output import TaskOutput

search_tool = SerperDevTool()

msci_groups = '''
    'Energy', 
    'Materials', 
    'Capital Goods', 
    'Commercial & Professional Services', 
    'Transportation', 
    'Automobiles & Components', 
    'Consumer Durables & Apparel', 
    'Consumer Services', 
    'Consumer Discretionary Distribution & Retail', 
    'Consumer Staples Distribution & Retail', 
    'Food, Beverage & Tobacco', 
    'Household & Personal Products', 
    'Health Care Equipment & Services', 
    'Pharmaceuticals, Biotechnology & Life Sciences', 
    'Banks', 
    'Financial Services', 
    'Insurance', 
    'Software & Services', 
    'Technology Hardware & Equipment', 
    'Semiconductors & Semiconductor Equipment', 
    'Telecommunication Services', 
    'Media & Entertainment', 
    'Utilities', 
    'Equity Real Estate Investment Trusts (REITs)', 
    'Real Estate Management & Development'
'''

class Tasks:
    def company_search(self, agent, org, country):
        return Task(
            description=f"""
                Given an organization/company named {org} based in {country}, 
                search its exact name on the internet for information to classify its industry.
             """,
            agent=agent,
            #context=context,
            tools=[search_tool], 
            async_execution=False,
            expected_output="""Only a JSON in the following format:
                {
                    "organization": organization,
                    "base country": country,
                    "industry": industry
                }
            """,
        )
    def classification(self, agent, context):
        return Task(
            description=f"""
                Given an orgnaization, its based country and business type, 
                classify it into the one and only one most likely Industry Group strictly from [{msci_groups}, 'Government and public services'].  
                If it is impossible to classify, state the issue and reason.
                """,
            agent=agent,
            context = [context],
            #tools=[search_tool], 
            async_execution=False,
            expected_output="""Only a JSON in the following format:
                {
                    "Industry Group": classification
                }
            """,
        )

def job(company, country):
    print(f'Forming crew to classify industry...')  # For debugging purposes
    # Create agents
    analyst = Agent(
        role="Company Analyst",
        goal="Classify the industry of a given company",
        backstory="""As a junior analyst, your boss gives you an organization/company name and ask you to find out the industry/sector it belongs.  
        You make use of internet search for the task, looking for the most reliable and trusted sources about the company.""",
        tools = [search_tool],
        verbose=False,
        allow_delegation=False,
        max_iter=5,
        llm = azure_gpt35turbo
    )
    
    supervisor = Agent(
        role="Supervisor",
        goal="Classify a non-standard industry name based on a standard Industry Group.",
        backstory=f"""You are a business expert with sound knowledge of all kinds of industries and companies.
        Based on your knowledge, you always classsify a given business type into one and only one strictly among [{msci_groups}, 'Government and public services'].""",
        #tools = [search_tool],
        verbose=False,
        allow_delegation=False,
        max_iter=5,
        llm = azure_gpt4o
    )
    
    # Instantiate the tasks
    tasks = Tasks()
    search = tasks.company_search(analyst, company, country)
    classification = tasks.classification(supervisor, search)
    
    # Form the crew
    crew = Crew(
        agents=[analyst, supervisor],
        tasks=[search,classification],
        process=Process.sequential,
        #manager_llm=ollama,
        verbose=2,
    )
    
    # Kick off the crew's work
    results = crew.kickoff()
    
    # Print the results
    print("Crew Work Results:")
    print(results)
    print()
    return(results)

In [8]:
# Function to process each row
def process_row(row):
    if row[columns[:6]].isnull().all():
        
        # Process text
        result_json = read_tweet(row['Text'])
        if result_json:
            try:
                result = json.loads(result_json)
                #print(result)  # For debugging purposes
                values = list(result.values())
                for i, col in enumerate(columns[:4]):
                    row[col] = values[i] if i < len(values) else unknown
            except json.JSONDecodeError:
                print("Failed to decode JSON response")
                for i, col in enumerate(columns[:4]):
                    row[col] = unknown
        else:
            print("Response is None or empty")
            for i, col in enumerate(columns[:4]):
                row[col] = unknown

        time.sleep(5)
        
        # Process image
        url = crop_and_upload_image(row['Image'])
        if url:
            try:
                result_json = analyze_image(url)
                if result_json:
                    try:
                        result = json.loads(result_json)
                        #print(result)  # For debugging purposes
                        values = list(result.values())
                        for i, col in enumerate(columns[4:6]):
                            row[col] = values[i] if i < len(values) else unknown
                    except json.JSONDecodeError:
                        print("Failed to decode JSON response")
                        for i, col in enumerate(columns[4:6]):
                            row[col] = unknown
                else:
                    print("Response is None or empty")
                    for i, col in enumerate(columns[4:6]):
                        row[col] = unknown
            except Exception as e:
                print(f"Error processing image {url}: {e}")
                for i, col in enumerate(columns[4:6]):
                    row[col] = unknown

        # Collect organization and country names
        org_names = []
        if row[columns[2]] == row[columns[4]]:
            if row[columns[2]] != unknown:
                org_names.append(row[columns[2]])
        else:
            if row[columns[2]] != unknown:
                org_names.append(row[columns[2]])
            if row[columns[4]] != unknown:
                org_names.append(row[columns[4]])

        country_names = []
        if row[columns[3]] == row[columns[5]]:
            if row[columns[3]] != unknown:
                country_names.append(row[columns[5]])
        else:
            if row[columns[3]] != unknown:
                country_names.append(row[columns[3]])
            if row[columns[5]] != unknown:
                country_names.append(row[columns[5]])

        # Format organization names output
        org_output = f'"{org_names[0]}" or "{org_names[1]}"' if len(org_names) == 2 else org_names[0] if len(org_names) == 1 else 'unknown organization'
        
        # Format country names output
        country_output = f'"{country_names[0]}" or "{country_names[1]}"' if len(country_names) == 2 else country_names[0] if len(country_names) == 1 else 'unknown location'
        
        #print("Organization Names:", org_output)  # For debugging purposes
        #print("Country Names:", country_output)    # For debugging purposes

        time.sleep(5)
        
        try:
            result_json = job(org_output, country_output)
            if result_json:
                try:
                    result = json.loads(result_json)
                    #print(result)  # For debugging purposes
                    values = list(result.values())
                    for i, col in enumerate(columns[6:]):
                        row[col] = values[i] if i < len(values) else unknown
                except json.JSONDecodeError:
                    print("Failed to decode JSON response")
                    for i, col in enumerate(columns[6:]):
                        row[col] = unknown
            else:
                print("Response is None or empty")
                for i, col in enumerate(columns[6:]):
                    row[col] = unknown
        except Exception as e:
            print(f"Error calling job function: {e}")
            for i, col in enumerate(columns[6:]):
                row[col] = unknown


    return row

In [9]:
import logging

# Process only the first two rows for testing
tweets_update = tweets.apply(process_row, axis=1)

# Display the first few rows of the DataFrame to ensure it's loaded correctly
tweets_update.head()

Analyzing tweet: DATA BREACH ALERT -  Source code of Cyberpunk 2077 allegedly kept for sale  The leaked archive repor
01-Jul-24 10:21:53 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt35turbo/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
{
  "Alert Type": "Data Breach",
  "Ransomware": "UNK",
  "Victimized Entity": "Cyberpunk 2077",
  "Hashtagged country": "UNK"
}
01-Jul-24 10:21:59 - Request URL: 'https://karlhku4785985767.blob.core.windows.net/capstone-images/GRKgsu1XEAE52zo.PNG'
Request method: 'PUT'
Request headers:
    'Content-Length': '59158'
    'x-ms-blob-type': 'REDACTED'
    'x-ms-version': 'REDACTED'
    'Content-Type': 'application/octet-stream'
    'Accept': 'application/xml'
    'User-Agent': 'azsdk-python-storage-blob/12.19.0 Python/3.10.14 (Windows-10-10.0.22631-SP0)'
    'x-ms-date': 'REDACTED'
    'x-ms-client-request-id': 'b21582e8-3750-11ef-bbb9-a83b76af1466'
    'Authorization': 'REDACTED'
A body is sent with th

Unnamed: 0,Date (UTC),URL,Image,Text,Scrape Date,Alert Type,Ransomware,Victimized Entity,Hashtagged country,Entity in image,Country,Industry
0,2024-06-28 13:37:00+00:00,https://twitter.com/dexpose_io/status/18066834...,https://pbs.twimg.com/media/GRKgsu1XEAE52zo.jpg,DATA BREACH ALERT - Source code of Cyberpunk ...,2024-07-01 10:20:36+00:00,Data Breach,UNK,Cyberpunk 2077,UNK,Cyberpunk 2077,Poland,UNK
1,2024-06-24 11:12:00+00:00,https://twitter.com/dexpose_io/status/18051973...,https://pbs.twimg.com/media/GQ1ZOxFXMAAm1yT.jpg,RANSOMWARE ALERT ☣ - Federal Reserve Board fal...,2024-07-01 10:20:36+00:00,Ransomware,LockBit,Federal Reserve Board,UNK,Federal Reserve Board,USA,UNK
0,2024-06-14 20:19:00+00:00,https://twitter.com/dexpose_io/status/18017111...,https://pbs.twimg.com/media/GQD2qkbXEAERKnL.jpg,DATA BREACH ALERT - Database of HostLegends al...,2024-06-19 21:26:50+00:00,Data Breach,UNK,HostLegends,UNK,HOSTLEGENDS,USA,UNK
1,2024-06-13 16:40:00+00:00,https://twitter.com/dexpose_io/status/18012935...,https://pbs.twimg.com/media/GP963DtXUAA45UQ.jpg,RANSOMWARE ALERT - Racal Acoustics Ltd falls v...,2024-06-19 21:26:50+00:00,Ransomware,RansomHub Ransomware,Racal Acoustics Ltd,UNK,Racal Acoustics Ltd,UK,UNK
2,2024-06-13 07:41:00+00:00,https://twitter.com/dexpose_io/status/18011579...,https://pbs.twimg.com/media/GP7_fFzXcAAH7WJ.jpg,RANSOMWARE ALERT - Smartweb falls victim to PL...,2024-06-19 21:26:50+00:00,Ransomware,PLAY Ransomware,Smartweb,UNK,SMARTWEB,USA,UNK


In [10]:
# Merging updates to existing file
tweets.update(tweets_update)

# Save the updated file
tweets.to_csv(f'{username}.csv', index=False, encoding='utf-8')

With the tweets information already saved in a csv file, we can now employ an agent powered by **GPT-4** to analyze the csv data.

This allows us to automate the querying of the csv data.

In [11]:
from langchain_experimental.agents.agent_toolkits import create_csv_agent, create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType

csv_agent = create_csv_agent(
    azure_gpt4o,
    f"{username}.csv",
    verbose=False,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)

def tweets_stats(queries):
    prompt = f'''You are given a dataset of historic cyber security alerts.
        'Date (UTC)' column is the alert publish date.
        'URL' column is the url to the alert.
        'Image' column is the url to the alert image.	
        'Text' column is the text content of the alert.	
        '{columns[0]}' column shows the type of alert given by the alert.
        '{columns[1]}' column shows the name of ransomware mentioned by the alert.	
        '{columns[2]}' gives the name of the victimized organization mentioned in the alert.
        '{columns[5]}' gives the country in which the victimized organization is based.
        '{columns[6]}' gives the industry of the victimized organization.
        Answer the question by appropriately analyzing the data, but ignoring columns '{columns[3]}' and '{columns[4]}' and 'UNK' values. '''
    responses = []
    for query in queries:
        response = csv_agent.invoke(
            [
                SystemMessage(content=prompt),
                HumanMessage(content=query),
            ])
        responses.append(response['output'])
    return responses

In [12]:
q1 = "How many alerts were published in Q1 2024?"
q2 = "Looking at only Ransomware Alert, what are the top 3 ransomwares in Q1 2024?"
q3 = "What are the top 3 countries where victims of Ransomware are based in Q1 2024?"
q4 = "What are the top 3 countries where victims of Data Breach are based in Q1 2024?"

queries = [q1, q2, q3, q4]

responses = tweets_stats(queries)

for response in responses:
    print(response)

01-Jul-24 10:49:32 - HTTP Request: POST https://karlchatgpt-east-us.openai.azure.com//openai/deployments/gpt4o/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 429 Too Many Requests"
01-Jul-24 10:49:32 - Retrying request to /chat/completions in 0.983072 seconds
01-Jul-24 10:49:33 - HTTP Request: POST https://karlchatgpt-east-us.openai.azure.com//openai/deployments/gpt4o/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 429 Too Many Requests"
01-Jul-24 10:49:33 - Retrying request to /chat/completions in 9.000000 seconds
01-Jul-24 10:49:43 - HTTP Request: POST https://karlchatgpt-east-us.openai.azure.com//openai/deployments/gpt4o/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 429 Too Many Requests"


RateLimitError: Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2024-03-01-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 86400 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}

In [56]:
q5 = "How many times did China become the subject of Ransomware attack in 2024?"
q6 = "How many times did China become the subject of Data Breach incident in 2024?"
q7 = "In 2024, which are the top 3 industries that have become the victims of Ransomware attack?"
q8 = "In 2024, which are the top 3 industries that have become the victims of Data Breach incident?"

queries = [q5, q6, q7, q8]

responses = tweets_stats(queries)

for response in responses:
    print(response)

27-May-24 23:23:36 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt4/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
27-May-24 23:23:40 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt4/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 429 Too Many Requests"
27-May-24 23:23:40 - Retrying request to /chat/completions in 5.000000 seconds
27-May-24 23:23:45 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt4/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
27-May-24 23:23:49 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt4/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 429 Too Many Requests"
27-May-24 23:23:49 - Retrying request to /chat/completions in 7.000000 seconds
27-May-24 23:23:57 - HTTP Request: POST https://karlopenai.openai.azure.com//openai/deployments/gpt4/chat/completions?api-version=2024-03-01-pre