# Performance Bottlenecks

Let's look at examples of how AI can help us identify performance bottlenecks.

## Naive Distinct Value Tracking

In [None]:
import time  # Import time module for measuring execution time
import random  # Import random module for generating random numbers

def process_ids(ids):
    """Split ``ids`` into first-seen unique values and a count of repeats.

    This is the deliberately naive version: membership is tested against a
    plain list, so every lookup scans the unique IDs collected so far.

    Returns a ``(unique_ids, duplicates)`` tuple. ``unique_ids`` is a list in
    first-occurrence order; ``duplicates`` counts the values that had already
    been collected when they were encountered.
    """
    first_seen = []   # unique IDs, in the order they first appear
    repeat_count = 0  # how many values were already in first_seen
    for candidate in ids:
        if candidate not in first_seen:  # linear scan of the list
            first_seen.append(candidate)
            continue
        repeat_count += 1
    return first_seen, repeat_count

# Benchmark driver for the list-based process_ids.
# Simulate 100,000 IDs with some duplicates
random.seed(42)  # Set a fixed seed for reproducibility of random numbers
ids = [random.randint(1, 100000) for _ in range(100000)]  # Generate a list of 100,000 random integers between 1 and 100,000

# Only the process_ids call sits between the two timestamps, so generating
# the input above is not part of the reported time.
start_time = time.time()  # Record the start time before processing
unique, dups = process_ids(ids)  # Call the function to process the IDs and get unique IDs and duplicate count
end_time = time.time()  # Record the end time after processing

# Print the results, including the number of unique IDs, duplicates, and execution time formatted to 2 decimal places
print(f"Found {len(unique)} unique IDs and {dups} duplicates in {end_time - start_time:.2f} seconds")

In [None]:
import time
import random

def process_ids(ids):
    """Collect the distinct values of ``ids`` and count the repeats.

    Membership is tested against a set instead of a list.

    Returns a ``(unique_ids, duplicates)`` tuple. ``unique_ids`` is a list
    built from the set, so its order is whatever the set yields;
    ``duplicates`` counts the values that were already present when seen.
    """
    seen = set()
    repeat_count = 0
    for candidate in ids:
        if candidate not in seen:
            seen.add(candidate)
            continue
        repeat_count += 1
    # Callers receive a list, matching the list-based version's return type.
    return list(seen), repeat_count

# Benchmark driver for the set-based process_ids.
# Simulate 100,000 IDs with some duplicates
random.seed(42)  # fixed seed: every run generates the same IDs
ids = [random.randint(1, 100000) for _ in range(100000)]

# Only the process_ids call is timed; input generation is excluded.
start_time = time.time()
unique, dups = process_ids(ids)
end_time = time.time()

# Elapsed time is printed with four decimal places.
print(f"Found {len(unique)} unique IDs and {dups} duplicates in {end_time - start_time:.4f} seconds")


In [None]:
import numpy as np
import time
import random

# NumPy variant: count unique IDs and duplicates without a Python-level loop.
random.seed(42)  # fixed seed: every run generates the same IDs
ids = [random.randint(1, 100000) for _ in range(100_000)]

# The list-to-array conversion is inside the timed region.
start_time = time.time()
ids_array = np.array(ids)
unique_ids = np.unique(ids_array)       # Returns sorted unique IDs
# Every value beyond the first occurrence of an ID counts as a duplicate.
duplicates = len(ids) - len(unique_ids)
end_time = time.time()

print(f"Found {len(unique_ids)} unique IDs and {duplicates} duplicates in {end_time - start_time:.4f} seconds")


In [None]:
import pandas as pd
import time
import random

# pandas variant: count unique IDs and duplicates via Series.unique().
random.seed(42)  # fixed seed: every run generates the same IDs
ids = [random.randint(1, 100000) for _ in range(100_000)]

# Building the Series from the list is inside the timed region.
start_time = time.time()
ids_series = pd.Series(ids)
unique_ids = ids_series.unique()
# Every value beyond the first occurrence of an ID counts as a duplicate.
duplicates = len(ids) - len(unique_ids)
end_time = time.time()

print(f"Found {len(unique_ids)} unique IDs and {duplicates} duplicates in {end_time - start_time:.4f} seconds")


<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>


Note that when I asked for an AI-generated recommendation, it produced some unnecessary code.

In [None]:
import time
import random

def process_ids(ids):
    """Return the unique values of ``ids`` in first-seen order plus a repeat count.

    Two containers are kept side by side: a set used only for membership
    tests and a list that records the unique values in the order they first
    appear.

    Returns a ``(unique_ids, duplicates)`` tuple, where ``unique_ids`` is the
    ordered list and ``duplicates`` counts values that were already seen.
    """
    encountered = set()   # membership index
    ordered_unique = []   # same values, in first-occurrence order
    repeat_count = 0
    for candidate in ids:
        if candidate not in encountered:
            encountered.add(candidate)
            ordered_unique.append(candidate)
            continue
        repeat_count += 1
    return ordered_unique, repeat_count

# Benchmark driver for the set-backed process_ids.
# Simulate 100,000 IDs with some duplicates
random.seed(42)  # fixed seed: every run generates the same IDs
ids = [random.randint(1, 100000) for _ in range(100000)]

# Only the process_ids call is timed; input generation is excluded.
start_time = time.time()
unique, dups = process_ids(ids)
end_time = time.time()

# Four decimal places, so a run shorter than a hundredth of a second does not
# print as 0.00 seconds.
print(f"Found {len(unique)} unique IDs and {dups} duplicates in {end_time - start_time:.4f} seconds")



The `unique_ids` list was completely redundant with the `seen_ids` set. It was better to make a modification to remove it.

In [None]:
import time
import random

def process_ids(ids):
    """Collect the distinct values of ``ids`` in a set and count the repeats.

    Returns a ``(unique_ids, duplicates)`` tuple. Note that ``unique_ids`` is
    returned as a set here (not a list), so it carries no ordering;
    ``duplicates`` counts values that were already in the set when seen.
    """
    distinct = set()
    repeat_count = 0
    for candidate in ids:
        if candidate not in distinct:
            distinct.add(candidate)
            continue
        repeat_count += 1
    return distinct, repeat_count

# Benchmark driver for the set-only process_ids.
# Simulate 100,000 IDs with some duplicates
random.seed(42)  # fixed seed: every run generates the same IDs
ids = [random.randint(1, 100000) for _ in range(100000)]

# Only the process_ids call is timed; input generation is excluded.
start_time = time.time()
unique, dups = process_ids(ids)
end_time = time.time()

# Four decimal places, so a run shorter than a hundredth of a second does not
# print as 0.00 seconds.
print(f"Found {len(unique)} unique IDs and {dups} duplicates in {end_time - start_time:.4f} seconds")


## Nested Loop Bottlenecks

This one is gnarly, and it's complex and slow. It captures active users with valid emails from a list.

In [None]:
import time

def process_users(user_data):
    """Return transformed records for active users with a plausible email.

    A user is kept when ``user['active']`` is truthy, the email contains
    ``'@'`` (a simple validity check), and no previously kept user had the
    same raw email. Each kept user becomes a new dict with:

    * ``'email'``: the email lowercased with ``'_processed'`` appended
    * ``'name'``: the name uppercased
    * ``'id'``: the id converted to ``str``

    The duplicate check is intentionally a nested linear scan; this function
    is the slow baseline, so its cost grows with the number of kept users.

    Args:
        user_data: iterable of dicts with ``'id'``, ``'email'``, ``'name'``
            and ``'active'`` keys.

    Returns:
        A list of the transformed dicts, in input order.
    """
    filtered_users = []   # transformed users that passed every check
    accepted_emails = []  # raw emails of those users, used for the duplicate scan

    for user in user_data:  # Iterate over each user dictionary in the input list
        # Extract relevant fields for clarity and checks
        email = user['email']
        is_active = user['active']

        # Validate if the user is active and the email contains '@' as a simple check for validity
        if is_active and '@' in email:
            # Scan the raw emails accepted so far. The scan must not compare
            # against filtered_users: the emails stored there already carry
            # the '_processed' suffix, so an identical raw email would never
            # match and no duplicate would ever be skipped.
            for seen_email in accepted_emails:
                if seen_email == email:
                    break  # Duplicate found: skip this user
            else:  # Runs only if the loop finished without break (no duplicate)
                accepted_emails.append(email)
                filtered_users.append({
                    'email': email.lower() + '_processed',  # Lowercase the email and append '_processed'
                    'name': user['name'].upper(),  # Uppercase the user's name
                    'id': str(user['id'])  # Convert the ID to a string
                })

    return filtered_users


# Benchmark driver for the nested-loop process_users.
# Simulate a large dataset of 100,000 users for testing
# Each user has an ID from 0 to 99999, an email like 'userX@example.com',
# a name like 'User X', and active status alternating (even IDs are active)
users = [
    {'id': i, 'email': f'user{i}@example.com', 'name': f'User {i}', 'active': i % 2 == 0}
    for i in range(100000)
]

# Only the process_users call is timed; building `users` is excluded.
start = time.time()
# Process the simulated users using the function
result = process_users(users)
end = time.time()

# Print the number of processed users (should be half of 100,000 since only even IDs are active, and no duplicates)
print(f"Processed {len(result)} users in {end - start:.4f} seconds")

In [None]:
import time

def process_users(user_data):
    """Return transformed records for active users with a plausible email.

    A user is kept when ``user['active']`` is truthy, the email contains
    ``'@'``, and the same raw email has not been kept before. Seen emails are
    tracked in a set, so the duplicate check is a hash lookup rather than a
    scan of earlier results.

    Each kept user becomes ``{'email': <lowercased email + '_processed'>,
    'name': <uppercased name>, 'id': <id as str>}``; results keep input order.
    """
    accepted = []
    emails_taken = set()  # raw emails that already produced a result

    for record in user_data:
        address = record['email']
        # Guard clause: skip inactive users, invalid emails and repeats.
        if not record['active'] or '@' not in address or address in emails_taken:
            continue

        emails_taken.add(address)
        transformed = {
            'email': address.lower() + '_processed',
            'name': record['name'].upper(),
            'id': str(record['id']),
        }
        accepted.append(transformed)

    return accepted

# Benchmark driver for the set-based process_users.
# Simulate 100,000 users
# IDs run from 0 to 99,999; users with an even ID are marked active.
users = [
    {'id': i, 'email': f'user{i}@example.com', 'name': f'User {i}', 'active': i % 2 == 0}
    for i in range(100_000)
]

# Only the process_users call is timed; building `users` is excluded.
start = time.time()
result = process_users(users)
end = time.time()

print(f"Processed {len(result)} users in {end - start:.4f} seconds")


In [None]:
import pandas as pd
import time

# pandas variant of the user-processing pipeline: filter, de-duplicate and
# transform with column-wise operations instead of a Python loop.

# Simulate 100,000 users
# IDs run from 0 to 99,999; users with an even ID are marked active.
users = pd.DataFrame({
    'id': range(100_000),
    'email': [f'user{i}@example.com' for i in range(100_000)],
    'name': [f'User {i}' for i in range(100_000)],
    'active': [i % 2 == 0 for i in range(100_000)]
})

# Building `users` is outside the timed region.
start = time.time()

# Filter active users with valid emails. regex=False makes '@' a literal
# substring test, the same check as `'@' in email`, with no regex engine.
df = users[users['active'] & users['email'].str.contains('@', regex=False)]

# Drop duplicate emails
df = df.drop_duplicates(subset='email')

# Apply transformations. assign() returns a new DataFrame with the replaced
# columns, so nothing is written into a frame derived from `users` by
# filtering (the pattern that triggers SettingWithCopyWarning).
df = df.assign(
    email=df['email'].str.lower() + '_processed',
    name=df['name'].str.upper(),
    id=df['id'].astype(str),
)

end = time.time()

print(f"Processed {len(df)} users in {end - start:.4f} seconds")


<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>


We can simplify this a lot.

In [None]:
import re
import time

def process_users(user_data):
    """Filter, de-duplicate and transform user records in a single pass.

    Keeps a user only if ``user['active']`` is truthy, the email contains
    ``'@'``, and that raw email has not been kept already (tracked in a set).

    Returns a list, in input order, of dicts with the lowercased email plus
    ``'_processed'``, the uppercased name, and the id as a string.
    """
    kept = []
    known_emails = set()  # raw emails that already produced a result

    for entry in user_data:
        raw_email = entry['email']
        eligible = entry['active'] and '@' in raw_email and raw_email not in known_emails
        if not eligible:
            continue

        kept.append(
            {
                'email': raw_email.lower() + '_processed',
                'name': entry['name'].upper(),
                'id': str(entry['id']),
            }
        )
        # Recorded only after the record was built and stored.
        known_emails.add(raw_email)

    return kept


# Benchmark driver for the simplified process_users.
# Simulate a large dataset
# IDs run from 0 to 99,999; users with an even ID are marked active.
users = [
    {'id': i, 'email': f'user{i}@example.com', 'name': f'User {i}', 'active': i % 2 == 0}
    for i in range(100000)
]


# Only the process_users call is timed; building `users` is excluded.
start = time.time()
# Process the simulated users using the function
result = process_users(users)
end = time.time()

# Print the number of processed users (should be half of 100,000 since only even IDs are active, and no duplicates)
print(f"Processed {len(result)} users in {end - start:.4f} seconds")


## SQL Optimization

Let's start with a common mistake that is easy to make when running SQL from Python. Let's say I want to insert the next 10,000 dates into a `CALENDAR` table that has a single column of dates. Why is this so slow?

In [None]:
import sqlite3
import time

# Slow baseline: each of the 10,000 inserts opens its own connection, commits
# its own transaction and closes the connection again. That per-row overhead
# is kept on purpose; it is what this cell demonstrates.
start = time.time()

for _ in range(10_000):
    conn = sqlite3.connect("company_operations.db")
    cursor = conn.cursor()
    # Append the day after the current maximum CALENDAR_DATE.
    cursor.execute("""INSERT INTO CALENDAR (CALENDAR_DATE) VALUES
                   ((SELECT DATE(MAX(CALENDAR_DATE),'+1 day') FROM CALENDAR))
                   """)
    conn.commit()
    cursor.close()
    conn.close()

# The time module has no end() function; time.time() takes the closing
# timestamp, matching the start timestamp above.
end = time.time()

print(f"Finished in {end - start:.4f} seconds")

In [None]:
import sqlite3
import time

# Same 10,000 inserts, but through one connection and one cursor, with a
# single commit after the loop instead of one per row.
start = time.time()

conn = sqlite3.connect("company_operations.db")
cursor = conn.cursor()

for _ in range(10_000):
    # Append the day after the current maximum CALENDAR_DATE.
    cursor.execute("""
        INSERT INTO CALENDAR (CALENDAR_DATE)
        VALUES ((SELECT DATE(MAX(CALENDAR_DATE), '+1 day') FROM CALENDAR))
    """)

conn.commit()  # Commit once at the end
cursor.close()
conn.close()

end = time.time()
print(f"Finished in {end - start:.4f} seconds")


In [None]:
import sqlite3
import time
from datetime import datetime, timedelta

# Bulk variant: read the last date once, compute the next 10,000 dates in
# Python, and insert them with a single executemany() call.
start = time.time()

# Connect once
conn = sqlite3.connect("company_operations.db")
try:
    cursor = conn.cursor()

    # Find the last date in the table
    cursor.execute("SELECT MAX(CALENDAR_DATE) FROM CALENDAR")
    last_date_str = cursor.fetchone()[0]  # e.g., '2025-08-25'
    if last_date_str is None:
        # MAX() over an empty table yields NULL, so there is nothing to extend.
        raise ValueError("CALENDAR is empty; there is no last date to extend from")
    last_date = datetime.strptime(last_date_str, "%Y-%m-%d")

    # Generate 10,000 new consecutive dates as 'YYYY-MM-DD' strings, the same
    # format parsed above. Binding datetime objects instead would store a
    # time component as well (and relies on sqlite3's deprecated default
    # adapter), leaving mixed formats that the strptime call above could not
    # parse on the next run.
    new_dates = [
        ((last_date + timedelta(days=offset)).strftime("%Y-%m-%d"),)
        for offset in range(1, 10_001)
    ]

    # Bulk insert all at once
    cursor.executemany("INSERT INTO CALENDAR (CALENDAR_DATE) VALUES (?)", new_dates)

    conn.commit()
    cursor.close()
finally:
    # Release the database handle even if the read or the insert fails.
    conn.close()

end = time.time()
print(f"Inserted 10,000 dates in {end - start:.4f} seconds")


Let's pretend for a moment that this SQL query below took a very long time and the table is quite large in number of records. Let's see what it recommends.

In [None]:
import pandas as pd
import sqlite3
import time

# Time a single-date lookup against WEATHER_MONITOR.
conn = sqlite3.connect("company_operations.db")

# All columns for one REPORT_DATE value.
sql = """
SELECT * FROM WEATHER_MONITOR
WHERE REPORT_DATE = '2021-05-05'
"""

# Only the read_sql call is timed; its result is discarded because the cell
# is measuring the query, not using the rows.
start = time.time()
pd.read_sql(sql, conn)
end = time.time()
print(f"Query took {end - start:.4f} seconds")

conn.close()

But keep in mind, if the AI recommends an index and other solutions, there are gotchas like write speed going down substantially. These are nuances that you cannot pick up from vibe coding, and they are why you should know the subject matter you are prompting about.

Here is another example. Let's try to optimize this query.

In [None]:
import pandas as pd
import sqlite3
import time

# Time a query that computes each customer's average quantity with a
# correlated subquery: the inner SELECT references co1, the outer query's row.
conn = sqlite3.connect("company_operations.db")

sql = """
SELECT
CUSTOMER_ID,
ORDER_DATE,
QUANTITY,
(SELECT AVG(QUANTITY)
 FROM CUSTOMER_ORDER co3
 WHERE co3.CUSTOMER_ID = co1.CUSTOMER_ID) as avg_customer_quantity
FROM CUSTOMER_ORDER co1
ORDER BY ORDER_DATE
"""

# Only the read_sql call is timed; its result is discarded because the cell
# is measuring the query, not using the rows.
start = time.time()
pd.read_sql(sql, conn)
end = time.time()
print(f"Query took {end - start:.4f} seconds")

# Close the connection so the database handle is not left open.
conn.close()

In [None]:
import pandas as pd
import sqlite3
import time

# Time the rewritten query: per-customer averages are computed once in a
# derived table (GROUP BY CUSTOMER_ID) and joined back to the orders.
conn = sqlite3.connect("company_operations.db")

sql = """
SELECT
    co1.CUSTOMER_ID,
    co1.ORDER_DATE,
    co1.QUANTITY,
    avg_per_customer.avg_quantity AS avg_customer_quantity
FROM CUSTOMER_ORDER co1
JOIN (
    SELECT CUSTOMER_ID, AVG(QUANTITY) AS avg_quantity
    FROM CUSTOMER_ORDER
    GROUP BY CUSTOMER_ID
) AS avg_per_customer
ON co1.CUSTOMER_ID = avg_per_customer.CUSTOMER_ID
ORDER BY co1.ORDER_DATE

"""

# Only the read_sql call is timed; its result is discarded because the cell
# is measuring the query, not using the rows.
start = time.time()
pd.read_sql(sql, conn)
end = time.time()
print(f"Query took {end - start:.4f} seconds")

# Close the connection so the database handle is not left open.
conn.close()

Ideally, we'd like to get a windowing function once the AI recommends a fix. But it may settle for a common table expression or derived table too. This again shows that the AI may propose something that works but may be suboptimal.

## Vectorization

Let's see how we can have AI convert our code to be vectorized for numerical operations. This will make our code much faster and help us learn NumPy equivalents to vanilla python loop operations.

Here is a simple summation of the 300 million integers from 0 through 299,999,999. It takes nearly 10 seconds to do. Let's see how AI suggests we fix it.

In [None]:
import time
# Baseline: sum the integers 0..299,999,999 with a pure-Python loop.
start = time.time()

total = 0
for i in range(300_000_000):
    total += i  # one interpreted addition per element

end = time.time()
print(f"Total: {total} in {end - start:.4f} seconds")

<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>


Here is what ChatGPT suggested I do. It takes less than a second.

In [None]:
import numpy as np
import time

# Vectorized sum of the integers 0..299,999,999.
start = time.time()
# The total (about 4.5e16) does not fit in 32 bits. The dtype is pinned to
# int64 so the result does not depend on the platform's default integer size.
total = np.sum(np.arange(300_000_000, dtype=np.int64))
end = time.time()

print(f"Total: {total} in {end - start:.4f} seconds")

Calculating this average took nearly 7 seconds.

In [None]:
import time

# Baseline: build a 300-million-element Python list and average it.
# Building the list is inside the timed region.
start = time.time()

x = [i for i in range(300_000_000)]

mean = sum(x) / len(x)

end = time.time()

print(f"Mean: {mean} in {end - start:.4f} seconds")

<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>

Below, the AI-generated response took less than half a second.

In [None]:
import numpy as np
import time

# Vectorized mean of the integers 0..299,999,999.
# Creating the array is inside the timed region.
start = time.time()
x = np.arange(300_000_000)
mean = np.mean(x)
end = time.time()

print(f"Mean: {mean} in {end - start:.4f} seconds")

Now here is something painful. This operation takes nearly 30 seconds.

In [None]:
import time

# Baseline: element-wise addition of two 300-million-element Python lists.
# Building both input lists is inside the timed region.
start = time.time()

x = list(range(300_000_000))
y = list(range(300_000_000))
z = []
for i in range(len(x)):
    z.append(x[i] + y[i])  # one index lookup pair and one append per element

end = time.time()

print(f"Finished in {end - start:.4f} seconds")

<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>

But the AI-generated optimization takes less than 10 seconds.

In [None]:
import numpy as np
import time

# Vectorized element-wise addition with an explicit int64 dtype.
# Creating both arrays is inside the timed region.
start = time.time()

x = np.arange(300_000_000, dtype=np.int64)
y = np.arange(300_000_000, dtype=np.int64)
z = x + y  # Vectorized addition

end = time.time()
print(f"Finished in {end - start:.4f} seconds")


In [None]:
import numpy as np
import time

# Vectorized element-wise addition using np.arange's default dtype.
# Creating both arrays is inside the timed region.
start = time.time()
x = np.arange(300_000_000)
y = np.arange(300_000_000)
z = x + y  # element-wise sum of the two arrays
end = time.time()

print(f"Finished in {end - start:.4f} seconds")

Here we try to calculate the mean, variance, and standard deviation of a list of numbers in order to normalize the data.

In [None]:
import math
import time

# Baseline: normalize a list to zero mean and unit standard deviation
# using only Python loops and the math module.
start = time.time()
data = [i * 0.1 for i in range(1_000_000)]  # Simulated dataset
mean = sum(data) / len(data)
# Population variance: squared deviations divided by len(data).
variance = sum((x - mean) ** 2 for x in data) / len(data)
std_dev = math.sqrt(variance)
normalized = []
for val in data:
    normalized.append((val - mean) / std_dev)

end = time.time()

print(f"Finished in {end - start:.4f} seconds")

<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>


In [None]:
import numpy as np
import time

# Vectorized normalization: subtract the mean and divide by the standard
# deviation in one array expression.
start = time.time()
data = np.arange(1_000_000) * 0.1  # Simulated dataset
normalized = (data - np.mean(data)) / np.std(data)
end = time.time()

print(f"Finished in {end - start:.4f} seconds")

## Computing Pairwise Euclidean Distances for Clustering

In [None]:
import math
import time

# Baseline: full pairwise Euclidean distance matrix with two nested Python
# loops, so the inner body runs len(points) * len(points) times.
start = time.time()
points = [[i, i+1] for i in range(10_000)]  # List of [x, y] points
distances = []  # distances[i][j] is the distance between points i and j
for i in range(len(points)):
    row = []
    for j in range(len(points)):
        dx = points[i][0] - points[j][0]
        dy = points[i][1] - points[j][1]
        row.append(math.sqrt(dx**2 + dy**2))
    distances.append(row)

end = time.time()

print("Finished in {:.4f} seconds".format(end - start))

In [None]:
from scipy.spatial.distance import cdist
import numpy as np
import time

# SciPy variant: compute the pairwise Euclidean distances with cdist.
# The points array is built before the timer starts, so only cdist is timed.
points = np.array([[i, i+1] for i in range(10_000)], dtype=np.float64)

start = time.time()
distances = cdist(points, points, metric='euclidean')
end = time.time()

print("Finished in {:.4f} seconds".format(end - start))


<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>

In [None]:
import numpy as np
import time

# NumPy broadcasting variant of the pairwise distance matrix.
start = time.time()
# Row i of `points` is [i, i + 1]: the column vector of indices is broadcast
# against [1, 1] and then offset by [0, 1].
points = np.arange(10_000)[:, np.newaxis] * np.array([1, 1]) + np.array([0, 1])  # Array of shape (10000, 2)
# Broadcasting (N, 1, 2) against (1, N, 2) yields every pairwise coordinate
# difference in one (N, N, 2) array.
diffs = points[:, np.newaxis, :] - points[np.newaxis, :, :]
# Sum the squared differences over the coordinate axis, then take the root.
distances = np.sqrt(np.sum(diffs**2, axis=-1))
end = time.time()

print("Finished in {:.4f} seconds".format(end - start))
# Alternatively, for even faster: use scipy.spatial.distance.cdist(points, points)

## EXERCISE

Try to speed up this time series operation below.

In [None]:
import time

# Exercise baseline: average the prices whose matching volume exceeds 500,
# using Python lists and an index loop.
start = time.time()
prices = [i * 0.01 for i in range(10_000_000)]  # Time-series prices
volumes = [i % 1000 for i in range(10_000_000)]  # Volumes
high_volume_prices = []
for i in range(len(prices)):
    if volumes[i] > 500:
        high_volume_prices.append(prices[i])
# Fall back to 0 when nothing passed the filter, avoiding a division by zero.
avg_price = sum(high_volume_prices) / len(high_volume_prices) if high_volume_prices else 0
end = time.time()

print("Finished in {:.4f} seconds".format(end - start))
print(f"Average high-volume price: {avg_price:.2f}")

In [None]:
import numpy as np
import time

# Vectorized solution to the exercise: boolean-mask filter plus mean.
start = time.time()

# Create NumPy arrays
prices = np.arange(10_000_000, dtype=np.float64) * 0.01
volumes = np.arange(10_000_000) % 1000

# Filter prices where volume > 500
mask = volumes > 500  # boolean array, one flag per element
high_volume_prices = prices[mask]

# Compute average
# Fall back to 0 when the filter selected nothing.
avg_price = high_volume_prices.mean() if high_volume_prices.size > 0 else 0

end = time.time()
print(f"Finished in {end - start:.4f} seconds")
print(f"Average high-volume price: {avg_price:.2f}")


<div style="text-align:center; font-size:48px; line-height:1.2;">
|<br>
|<br>
|<br>
↓
</div>

In [None]:
import numpy as np
import time

# Compact vectorized solution to the exercise. This cell reports only the
# elapsed time; avg_price is computed but not printed.
start = time.time()

prices = np.arange(10_000_000) * 0.01
volumes = np.arange(10_000_000) % 1000
mask = volumes > 500  # boolean array, one flag per element
high_volume_prices = prices[mask]
# Fall back to 0 when the filter selected nothing.
avg_price = np.mean(high_volume_prices) if len(high_volume_prices) > 0 else 0

end = time.time()

print("Finished in {:.4f} seconds".format(end - start))