# What is the objective?
Given a large dataframe with the following simple structure:

id|text|create|
--|--|--|
100|hello|1-Jan
200|world|2-Jan

Study the following behaviour:
- How long does it take to load the frame from a pickled file and search for records with the specified ids?
- How long does it take to load the frame from a pickled file, add N new records, delete the N oldest records and then save to disk?

# imports

In [19]:
import random
import pandas as pd
import time
import datetime as dt

# Create dataframe

In [12]:


def pick_randomly_from_list(input: list[int], m_times: int) -> list[int]:
    """Draw ``m_times`` values from ``input`` at random, with replacement.

    Each draw is an independent ``random.choice`` call, so the same value
    may appear more than once in the result.

    Args:
        input: Candidate values to draw from.
        m_times: Number of draws to perform. Zero or a negative number
            yields an empty list.

    Returns:
        A list holding one drawn value per draw, in draw order.

    Raises:
        IndexError: If ``input`` is empty and at least one draw is requested
            (raised by ``random.choice``).
    """
    picks: list[int] = []
    for _ in range(m_times):
        picks.append(random.choice(input))
    return picks

In [10]:

# Pool of candidate integers (1 through 10) to sample from
n_integers = list(range(1, 11))

# Number of random draws to make from the pool
m_times = 50
results = pick_randomly_from_list(n_integers, m_times)
print("Random Picks:", results)


Random Picks: [5, 6, 8, 4, 10, 7, 9, 3, 5, 7, 1, 4, 4, 7, 3, 6, 2, 1, 2, 1, 7, 1, 2, 10, 10, 4, 10, 4, 6, 5, 6, 4, 6, 8, 9, 4, 7, 10, 9, 10, 5, 2, 10, 10, 10, 3, 8, 7, 5, 9]


In [20]:
def create_dataframe(max_rows: int, unique_row_count: int) -> pd.DataFrame:
    """Build a synthetic frame of ``max_rows`` records with randomly repeated ids.

    Ids are drawn at random from the consecutive range starting at 10000 and
    spanning ``unique_row_count`` values, so duplicates are expected whenever
    ``max_rows`` exceeds ``unique_row_count``. Row 0 is stamped with the
    current local time and each following row is 3 minutes older than the
    one before it.

    Args:
        max_rows: Number of rows in the resulting frame.
        unique_row_count: Size of the pool of distinct ids to draw from.

    Returns:
        A DataFrame with the columns ``id``, ``description`` and
        ``created_at``.
    """
    first_id = 10000
    minutes_per_step = 3

    # Draw the ids first; the description of each row is derived from its id.
    id_pool = list(range(first_id, first_id + unique_row_count))
    ids = pick_randomly_from_list(input=id_pool, m_times=max_rows)
    descriptions = [f"This is string number: {num}" for num in ids]

    # A single reference time keeps the spacing between rows exact.
    reference_time = dt.datetime.now()
    timestamps: list[dt.datetime] = [
        reference_time - dt.timedelta(minutes=row * minutes_per_step)
        for row in range(max_rows)
    ]

    return pd.DataFrame(
        {
            'id': ids,
            'description': descriptions,
            'created_at': timestamps,
        }
    )

# Demo with 20 records

In [21]:
df_sample_20 = create_dataframe(max_rows=20, unique_row_count=10)
print(df_sample_20)

       id                   description                 created_at
0   10009  This is string number: 10009 2025-11-11 22:00:54.801009
1   10007  This is string number: 10007 2025-11-11 21:57:54.801009
2   10003  This is string number: 10003 2025-11-11 21:54:54.801009
3   10000  This is string number: 10000 2025-11-11 21:51:54.801009
4   10001  This is string number: 10001 2025-11-11 21:48:54.801009
5   10002  This is string number: 10002 2025-11-11 21:45:54.801009
6   10002  This is string number: 10002 2025-11-11 21:42:54.801009
7   10008  This is string number: 10008 2025-11-11 21:39:54.801009
8   10002  This is string number: 10002 2025-11-11 21:36:54.801009
9   10006  This is string number: 10006 2025-11-11 21:33:54.801009
10  10008  This is string number: 10008 2025-11-11 21:30:54.801009
11  10000  This is string number: 10000 2025-11-11 21:27:54.801009
12  10001  This is string number: 10001 2025-11-11 21:24:54.801009
13  10006  This is string number: 10006 2025-11-11 21:21:54.80