In [1]:
import pandas as pd

# Toy transaction log, written one row per transaction as
# (customer_id, transaction_date, amount_spent) and then pivoted into the
# column-oriented dict that the DataFrame is built from.
data = dict(zip(
    ('customer_id', 'transaction_date', 'amount_spent'),
    map(list, zip(
        (1, '2023-01-01', 100),   # customer 1
        (1, '2023-01-10', 150),
        (1, '2023-03-01', 80),
        (2, '2023-02-01', 200),   # customer 2
        (2, '2023-04-05', 50),
        (3, '2023-01-01', 40),    # customer 3
        (4, '2023-05-20', 90),    # customer 4
    )),
))
# Parse the ISO date strings so the column is datetime-typed, not object.
data['transaction_date'] = pd.to_datetime(data['transaction_date'])

transactions = pd.DataFrame(data)

In [9]:
# Order rows by customer, then chronologically within each customer.
transactions = transactions.sort_values(
    ['customer_id', 'transaction_date'],
)

In [10]:
# Render the frame (bare last expression => rich display) to check the sort.
transactions

Unnamed: 0,customer_id,transaction_date,amount_spent
0,1,2023-01-01,100
1,1,2023-01-10,150
2,1,2023-03-01,80
3,2,2023-02-01,200
4,2,2023-04-05,50
5,3,2023-01-01,40
6,4,2023-05-20,90


In [11]:
# Earliest transaction date per customer, as {customer_id: Timestamp}.
# Taking the per-group minimum avoids a Python-level row loop and, unlike
# "first row seen per customer", does not rely on `transactions` having been
# sorted by date in an earlier cell.
cust_first_trans = (
    transactions
    .groupby('customer_id')['transaction_date']
    .min()
    .to_dict()
)

In [12]:
# Inspect the customer_id -> first transaction date mapping.
cust_first_trans

{1: Timestamp('2023-01-01 00:00:00'),
 2: Timestamp('2023-02-01 00:00:00'),
 3: Timestamp('2023-01-01 00:00:00'),
 4: Timestamp('2023-05-20 00:00:00')}

In [14]:
def get_first_trans(cid):
    """Look up `cid` in the notebook-level `cust_first_trans` dict.

    Parameters
    ----------
    cid
        Customer id used as the dictionary key.

    Returns
    -------
    The value stored for `cid` (the customer's first transaction date).

    Raises
    ------
    KeyError
        If `cid` has no entry in `cust_first_trans`.
    """
    first_seen = cust_first_trans[cid]
    return first_seen

In [15]:
# Broadcast each customer's earliest transaction date onto all of their rows.
# transform('min') yields one value per input row, aligned to the frame's
# index, so no per-row Python callback or lookup dict is needed here.
transactions['first_trans'] = (
    transactions
    .groupby('customer_id')['transaction_date']
    .transform('min')
)

In [16]:
# Render the frame to check the newly added `first_trans` column.
transactions

Unnamed: 0,customer_id,transaction_date,amount_spent,first_trans
0,1,2023-01-01,100,2023-01-01
1,1,2023-01-10,150,2023-01-01
2,1,2023-03-01,80,2023-01-01
3,2,2023-02-01,200,2023-02-01
4,2,2023-04-05,50,2023-02-01
5,3,2023-01-01,40,2023-01-01
6,4,2023-05-20,90,2023-05-20


In [23]:
# Whole days elapsed between each transaction and that customer's first one.
transactions['diff_to_first'] = (
    transactions['transaction_date']
    .sub(transactions['first_trans'])
    .dt.days
)

In [25]:
# Flag transactions made no more than 30 days after the customer's first one.
# The first transaction itself has a difference of 0, so it is flagged too.
transactions['within_30'] = transactions['diff_to_first'].le(30)

In [26]:
# Render the frame to check the `diff_to_first` and `within_30` columns.
transactions

Unnamed: 0,customer_id,transaction_date,amount_spent,first_trans,diff_to_first,within_30
0,1,2023-01-01,100,2023-01-01,0,True
1,1,2023-01-10,150,2023-01-01,9,True
2,1,2023-03-01,80,2023-01-01,59,False
3,2,2023-02-01,200,2023-02-01,0,True
4,2,2023-04-05,50,2023-02-01,63,False
5,3,2023-01-01,40,2023-01-01,0,True
6,4,2023-05-20,90,2023-05-20,0,True


In [27]:
# Number of transactions per customer flagged `within_30`, as
# {customer_id: count}. Summing the boolean flag counts its True rows without
# a Python-level row loop, and a customer with no flagged rows is kept with a
# count of 0 instead of being left out of the dict.
cust_30_days_count = (
    transactions
    .groupby('customer_id')['within_30']
    .sum()
    .to_dict()
)

In [29]:
# {customer_id: bool} -- True when the customer has at least 2 transactions
# flagged `within_30` (the first purchase plus at least one repeat).
# Derived from `transactions` rather than from the previous value of this
# name, so re-running the cell is idempotent: thresholding the already
# thresholded dict would evaluate `True >= 2` and turn every flag False.
# NOTE: despite the name, this holds booleans from here on; the name is kept
# because later cells look it up.
cust_30_days_count = (
    transactions
    .groupby('customer_id')['within_30']
    .sum()
    .ge(2)
    .to_dict()
)

In [30]:
# Inspect the per-customer mapping produced by the thresholding cell above.
cust_30_days_count

{1: True, 2: False, 3: False, 4: False}

In [31]:
# One summary record per customer: id, first transaction date, and the
# repeat-within-30-days value looked up from `cust_30_days_count`.
# Built as a single comprehension so the notebook-level `data` name is not
# rebound to a per-row dict, and without the stray no-op lookup statement.
all_data = [
    {
        'customer_id': cid,
        'first_trans': first_trans,
        'repeat_within_30': cust_30_days_count[cid],
    }
    for cid, first_trans in cust_first_trans.items()
]

In [32]:
# Build the per-customer summary table from the records and display it.
# The frame is not assigned to a name; it is only rendered as the cell output.
pd.DataFrame(all_data)

Unnamed: 0,customer_id,first_trans,repeat_within_30
0,1,2023-01-01,True
1,2,2023-02-01,False
2,3,2023-01-01,False
3,4,2023-05-20,False
