In [40]:
import pandas as pd

In [41]:
df = pd.read_csv('synthetic_data_v2.csv')

In [42]:
df.head()

Unnamed: 0,transaction_date,category,product,amount,is_festival,event_name,is_holiday
0,1/1/2022,topup,smartcell_topup,7490.03,True,New Year English,True
1,1/1/2022,insurance,insurance_payment,8769.24,True,New Year English,True
2,1/1/2022,bank_transaction,bank_deposit,4631.36,True,New Year English,True
3,1/1/2022,education,school_fee,6309.34,True,New Year English,True
4,1/1/2022,insurance,insurance_payment,4842.33,True,New Year English,True


In [43]:
df['transaction_date'] = pd.to_datetime(df['transaction_date'])
df.sort_values(by='transaction_date', inplace=True)

In [44]:
df.head()

Unnamed: 0,transaction_date,category,product,amount,is_festival,event_name,is_holiday
0,2022-01-01,topup,smartcell_topup,7490.03,True,New Year English,True
102,2022-01-01,bill_payment,electricity_bill,3473.99,True,New Year English,True
103,2022-01-01,bill_payment,internet_bill,1846.15,True,New Year English,True
104,2022-01-01,shopping,online_shopping,7516.84,True,New Year English,True
105,2022-01-01,government,govt_fee,5336.66,True,New Year English,True


In [45]:
import pandas as pd

# Assuming you already have your DataFrame `df`
# Here's how to generate the dictionary

category_to_products = (
    df.groupby('category')['product']
    .unique()
    .apply(list)
    .to_dict()
)


In [46]:
category_to_products

{'bank_transaction': ['bank_deposit', 'bank_withdrawal'],
 'bill_payment': ['electricity_bill', 'internet_bill', 'water_bill'],
 'education': ['school_fee'],
 'entertainment': ['movie_ticket', 'airline_ticket'],
 'government': ['govt_fee', 'tax_payment'],
 'insurance': ['insurance_payment'],
 'loan': ['loan_repayment', 'credit_card_payment'],
 'shopping': ['online_shopping', 'mobile_data_pack'],
 'topup': ['smartcell_topup', 'ntc_topup', 'ncell_topup']}

In [48]:
from datetime import datetime

today = datetime.today().date()

max_date = df.transaction_date.max().date()

# def translate_date(x):
#     delta = today - max_date
def translate_date(x):
    delta = today - max_date
    return x + delta

df['transaction_date'] = df['transaction_date'].apply(translate_date)

In [49]:
df.head()

Unnamed: 0,transaction_date,category,product,amount,is_festival,event_name,is_holiday
0,2022-06-24,topup,smartcell_topup,7490.03,True,New Year English,True
102,2022-06-24,bill_payment,electricity_bill,3473.99,True,New Year English,True
103,2022-06-24,bill_payment,internet_bill,1846.15,True,New Year English,True
104,2022-06-24,shopping,online_shopping,7516.84,True,New Year English,True
105,2022-06-24,government,govt_fee,5336.66,True,New Year English,True


In [50]:
df.shape

(92015, 7)

In [51]:
df.to_csv('synthetic_data_v3.csv', index=False)

In [67]:
_transaction_df = None
def load_transaction_data(refresh=False) -> pd.DataFrame:
    global _transaction_df

    if _transaction_df is None or refresh:
        df = pd.read_csv("synthetic_data_v3.csv")  # or query DB
        df['transaction_date'] = pd.to_datetime(df['transaction_date'], dayfirst=True)
        df['category'] = df['category'].astype(str)
        df['product'] = df['product'].astype(str)
        _transaction_df = df

    return _transaction_df.copy()


def get_transaction_summary(start_date: datetime, end_date: datetime, group_by='category'):
    df = load_transaction_data()
    df = df[df['transaction_date'].between(start_date, end_date)]
    summary = df.groupby(["transaction_date", "category"]).agg(
        transaction_count=("amount", "count"),
        transaction_amount=("amount", "sum")
    ).reset_index()
    return summary

In [None]:
get_transaction_summary

In [68]:
from datetime import timedelta

today = datetime.today()
start_date = today - timedelta(days=30)

get_transaction_summary(start_date, today)

  df['transaction_date'] = pd.to_datetime(df['transaction_date'], dayfirst=True)


Unnamed: 0,transaction_date,category,transaction_count,transaction_amount
0,2025-05-25,bank_transaction,9,17937.83
1,2025-05-25,bill_payment,10,27398.39
2,2025-05-25,education,6,10231.13
3,2025-05-25,entertainment,7,20678.19
4,2025-05-25,government,6,16483.49
...,...,...,...,...
264,2025-06-23,government,16,75076.44
265,2025-06-23,insurance,15,72166.39
266,2025-06-23,loan,45,195617.66
267,2025-06-23,shopping,36,180697.83


In [69]:
def past_30_days_data(group_by='category'):
    today = datetime.today()
    start_date = today - timedelta(days=30)
    end_date = today
    df = load_transaction_data()
    df = df[df['transaction_date'].between(start_date, end_date)]
    summary = df.groupby(["transaction_date", "category"]).agg(
        transaction_count=("amount", "count"),
        transaction_amount=("amount", "sum")
    ).reset_index()
    return summary

In [73]:
past_30_days_data().transaction_date.unique()

<DatetimeArray>
['2025-05-25 00:00:00', '2025-05-26 00:00:00', '2025-05-27 00:00:00',
 '2025-05-28 00:00:00', '2025-05-29 00:00:00', '2025-05-30 00:00:00',
 '2025-05-31 00:00:00', '2025-06-01 00:00:00', '2025-06-02 00:00:00',
 '2025-06-03 00:00:00', '2025-06-04 00:00:00', '2025-06-05 00:00:00',
 '2025-06-06 00:00:00', '2025-06-07 00:00:00', '2025-06-08 00:00:00',
 '2025-06-09 00:00:00', '2025-06-10 00:00:00', '2025-06-11 00:00:00',
 '2025-06-12 00:00:00', '2025-06-13 00:00:00', '2025-06-14 00:00:00',
 '2025-06-15 00:00:00', '2025-06-16 00:00:00', '2025-06-17 00:00:00',
 '2025-06-18 00:00:00', '2025-06-19 00:00:00', '2025-06-20 00:00:00',
 '2025-06-21 00:00:00', '2025-06-22 00:00:00', '2025-06-23 00:00:00']
Length: 30, dtype: datetime64[ns]

In [79]:
import requests

response = requests.post('http://127.0.0.1:8000/api/category/wow-growth', json={'req_categories':['topup']})
print(response.json())
df = pd.DataFrame(response, columns=["Category", "Week Forecast (Rs)", "WoW Growth (%)"])
print(df)

{'wow_growth_list': 'nice'}


ValueError: Shape of passed values is (1, 1), indices imply (1, 3)