In [4]:
import pandas as pd
import numpy as np

In [6]:
# Read the raw expense records from the CSV file (path is relative to the
# current working directory) and print the whole frame for a first look.
df = pd.read_csv(filepath_or_buffer='expense.csv')
print(df)

     User                   Email       Category   Amount        Date
0  Nikita    nikitays06@gmail.com        Grocery  $150.50  2025-06-15
1  Nikita    nikitays06@gmail.com      Transport   $60.00  2025-06-20
2   Arivu   arivunazi21@gmail.com        Grocery   $90.00  2025-06-12
3   Arivu   arivunazi21@gmail.com  Entertainment  $120.75  2025-07-05
4  Lokesh  lokeynikey21@gmail.com      Utilities  $200.00  2025-07-08
5  Lokesh  lokeynikey21@gmail.com      Transport   $75.25  2025-06-28
6  Nikita    nikitays06@gmail.com  Entertainment  $110.10  2025-07-03
7   Arivu   arivunazi21@gmail.com      Utilities   $85.40  2025-07-12
8  Lokesh  lokeynikey21@gmail.com        Grocery  $130.30  2025-07-14



**Clean and convert data**

In [7]:
# Strip the dollar sign and comma separators from 'Amount', then cast the
# column to float so it can be summed and averaged.
# The pattern is a raw string on purpose: in a normal string literal '\$' is
# an invalid escape sequence, which recent Python versions warn about.
df['Amount'] = df['Amount'].replace(r'[\$,]', '', regex=True).astype(float)
print(df)

     User                   Email       Category  Amount        Date
0  Nikita    nikitays06@gmail.com        Grocery  150.50  2025-06-15
1  Nikita    nikitays06@gmail.com      Transport   60.00  2025-06-20
2   Arivu   arivunazi21@gmail.com        Grocery   90.00  2025-06-12
3   Arivu   arivunazi21@gmail.com  Entertainment  120.75  2025-07-05
4  Lokesh  lokeynikey21@gmail.com      Utilities  200.00  2025-07-08
5  Lokesh  lokeynikey21@gmail.com      Transport   75.25  2025-06-28
6  Nikita    nikitays06@gmail.com  Entertainment  110.10  2025-07-03
7   Arivu   arivunazi21@gmail.com      Utilities   85.40  2025-07-12
8  Lokesh  lokeynikey21@gmail.com        Grocery  130.30  2025-07-14


In [8]:
# Parse the 'Date' column with pandas' datetime parser and store the result
# back in the same column.
df['Date'] = df['Date'].pipe(pd.to_datetime)
print(df)

     User                   Email       Category  Amount       Date
0  Nikita    nikitays06@gmail.com        Grocery  150.50 2025-06-15
1  Nikita    nikitays06@gmail.com      Transport   60.00 2025-06-20
2   Arivu   arivunazi21@gmail.com        Grocery   90.00 2025-06-12
3   Arivu   arivunazi21@gmail.com  Entertainment  120.75 2025-07-05
4  Lokesh  lokeynikey21@gmail.com      Utilities  200.00 2025-07-08
5  Lokesh  lokeynikey21@gmail.com      Transport   75.25 2025-06-28
6  Nikita    nikitays06@gmail.com  Entertainment  110.10 2025-07-03
7   Arivu   arivunazi21@gmail.com      Utilities   85.40 2025-07-12
8  Lokesh  lokeynikey21@gmail.com        Grocery  130.30 2025-07-14



**Group by month and category**

In [10]:
# Derive a monthly period (shown as e.g. 2025-06) from each date; it is used
# as the grouping key for the monthly summaries.
df['Month'] = df['Date'].dt.to_period(freq='M')
print(df)

     User                   Email       Category  Amount       Date    Month
0  Nikita    nikitays06@gmail.com        Grocery  150.50 2025-06-15  2025-06
1  Nikita    nikitays06@gmail.com      Transport   60.00 2025-06-20  2025-06
2   Arivu   arivunazi21@gmail.com        Grocery   90.00 2025-06-12  2025-06
3   Arivu   arivunazi21@gmail.com  Entertainment  120.75 2025-07-05  2025-07
4  Lokesh  lokeynikey21@gmail.com      Utilities  200.00 2025-07-08  2025-07
5  Lokesh  lokeynikey21@gmail.com      Transport   75.25 2025-06-28  2025-06
6  Nikita    nikitays06@gmail.com  Entertainment  110.10 2025-07-03  2025-07
7   Arivu   arivunazi21@gmail.com      Utilities   85.40 2025-07-12  2025-07
8  Lokesh  lokeynikey21@gmail.com        Grocery  130.30 2025-07-14  2025-07


In [12]:
# Total spend per (month, category): months become rows, categories become
# columns, and month/category pairs with no expenses are shown as 0.
monthly_tot = (
    df.groupby(['Month', 'Category'])['Amount']
    .sum()
    .unstack()
    .fillna(0)
)
print(" Monthly Total by Category:\n ", monthly_tot)

 Monthly Total by Category:
  Category  Entertainment  Grocery  Transport  Utilities
Month                                                 
2025-06            0.00    240.5     135.25        0.0
2025-07          230.85    130.3       0.00      285.4




**Monthly average by user**

In [13]:

# Mean expense amount per (user, month): users become rows, months become
# columns, and user/month pairs with no expenses are shown as 0.
monthly_avg = (
    df.groupby(['User', 'Month'])['Amount']
    .mean()
    .unstack()
    .fillna(0)
)
print("\n Monthly Average Expense per User:\n", monthly_avg)


 Monthly Average Expense per User:
 Month   2025-06  2025-07
User                    
Arivu     90.00  103.075
Lokesh    75.25  165.150
Nikita   105.25  110.100




**Cleaned final dataset**

In [14]:

# Order the records chronologically. sort_values returns a new frame, so
# `df` itself is left in its original row order and the index labels are kept.
df_cleaned = df.sort_values('Date', ascending=True)
print("\n Cleaned Dataset:\n", df_cleaned)


 Cleaned Dataset:
      User                   Email       Category  Amount       Date    Month
2   Arivu   arivunazi21@gmail.com        Grocery   90.00 2025-06-12  2025-06
0  Nikita    nikitays06@gmail.com        Grocery  150.50 2025-06-15  2025-06
1  Nikita    nikitays06@gmail.com      Transport   60.00 2025-06-20  2025-06
5  Lokesh  lokeynikey21@gmail.com      Transport   75.25 2025-06-28  2025-06
6  Nikita    nikitays06@gmail.com  Entertainment  110.10 2025-07-03  2025-07
3   Arivu   arivunazi21@gmail.com  Entertainment  120.75 2025-07-05  2025-07
4  Lokesh  lokeynikey21@gmail.com      Utilities  200.00 2025-07-08  2025-07
7   Arivu   arivunazi21@gmail.com      Utilities   85.40 2025-07-12  2025-07
8  Lokesh  lokeynikey21@gmail.com        Grocery  130.30 2025-07-14  2025-07




**Export cleaned data**

In [16]:

# Write the date-sorted frame to disk; index=False omits the row labels so
# the file contains only the data columns.
df_cleaned.to_csv(path_or_buf='cleaned_expenses.csv', index=False)