In [None]:
# !pip install langchain_community

In [None]:
from langchain_community.llms import Ollama

In [4]:
llm = Ollama(model="llama2")
llm.invoke("The first man on the moon was ...")

'\nThe first man to walk on the moon was Neil Armstrong. He stepped out of the Apollo 11 lunar module Eagle and onto the moon\'s surface on July 20, 1969, famously declaring "That\'s one small step for man, one giant leap for mankind" as he took his first steps. Armstrong was followed by fellow astronaut Edwin "Buzz" Aldrin, who also walked on the moon during the mission.'

In [5]:
llm.invoke("Can you add an appropriate category next to each of the following expenses. Respond with a list of categories separated by commas. For example, Airtel Bill Payment - Utilities, Zomato Food Order  - Food, etc.: \
Reliance Fresh Groceries, Flipkart Shopping , Restaurant Bill , Grocery from D-Mart ")

'Sure! Here are some categories you can use:\n\nGrocery from D-Mart - Groceries\nFlipkart Shopping - Online Shopping\nReliance Fresh Groceries - Groceries\nRestaurant Bill - Food\nAirtel Bill Payment - Utilities'

### Read transaction data

In [6]:
# Read the transactions_2022_2023.csv file 
import pandas as pd
df = pd.read_csv("transactions_2022_2023.csv")
df.head()

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (Rupee)
0,12/30/2022,Raj's Dhaba,Expense,500
1,12/30/2022,Airtel Bill Payment,Expense,300
2,12/30/2022,House Rent,Expense,15000
3,12/30/2023,Sweta's Boutique Purchase,Expense,1200
4,12/29/2023,Freelance Web Design Project,Income,8000


In [7]:
# Get unique transactions in the Name / Description column
unique_transactions = df["Name / Description"].unique()
len(unique_transactions)

19

In [8]:
unique_transactions[1:10]

array(['Airtel Bill Payment', 'House Rent', "Sweta's Boutique Purchase",
       'Freelance Web Design Project', 'Zomato Food Order',
       'Grocery from D-Mart', 'Tuition Fees Received',
       'Diwali Donation to NGO', 'Salary'], dtype=object)

### Categorise bank transactions with Llama2

In [9]:
# Get index list
#https://stackoverflow.com/questions/47518609/for-loop-range-and-interval-how-to-include-last-step
def hop(start, stop, step):
    for i in range(start, stop, step):
        yield i
    yield stop

index_list = list(hop(0, len(unique_transactions), 30))
index_list

[0, 19]

In [10]:
def categorize_transactions(transaction_names, llm):
    response = llm.invoke("Can you add an appropriate category to the following expenses. For example: Airtel Bill Payment - Utilities, Zomato Food Order  - Food, etc.. Categories should be less than 4 words. " + transaction_names)
    response = response.split('\n')

    print(response)

    # Put in dataframe
    categories_df = pd.DataFrame({'Transaction vs category': response})
    categories_df[['Transaction', 'Category']] = categories_df['Transaction vs category'].str.split(' - ', expand=True)
    
    return categories_df

In [11]:
# Test out the function
categorize_transactions('Airtel Bill Payment, Zomato Food Order ',
                        llm)

['Sure! Here are some appropriate categories for the expenses you listed:', '', '1. Airtel Bill Payment - Utilities', '2. Zomato Food Order - Food', '3. Grocery Shopping - Food', '4. Online Subscription - Entertainment', '5. Movies Ticket Booking - Entertainment', '6. Train Ticket Booking - Travel', '7. Bus Ticket Booking - Transportation', '8. Gas Bill Payment - Utilities', '9. Electricity Bill Payment - Utilities', '10. Water Bill Payment - Utilities']


Unnamed: 0,Transaction vs category,Transaction,Category
0,Sure! Here are some appropriate categories for...,Sure! Here are some appropriate categories for...,
1,,,
2,1. Airtel Bill Payment - Utilities,1. Airtel Bill Payment,Utilities
3,2. Zomato Food Order - Food,2. Zomato Food Order,Food
4,3. Grocery Shopping - Food,3. Grocery Shopping,Food
5,4. Online Subscription - Entertainment,4. Online Subscription,Entertainment
6,5. Movies Ticket Booking - Entertainment,5. Movies Ticket Booking,Entertainment
7,6. Train Ticket Booking - Travel,6. Train Ticket Booking,Travel
8,7. Bus Ticket Booking - Transportation,7. Bus Ticket Booking,Transportation
9,8. Gas Bill Payment - Utilities,8. Gas Bill Payment,Utilities


In [12]:
# Intialise the categories_df_all dataframe
categories_df_all = pd.DataFrame()

# Loop through the index_list
for i in range(0, len(index_list)-1):
    transaction_names = unique_transactions[index_list[i]:index_list[i+1]]
    transaction_names = ','.join(transaction_names)

    categories_df = categorize_transactions(transaction_names, llm)
    categories_df_all = pd.concat([categories_df_all, categories_df], ignore_index=True)

['Sure! Here are the categories I would suggest for each of the expenses you provided:', '', "1. Raj's Dhaba - Food", '2. Airtel Bill Payment - Utilities', '3. House Rent - Housing', "4. Sweta's Boutique Purchase - Shopping", '5. Freelance Web Design Project - Business', '6. Zomato Food Order - Food', '7. Grocery from D-Mart - Groceries', '8. Tuition Fees Received - Income', '9. Diwali Donation to NGO - Charity', '10. Salary - Income', '11. Amazon Shopping - Shopping', '12. Reliance Fresh Groceries - Groceries', '13. Income from Rent - Income', '14. Flipkart Shopping - Shopping', '15. Electricity Bill - Utilities', '16. Internet Bill - Utilities', '17. Car EMI - Transportation', '18. Income from Shares - Investment', '19. Restaurant Bill - Dining Out', '', 'I hope this helps you organize your expenses in a more meaningful way!']


In [13]:
categories_df_all

Unnamed: 0,Transaction vs category,Transaction,Category
0,Sure! Here are the categories I would suggest ...,Sure! Here are the categories I would suggest ...,
1,,,
2,1. Raj's Dhaba - Food,1. Raj's Dhaba,Food
3,2. Airtel Bill Payment - Utilities,2. Airtel Bill Payment,Utilities
4,3. House Rent - Housing,3. House Rent,Housing
5,4. Sweta's Boutique Purchase - Shopping,4. Sweta's Boutique Purchase,Shopping
6,5. Freelance Web Design Project - Business,5. Freelance Web Design Project,Business
7,6. Zomato Food Order - Food,6. Zomato Food Order,Food
8,7. Grocery from D-Mart - Groceries,7. Grocery from D-Mart,Groceries
9,8. Tuition Fees Received - Income,8. Tuition Fees Received,Income


In [15]:
#categories_df_all.to_csv("categories_df_all.csv", index=False)

In [16]:
# Get unique categories in categories_df_all
unique_categories = categories_df_all["Category"].unique()
unique_categories

array([None, 'Food', 'Utilities', 'Housing', 'Shopping', 'Business',
       'Groceries', 'Income', 'Charity', 'Transportation', 'Investment',
       'Dining Out'], dtype=object)

In [17]:
# Drop NA values
categories_df_all = categories_df_all.dropna()

# If category contains "Food", then categorise as "Food and Drinks"
categories_df_all.loc[categories_df_all['Category'].str.contains("Food"), 'Category'] = "Food and Drinks"
# If category contains "Clothing", then categorise as "Clothing"
categories_df_all.loc[categories_df_all['Category'].str.contains("Clothing"), 'Category'] = "Clothing"
# If category contains "Services", then categorise as "Services"
categories_df_all.loc[categories_df_all['Category'].str.contains("Services"), 'Category'] = "Services"
# If category contains "Health" or "Wellness", then categorise as "Health and Wellness"
categories_df_all.loc[categories_df_all['Category'].str.contains("Health|Wellness"), 'Category'] = "Health and Wellness"
# If category contains "Sport", then categorise as "Sport
#  and Fitness"
categories_df_all.loc[categories_df_all['Category'].str.contains("Sport"), 'Category'] = "Sport and Fitness"
# If category contains "Travel", then categorise as "Travel"
categories_df_all.loc[categories_df_all['Category'].str.contains("Travel"), 'Category'] = "Travel"

In [18]:
# Remove the numbering eg "1. " from Transaction column
categories_df_all['Transaction'] = categories_df_all['Transaction'].str.replace(r'\d+\.\s+', '')
categories_df_all

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  categories_df_all['Transaction'] = categories_df_all['Transaction'].str.replace(r'\d+\.\s+', '')


Unnamed: 0,Transaction vs category,Transaction,Category
2,1. Raj's Dhaba - Food,1. Raj's Dhaba,Food and Drinks
3,2. Airtel Bill Payment - Utilities,2. Airtel Bill Payment,Utilities
4,3. House Rent - Housing,3. House Rent,Housing
5,4. Sweta's Boutique Purchase - Shopping,4. Sweta's Boutique Purchase,Shopping
6,5. Freelance Web Design Project - Business,5. Freelance Web Design Project,Business
7,6. Zomato Food Order - Food,6. Zomato Food Order,Food and Drinks
8,7. Grocery from D-Mart - Groceries,7. Grocery from D-Mart,Groceries
9,8. Tuition Fees Received - Income,8. Tuition Fees Received,Income
10,9. Diwali Donation to NGO - Charity,9. Diwali Donation to NGO,Charity
11,10. Salary - Income,10. Salary,Income


In [19]:
# Merge the categories_df_all with the transactions_2022_2023.csv dataframe (df)
df = pd.read_csv("transactions_2022_2023.csv")
df.loc[df['Name / Description'].str.contains("Food"), 'Name / Description'] = "Raj's Dhaba"
df = pd.merge(df, categories_df_all, left_on='Name / Description', right_on='Transaction', how='left')
df

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (Rupee),Transaction vs category,Transaction,Category
0,12/30/2022,Raj's Dhaba,Expense,500,,,
1,12/30/2022,Airtel Bill Payment,Expense,300,,,
2,12/30/2022,House Rent,Expense,15000,,,
3,12/30/2023,Sweta's Boutique Purchase,Expense,1200,,,
4,12/29/2023,Freelance Web Design Project,Income,8000,,,
5,12/29/2023,Raj's Dhaba,Expense,450,,,
6,12/23/2023,Grocery from D-Mart,Expense,2000,,,
7,12/22/2023,Tuition Fees Received,Income,6000,,,
8,12/22/2023,Diwali Donation to NGO,Expense,500,,,
9,12/20/2023,Salary,Income,25000,,,


In [20]:
df.to_csv("transactions_2022_2023_categorized.csv", index=False)