In [84]:
# Importing necessary libs
import pandas as pd
import numpy as np
import os
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from dotenv import load_dotenv
import os



In [85]:
# Load the variables defined in the local dotenv file into the process
# environment, then read the Gemini API key from there.
load_dotenv('apikey.env')
api_key = os.environ.get('API_KEY')


In [86]:

PATH = 'data/'


def merge_datasets(path=None):
    """
    Merge every Excel workbook in a directory into one transactions table.

    Each workbook must contain an 'Income' and an 'Expenses' sheet whose
    header row is the second row of the sheet (the first row is skipped).
    Rows are tagged with a 'spending_type' column ('Income' or 'Expense'),
    stacked, sorted by 'Date and time', and stripped of the currency/account
    bookkeeping columns that the analysis does not use.

    Parameters
    ----------
    path : str, optional
        Directory to scan. Defaults to the module-level ``PATH``, looked up
        at call time so a changed ``PATH`` is picked up without redefining
        this function.

    Returns
    -------
    pandas.DataFrame
        Combined income and expense rows sorted by 'Date and time'.

    Raises
    ------
    ValueError
        If the directory contains no ``.xlsx`` workbook.
    """
    if path is None:
        path = PATH

    # Sheet name inside the workbook -> label written to 'spending_type'.
    sheet_labels = {'Income': 'Income', 'Expenses': 'Expense'}
    # Columns removed from the result; re-add here if they become relevant.
    unused_columns = [
        'Amount in account currency', 'Account currency',
        'Transaction amount in transaction currency', 'Transaction currency',
        'Tags', 'Account', 'Default currency', 'Comment',
    ]

    # Only real workbooks can be read sheet-by-sheet: a .csv has no sheets and
    # would make read_excel fail, and '~$' files are Excel's lock files.
    # Sorting makes the load order independent of the filesystem.
    workbooks = sorted(
        name for name in os.listdir(path)
        if name.lower().endswith('.xlsx') and not name.startswith('~$')
    )
    if not workbooks:
        raise ValueError(f"No .xlsx workbooks found in {path!r}")

    frames = {sheet: [] for sheet in sheet_labels}
    for name in workbooks:
        # Passing a list of sheet names returns {sheet_name: DataFrame}, so
        # each workbook is opened once instead of once per sheet.
        sheets = pd.read_excel(
            os.path.join(path, name), skiprows=1, sheet_name=list(sheet_labels)
        )
        for sheet, label in sheet_labels.items():
            frame = sheets[sheet]
            frame['spending_type'] = label  # identifier column
            frames[sheet].append(frame)

    # Stack income first, then expenses, before the date sort below.
    df_income = pd.concat(frames['Income'], ignore_index=True)
    df_expense = pd.concat(frames['Expenses'], ignore_index=True)
    df_combined = pd.concat([df_income, df_expense], ignore_index=True)

    return df_combined.sort_values(by='Date and time').drop(columns=unused_columns)


In [87]:
# Build the combined income/expense table from the workbooks under PATH.
df = merge_datasets()

In [88]:
# Preview the merged transactions (already sorted by 'Date and time').
df

Unnamed: 0,Date and time,Category,Amount in default currency,spending_type
144,2023-04-28,SIM,50.00,Expense
13,2023-05-02,Other,1350.00,Income
143,2023-05-03,Groceries,74.52,Expense
138,2023-05-05,Transportation,5.89,Expense
139,2023-05-05,SIM,128.19,Expense
...,...,...,...,...
147,2024-09-24,Cafe,34.28,Expense
146,2024-09-25,Groceries,69.55,Expense
15,2024-09-26,Balancing,8.00,Income
14,2024-09-27,Paycheck,149.00,Income


In [89]:
# Split the transaction date into calendar year and month, rename the columns
# to short names, and keep only the columns used by the analysis.
# The frame is rebuilt through a chain instead of being mutated in place, so
# the object displayed by the previous cell is left untouched.
transaction_dates = pd.to_datetime(df['Date and time'])

df = (
    df.assign(Year=transaction_dates.dt.year, Month=transaction_dates.dt.month)
      .rename(columns={'spending_type': 'Type', 'Amount in default currency': 'Amount'})
      .loc[:, ['Year', 'Month', 'Type', 'Category', 'Amount']]
)

# Explicit copies: later column assignments on either subset will not raise
# SettingWithCopyWarning or appear to write back into `df`.
df_expense = df[df['Type'] == 'Expense'].copy()
df_income = df[df['Type'] == 'Income'].copy()

In [90]:
# Overall amount per transaction type (one row for Expense, one for Income).
df_total = df.groupby('Type', as_index=False)['Amount'].agg('sum')
df_total

Unnamed: 0,Type,Amount
0,Expense,41572.86
1,Income,45533.14


In [91]:
# Total expense amount for each spending category.
dfexpense_category = df_expense.groupby('Category', as_index=False)['Amount'].agg('sum')
dfexpense_category

Unnamed: 0,Category,Amount
0,Balancing,171.93
1,Cafe,4678.76
2,Education,19795.87
3,Fashion,428.78
4,Gadgets,85.71
5,Gifts,627.72
6,Groceries,2340.62
7,Haircut,65.55
8,Home,10700.0
9,Laptop repair,135.6


In [92]:
# Total expense amount for each (Year, Month) pair.
month_keys = ['Year', 'Month']
dfexpense_month = df_expense.groupby(month_keys, as_index=False)['Amount'].agg('sum')
dfexpense_month

Unnamed: 0,Year,Month,Amount
0,2023,4,50.0
1,2023,5,1435.02
2,2023,6,454.49
3,2023,7,536.26
4,2023,8,1999.06
5,2023,9,2278.62
6,2023,10,8480.87
7,2023,11,2376.88
8,2023,12,429.03
9,2024,1,1143.25


In [93]:
# Total income amount for each (Year, Month) pair.
income_by_period = df_income.groupby(['Year', 'Month'], as_index=False)
df_income_month = income_by_period['Amount'].agg('sum')
df_income_month

Unnamed: 0,Year,Month,Amount
0,2023,5,3384.0
1,2023,7,2013.0
2,2023,8,5090.0
3,2023,9,5000.0
4,2023,10,3055.7
5,2023,11,2000.0
6,2023,12,2243.21
7,2024,1,100.0
8,2024,2,2854.38
9,2024,3,4303.57


In [99]:
# Write the prompt sent to the model: the advisor instructions first, then
# each summary table rendered as plain text.
file_path = "financial_summary.txt"

# Instruction paragraphs; each one is followed by a blank-line separator.
prompt_instructions = [
    "You are a financial advisor andb based on the spending habits from the summary data below you will give financial advice. If anything besides financial advice is aksed by the user you will not answer and reply you will only givefinancial advice.\n",
    "You will first give a brief on the users spending habits and give stats in numbers look into the data before answering this do calculation and anything you need and then you will give financial advice based on the user data.\n",
    "Also try to sound as human as possile dont give generic advice act like a person in a conversation:\n",
    "The user data is below as follows:\n",
]

# (heading, table) pairs. The income section uses df_income_month; it
# previously repeated dfexpense_month, so the model was given expense figures
# labelled as income.
summary_tables = [
    ("This is dataframe for total expenses and imcome:\n", df_total),
    ("This is the expenses by month :\n", dfexpense_month),
    ("This is the income by month :\n", df_income_month),
    ("This is the expenses in different category :\n", dfexpense_category),
]

# The file is closed automatically when the 'with' block exits.
with open(file_path, "w") as file:
    for instruction in prompt_instructions:
        file.write(instruction)
        file.write("\n\n")

    for heading, table in summary_tables:
        file.write(heading)
        file.write(table.to_string())  # plain-text rendering of the DataFrame
        file.write("\n\n")

print(f"Data saved successfully to {file_path}")


Data saved successfully to financial_summary.txt


In [100]:


import os
import google.generativeai as genai

# The key is loaded from apikey.env and read earlier as 'API_KEY'. Environment
# variable names are case-sensitive on Linux/macOS, so looking up only the
# lowercase 'api_key' fails there; accept either spelling.
gemini_api_key = os.environ.get("API_KEY") or os.environ.get("api_key")
if not gemini_api_key:
    raise KeyError("API_KEY is not set; define it in apikey.env or the environment")

genai.configure(api_key=gemini_api_key)

# Generation settings passed to the model.
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
  # safety_settings = Adjust safety settings
  # See https://ai.google.dev/gemini-api/docs/safety-settings
)

# The prompt (instructions + summary tables) written by the previous cell.
file_path = "financial_summary.txt"

with open(file_path, "r") as file:
    file_content = file.read()

# Send the whole prompt as a single request and show the model's reply.
response = model.generate_content(file_content)
print(response.text)

Okay, so based on your spending data, you've had a total income of $45,533.14 and expenses of $41,572.86 over a period of time. That means you have a surplus of $3,960.28. 

Looking at your monthly expenses, you've had some pretty significant spikes, particularly in June and December 2023 and June 2024.  This suggests there might be some irregular expenses happening, like perhaps a large purchase or a one-time bill that needs addressing.  

It's also interesting to note that your monthly income and expenses seem to be fairly consistent, which is good. However, there are some categories where your spending is a bit high. For example, your "Education" category has a significant amount, $19,795.87. That's a big chunk of your expenses!

So here's my advice: 

1. **Track those irregular expenses**: Those spikes need to be investigated. What caused those large expenses in June and December? Understanding the reason can help you better plan for them in the future. 
2. **Review your education 

In [None]:
# TODO: retain the chat history so this behaves like a chatbot that can answer follow-up financial questions.
