In [47]:
import pandas as pd
import numpy as np 
import plotly.express as px
import statsmodels
import plotly.graph_objects as go
from faker import Faker
import random
import datetime


from faker.providers import DynamicProvider

faker = Faker('en_US')

faker.add_provider(DynamicProvider(
     provider_name="subtype",
     elements=["checking", "savings"],
))

faker.add_provider(DynamicProvider(
     provider_name="type",
     elements=["depository"],
))

faker.add_provider(DynamicProvider(
     provider_name="transaction_type",
     elements=["special", "place"],
))

faker.add_provider(DynamicProvider(
     provider_name="personal_finance_category",
     elements=["entertainment", "trnasportation", "food_and_drink", "travel"],
))


def generate_random_date(start_date, end_date):
    start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    time_between_dates = end_date - start_date
    days_between_dates = time_between_dates.days
    random_number_of_days = random.randrange(days_between_dates)
    random_date = start_date + datetime.timedelta(days=random_number_of_days)
    return random_date


In [19]:
def get_accounts():
    accounts = []

    for i in range(10):
        accounts.append({
        "account_id": faker.uuid4(),
        "official_name": faker.company(),
        "type": faker.type(),
        "subtype": faker.subtype(),
        "balance_current": random.randint(100, 1000),
        "balance_available": random.randint(100, 1000),
        "balance_limit": 10_000,
        "rate": 3.4
        })
        
    return accounts

    
accounts = pd.DataFrame(get_accounts())
accounts

accounts['account_id']



0    8040eb60-dad8-4d03-a158-53c2d0571d91
1    2a3da537-90ca-4d3a-b5b1-1b16263a759f
2    d5c8549f-1bba-41ac-8bb9-2a6afdf88c8e
3    96cebff6-ac88-4347-86b4-ffdd49bda362
4    881f930a-fdbd-4035-8dfd-6d050ec07551
5    95b23041-ca63-4866-b8fb-be44ac1e4082
6    37a7403b-b656-4be8-9599-67e8bb996a28
7    2993e6e9-8478-46c7-9ced-4b9201b02f38
8    95395096-0777-4c51-9990-831fe47c8e84
9    2c821091-a11e-425b-acf1-4d23b2e65a35
Name: account_id, dtype: object

In [50]:
def get_transactions(count=50):
    transactions = []
    transaction_faker = Faker('en_US')
    transaction_faker.add_provider(DynamicProvider(
        provider_name="accounts",
        elements=list(accounts['account_id']),
    ))

    for i in range(count):      
        transactions.append({
        "transaction_id": faker.uuid4(),
        "account_id": transaction_faker.accounts(),
        "amount": random.randint(1, 100),
        "date": generate_random_date("2023-01-01", "2024-01-01"),
        "category_id": random.randint(1000,9999),
        "merchant_name": faker.company(),
        "merchant_entity_id": faker.uuid4(),
        "website": faker.url(),
        "pending": False,
        "transaction_type": faker.transaction_type(),
        "personal_finance_category_primary": faker.personal_finance_category(),
        "personal_finance_category_detailed": faker.personal_finance_category(),
        })
        
    return transactions

    
    
pd.DataFrame(get_transactions())




Unnamed: 0,transaction_id,account_id,amount,date,category_id,merchant_name,merchant_entity_id,website,pending,transaction_type,personal_finance_category_primary,personal_finance_category_detailed
0,8e294c9e-0fa9-41e3-86fb-09f285e6f3eb,2c821091-a11e-425b-acf1-4d23b2e65a35,68,2023-07-31,6433,"Rodriguez, Hart and Shelton",0dea61d3-d6da-433d-944f-b26a2ad96405,http://www.jones.com/,False,place,entertainment,travel
1,7a44e1bb-196a-4872-a895-0f5ccd908564,96cebff6-ac88-4347-86b4-ffdd49bda362,46,2023-05-15,6087,Barrett-Walter,84da2631-8a82-4da9-aa8d-c5f92acc3745,http://www.johnson-cobb.org/,False,special,food_and_drink,travel
2,7c6ecc90-2675-47e2-8341-bdbd9f1db005,37a7403b-b656-4be8-9599-67e8bb996a28,55,2023-01-04,8365,"Martinez, Schneider and Austin",efcc6f91-d4ec-4ef3-ba62-2c03e45f4e17,http://www.gutierrez.info/,False,place,travel,travel
3,751daadb-88e6-4a51-9c95-716b1fa716db,2a3da537-90ca-4d3a-b5b1-1b16263a759f,14,2023-10-07,5261,Lee and Sons,86179ad1-9a43-409a-8b3c-8ad49c772286,http://www.bishop-torres.com/,False,special,travel,food_and_drink
4,07b2bd06-6e7a-4580-9cd8-ff8a6c853355,2a3da537-90ca-4d3a-b5b1-1b16263a759f,37,2023-10-20,2268,Campbell-Vance,2030b428-7a2d-45f4-8f57-0d0b203a6242,https://salazar.net/,False,place,travel,entertainment
5,ee76f71f-98ac-4271-a376-4fd2069edfe3,95b23041-ca63-4866-b8fb-be44ac1e4082,65,2023-07-24,3066,Martinez LLC,7acdf15c-ace2-4961-b17a-56f62f098669,https://clarke-gaines.biz/,False,place,food_and_drink,food_and_drink
6,9ff109b3-1e51-4c43-9f43-6dfb541be3c1,95395096-0777-4c51-9990-831fe47c8e84,57,2023-01-18,1888,Conway Group,774de2d2-383d-4407-91d0-98daa81e251e,https://www.bonilla-hall.com/,False,place,trnasportation,travel
7,9cf19838-0ce1-4989-9ffe-7431b4af4260,2993e6e9-8478-46c7-9ced-4b9201b02f38,16,2023-09-24,2636,"Bauer, Lee and Walls",6a7d32d0-03e0-45e7-aca3-9556e8551153,http://allen.org/,False,place,food_and_drink,travel
8,7e63bf9c-dd99-46e0-9dc9-7520d5e93d14,96cebff6-ac88-4347-86b4-ffdd49bda362,66,2023-04-25,1577,"Hancock, Moon and Wallace",fc61bf7d-766b-435a-a9d2-c3857d389a79,https://stevenson-johnson.biz/,False,special,travel,food_and_drink
9,048eec16-f6b6-4300-809b-68a309e569f2,2993e6e9-8478-46c7-9ced-4b9201b02f38,13,2023-09-30,8671,"Franklin, Lopez and Kim",58b7e4c1-ea14-49d2-bc76-bc4978384985,http://www.drake.biz/,False,place,entertainment,travel


In [65]:
date = df.date.str.split('/', expand =True)
df['month'] = date[0]
df['day'] = date[1]
df['year'] = date[2]

df.year = pd.to_numeric(df.year)
df.month = pd.to_numeric(df.month)
df.day = pd.to_numeric(df.day)

In [66]:
df.date = pd.to_datetime(df.date)

In [67]:
df.head()

Unnamed: 0,date,item_name,item_category,price,month,day,year
0,2021-11-29,tea,food,10.0,11,29,2021
1,2021-11-29,cold coffe,food,35.0,11,29,2021
2,2021-11-29,flat coffe,food,10.0,11,29,2021
3,2021-11-29,biscuits,food,10.0,11,29,2021
4,2021-11-29,medicine,medicine,500.0,11,29,2021


In [71]:
def date_wise_df(value) : 
    dummy_df = df.groupby([value])['price'].sum().reset_index()
    fig = px.bar(dummy_df, x = value, y = 'price')
    return fig.show()

date_wise_df('month')

In [73]:
df['month_year'] = df['date'].dt.to_period('M')
dummy_df = df.groupby(['month_year', 'item_category'])['price'].sum().reset_index()
dummy_df.month_year = dummy_df.month_year.astype(str)
fig = px.bar(dummy_df, x="month_year", y="price", color="item_category")
fig.show()

# fig = go.Figure()
# counter, color_list = 0, ['#f71616', '#f71616', '#f76c16', '#bf7411', '#bfb911', '#94bf11', '#54bf11',  '#bf7411', '#bfb911', '#94bf11', '#54bf11']
# for category in dummy_df.item_category.unique() :
#     temp_df = dummy_df[dummy_df.item_category == category]
#     fig.add_trace(go.Bar(
#         x=temp_df.month_year,
#         y=temp_df.price,
#         name= category,
#         # marker_color= color_list[counter]
#     ))
#     counter += 1 

# fig.update_layout(plot_bgcolor="white", barmode='group')

In [13]:
category_df = df.groupby(['item_category'])['price'].sum().reset_index()

In [14]:
fig = px.pie(category_df, values = 'price', names = 'item_category')
fig.show()

In [15]:
date_df = df.groupby(['date'])['price'].sum().reset_index()
fig = px.line(date_df, x = 'date', y = 'price')
fig.show()

In [16]:
food = df[df.item_category == 'food']
food[food.price == food.price.max()]

Unnamed: 0,date,item_name,item_category,price,month,day,year
157,2022-02-06,pizza,food,567,2,6,2022


In [17]:
def plot_box_plot(feature) : 
    dummy_df = df[df.item_category == feature]
    fig = px.box(dummy_df, x = 'price')
    return fig.show()

plot_box_plot('travel')

In [18]:
total_df = pd.read_csv('data/bangalore  - Total_spending.csv')
total_df = total_df.T.reset_index().drop(4)

In [19]:
px.pie(total_df, names = 'index', values = 0)

In [20]:
dummy_df = df.rename(columns = {'price' : 'total spending'})
fig = px.treemap(
    dummy_df, path = [px.Constant('total spending'), 'item_name'], 
    values = 'total spending'
)

fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

