In [1]:
import pandas as pd
import plotly.graph_objects as go

In [33]:
# Load the expenses CSV and convert the 'date' column to datetime values
expenses_df = (
    pd.read_csv('expenses.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Preview the first rows, then the column dtypes, separated by blank lines
print(expenses_df.head(), '\n\n', expenses_df.dtypes, sep='\n')

        date  category            type    cost
0 2020-04-29  shopping         redtick   25.40
1 2020-05-01   eat out        Dominoes   40.06
2 2020-05-02  shopping  village grocer   72.40
3 2020-05-09  shopping  village grocer  121.75
4 2020-05-09   eat out  MY81 Hyderabad   40.00



date        datetime64[ns]
category            object
type                object
cost               float64
dtype: object


In [34]:
# Find the missing dates
#
# Every calendar day between the earliest and latest recorded expense that has
# no entry gets a placeholder row with a cost of 0.00, so that per-day
# aggregations cover a continuous date range.

# Sort the dataframe, based on date (earliest to latest).
# sort_values returns a new frame, so the result must be assigned back;
# mergesort is stable and keeps same-day rows in their original order.
expenses_df = (
    expenses_df
    .sort_values(by='date', kind='mergesort')
    .reset_index(drop=True)
)

# Get the earliest and latest date (min/max do not depend on row order)
earliest_date = expenses_df['date'].min()
latest_date = expenses_df['date'].max()

date_range = pd.date_range(start=earliest_date, end=latest_date)

# Days inside the range that have no recorded expense
zero_expense_dates = date_range.difference(expenses_df['date'])

# Build all placeholder rows in one frame instead of appending row by row
# (DataFrame.append was removed in pandas 2.0 and copied the frame each time).
zero_expense_df = pd.DataFrame({
    'date': zero_expense_dates,
    'category': 'zero expenses',
    'type': '-',
    'cost': 0.00,
})

# Skip the concat when nothing is missing (e.g. when this cell is re-run)
if not zero_expense_df.empty:
    expenses_df = pd.concat([expenses_df, zero_expense_df], ignore_index=True)

print(expenses_df)

         date       category            type    cost
0  2020-04-29       shopping         redtick   25.40
1  2020-05-01        eat out        Dominoes   40.06
2  2020-05-02       shopping  village grocer   72.40
3  2020-05-09       shopping  village grocer  121.75
4  2020-05-09        eat out  MY81 Hyderabad   40.00
..        ...            ...             ...     ...
80 2020-06-17  zero expenses               -    0.00
81 2020-06-18  zero expenses               -    0.00
82 2020-06-22  zero expenses               -    0.00
83 2020-06-24  zero expenses               -    0.00
84 2020-06-25  zero expenses               -    0.00

[85 rows x 4 columns]


In [35]:
# Time series plot: total cost per date
costs_df = (
    expenses_df[['date', 'cost']]
    .groupby(['date'])
    .sum()
    .reset_index()
)

# Single trace of the per-date totals, drawn as a line with point markers
daily_trace = go.Scatter(
    x=costs_df['date'],
    y=costs_df['cost'],
    mode='lines+markers',
)

fig = go.Figure(data=[daily_trace])
fig.update_layout(title_text='Overview of expenses')
# Show a range slider on the x axis
fig.update_xaxes(rangeslider_visible=True)
fig.show()


In [None]:
# Bar chart for days comparisons
#
# Builds a frame with one row per (date, day_of_week) pair holding the summed
# cost. .assign() returns a new frame, so no columns are written into a slice
# of expenses_df (the original assignments raised SettingWithCopyWarning).
day_costs_df = (
    expenses_df[['date', 'cost']]
    # Make sure 'date' is datetime before using the .dt accessor
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    # Weekday name of each date, from Series.dt.day_name()
    .assign(day_of_week=lambda frame: frame['date'].dt.day_name())
    .groupby(['date', 'day_of_week'])
    .sum()
    .reset_index()
)