In [44]:
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_csv("収入支出管理表.csv")
df.head()

Unnamed: 0,date,category,amount,memo
0,2022/07/24,その他,¥120,スティックノリ(TOEIC写真貼る用)
1,2022/07/24,その他,¥100,水
2,2022/07/24,その他,¥900,TOEIC用写真(写真証明機)
3,2022/07/24,旅費交通費,¥377,電車（海老名→淵野辺）
4,2022/07/25,自己投資,¥270,コメダ珈琲


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1106 entries, 0 to 1105
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   date      1103 non-null   object
 1   category  1106 non-null   object
 2   amount    1106 non-null   object
 3   memo      1104 non-null   object
dtypes: object(4)
memory usage: 34.7+ KB


### date列を日付型にデータ整形するためのコード

In [4]:
# Naデータの削除
df = df[~df["date"].isna() == True]
# dateのフォーマットの日付統一
df["date"] = df["date"].apply(lambda x: x.replace("-", "/"))
df[df["date"].str.contains("-")]

#date列のデータタイプを日付型に変更
df["date"] = pd.to_datetime(df["date"])
display(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 1103 entries, 0 to 1102
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1103 non-null   datetime64[ns]
 1   category  1103 non-null   object        
 2   amount    1103 non-null   object        
 3   memo      1101 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 43.1+ KB


None

### amount列をint型に変更するコード

In [5]:
df["amount"] = df["amount"].apply(lambda x: x[1:].replace(",", ""))

In [6]:
df["amount"] = df["amount"].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1103 entries, 0 to 1102
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1103 non-null   datetime64[ns]
 1   category  1103 non-null   object        
 2   amount    1103 non-null   int64         
 3   memo      1101 non-null   object        
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 43.1+ KB


In [8]:
df[df["date"].between("2023-08-01", "2023-08-31")]

Unnamed: 0,date,category,amount,memo
976,2023-08-01,住宅,98430,家賃
977,2023-08-01,生活,20930,UNIQLO、日傘、PC用眼鏡
978,2023-08-01,自己投資,100,コメダ珈琲
979,2023-08-01,飲食費,500,積田食堂
980,2023-08-01,その他,1553,Amazon？？
...,...,...,...,...
1098,2023-08-30,飲食費,1480,サブウェイ
1099,2023-08-31,通信費,5921,ahamo
1100,2023-08-31,自己投資,4300,コメダ珈琲チケット
1101,2023-08-31,飲食費,850,すき家


In [10]:
df.to_csv("支出管理.csv", index=False)

In [150]:
a = df["category"].value_counts().index
list(a)

['飲食費', '自己投資', 'その他', '旅費交通費', '娯楽', '水道光熱費', '住宅', '生活', '借金', '通信費']

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1103 entries, 0 to 1102
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1103 non-null   datetime64[ns]
 1   category  1103 non-null   object        
 2   amount    1103 non-null   int64         
 3   memo      1101 non-null   object        
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 43.1+ KB


In [37]:
sum_df = df.groupby("category")[["amount"]].sum().reset_index()
sum_df

Unnamed: 0,category,amount
0,その他,4686687
1,住宅,2251431
2,借金,848087
3,娯楽,609210
4,旅費交通費,79366
5,水道光熱費,158507
6,生活,69463
7,自己投資,2499327
8,通信費,157468
9,飲食費,699688


In [19]:
sum_df = df.groupby("category")["amount"].sum().reset_index()
sum_df

Unnamed: 0,category,amount
0,その他,4686687
1,住宅,2251431
2,借金,848087
3,娯楽,609210
4,旅費交通費,79366
5,水道光熱費,158507
6,生活,69463
7,自己投資,2499327
8,通信費,157468
9,飲食費,699688


In [46]:
labels = df["category"].value_counts().index
sizes = df.groupby("category")["amount"].sum().sort_values()

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

(-1.0999957480004043,
 1.0999935252175326,
 -1.0999997953629295,
 1.0999999902553776)