In [29]:
import pandas as pd
import numpy as np
import csv
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole notebook; the option's default value is 'warn'.
# NOTE(review): with the check off, writes to filtered slices of a DataFrame
# are no longer flagged anywhere in this notebook.
pd.options.mode.chained_assignment = None  # default='warn'

In [42]:
# Payment-method label as it appears in the bill export -> short account name.
# Labels missing from this table are looked up with an empty-string fallback.
account_mapper = {
    '中国银行信用卡(7211)': '中行7211',
    '招商银行信用卡(0565)': '招行0565',
}

# Transaction category in the bill export -> category path in the target
# format ("parent > child"). Lookups fall back to '其他' for unknown labels.
category_mapper = {
    '餐饮美食': '餐饮&外出就餐 > 餐饮/外出就餐',
    '充值缴费': '账单 > 燃气',
    '服饰装扮': '衣服 > 服装',
    '公共服务': '其他',
    '家居家装': '房屋 > 家具/装饰品',
    '其他': '其他',
    '日用百货': '其他',
    '数码电器': '数码 > 其他',
    '医疗健康': '卫生保健 > 医疗',
}

In [None]:
# Normalise the raw export into a plain CSV that pandas can read:
#   * keep only rows that have more than two fields (shorter lines are skipped),
#   * strip surrounding whitespace from every field.
# newline='' is what the csv module documentation requires for files handed to
# csv.reader / csv.writer, so that line endings inside quoted fields are left
# for the csv module to interpret.
with open('bill_data_original.csv', encoding='utf-8', newline='') as csvfile:
    reader = csv.reader(csvfile)
    record_list = [
        [field.strip() for field in row]
        for row in reader
        if len(row) > 2
    ]

# Path of the cleaned file; read back by the loading step of the notebook.
file_path = 'bill_data.csv'
with open(file_path, 'w', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(record_list)

In [50]:
# Load the cleaned export.
bill = pd.read_csv(file_path, encoding='utf-8')

# Drop the unnamed trailing column when it is present; errors='ignore' keeps
# the cell from failing with KeyError on exports that do not have it.
# NOTE(review): presumably produced by a trailing delimiter in the export - confirm.
bill = bill.drop(columns=['Unnamed: 11'], errors='ignore')

# Blank out missing values in every column except the amount. Filling the
# amount with '' would turn it into an object column and make the `> 0`
# comparison below raise TypeError; left as NaN, such rows simply fail the
# comparison and are filtered out.
bill = bill.fillna({column: '' for column in bill.columns if column != '金额'})

# Discard failed / closed transactions, Yu'e Bao interest payouts and
# non-positive amounts.
bill = bill[~bill['交易状态'].str.contains('失败')]
bill = bill[~bill['交易状态'].str.contains('交易关闭')]
bill = bill[~bill['商品说明'].str.contains('^余额宝-.*-收益发放')]
bill = bill[bill['金额'] > 0]

# Add the output columns (appended in this order) and remove the source
# columns they replace, plus the two order-number columns.
bill = bill.assign(
    Currency='CNY',
    Payee=bill['交易对方'],
    # Keep only the part before the first space of the timestamp. Using
    # .str[0] instead of expand=True also works when the frame has no rows.
    Date=bill['交易时间'].str.split(' ').str[0],
    Description=bill['商品说明'],
).drop(columns=['交易对方', '交易时间', '交易订单号', '商家订单号', '商品说明'])

# Split by direction. Each part is an independent copy so that later cells can
# add columns to it without writing into a view of `bill`.
expense_bill = bill[bill['收/支'] == '支出'].copy()
income_bill = bill[bill['收/支'] == '收入'].copy()
other_bill = bill[bill['收/支'] == '其他'].copy()



In [None]:
# Build the expense export.
#
# The rows are re-selected from `bill` (same filter as the split step) rather
# than mutating the previous `expense_bill` in place, so this cell can be
# re-run on its own, e.g. after editing the mapper tables.
#
# Column-wise Series.map replaces the row-wise DataFrame.apply: with no
# matching rows, apply(axis=1) returns a DataFrame instead of a Series and the
# column assignment fails.
expense_bill = bill[bill['收/支'] == '支出']
expense_bill = expense_bill.assign(
    # Expenses are exported as negative amounts.
    Amount=-1 * expense_bill['金额'],
    # Unknown payment methods map to an empty account name.
    Account=expense_bill['收/付款方式'].map(lambda method: account_mapper.get(method, '')),
    Transfers='',
    # Unknown categories fall back to '其他'.
    Category=expense_bill['交易分类'].map(lambda label: category_mapper.get(label, '其他')),
).drop(columns=['收/支', '对方账号', '收/付款方式', '金额', '交易分类', '交易状态'])

expense_bill.to_csv('expense_bill.csv', index=False)

In [51]:
# Build the refund export: rows whose direction is '其他' and whose description
# starts with '退款'.
#
# The rows are re-selected from `bill` (same filter as the split step) rather
# than mutating the previous `other_bill` in place, so this cell can be re-run
# on its own.
#
# Column-wise Series.map replaces the row-wise DataFrame.apply: with no
# matching rows, apply(axis=1) returns a DataFrame instead of a Series and the
# column assignment fails.
other_bill = bill[bill['收/支'] == '其他']
other_bill = other_bill[other_bill['Description'].str.contains('^退款')]

other_bill = other_bill.assign(
    # Refunds keep the amount's positive sign.
    Amount=other_bill['金额'],
    # Unknown payment methods map to an empty account name.
    Account=other_bill['收/付款方式'].map(lambda method: account_mapper.get(method, '')),
    Transfers='',
    Category='其他',
).drop(columns=['收/支', '对方账号', '收/付款方式', '金额', '交易分类', '交易状态'])

other_bill.to_csv('other_bill.csv', index=False)