In [68]:
from pymongo import MongoClient
import os
import bson

def mongoCollection(connstr, db, collection):
    """Return a handle to one collection of a MongoDB database.

    Args:
        connstr: Connection string handed to ``MongoClient``.
        db: Name of the database to select on the client.
        collection: Name of the collection to select inside that database.

    Returns:
        The object obtained by indexing the client with ``db`` and the
        resulting database with ``collection``.
    """
    database = MongoClient(connstr)[db]
    return database[collection]

import re
import datetime

# strptime layout of the timestamp embedded in the HDFC alert messages.
hdfcCreditCardInterpreterFormatStr = '%Y-%m-%d:%H:%M:%S'

# Sample message:
# ALERT: You've spent Rs.5605.00  on CREDIT Card xx3690 at AARKITHA on 2020-02-22:12:09:36.Avl bal - Rs.286107.00, curr o/s - Rs.13893.00.Not you? Call 18002586161.
def hdfcCreditCardInterpreter(txn):
    """Parse an HDFC "ALERT: You've spent ..." SMS that also reports balances.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount``, ``payment_mode``,
        ``merchant``, ``datetime``, ``available_balance`` and
        ``outstanding_amount`` when the text matches the layout shown in the
        sample above; an empty dict otherwise. ``merchant`` is ``''`` when the
        message names a card but no "at <merchant>" part.
    """
    # Raw strings keep \s and \d as regex escapes, and the literal dots in
    # "Rs." and in the amounts are escaped so they no longer match any character.
    x = re.search(
        r"ALERT: You've spent Rs\.([0-9.]+)\s+on\s+([a-zA-Z0-9\s._]+) on "
        r"(\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2})"
        r"\.Avl bal - Rs\.([0-9.]+), curr o/s - Rs\.([0-9]+\.[0-9]+)",
        txn)
    if x is None:
        return {}

    # Split on the standalone word "at". A plain substring split would also cut
    # inside any word that merely contains "at" (for example "Platinum").
    card_and_merchant = re.split(r'\s+at\s+', x.group(2).strip(), maxsplit=1)
    payment_mode = card_and_merchant[0].strip()
    merchant = card_and_merchant[1].strip() if len(card_and_merchant) > 1 else ''

    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': payment_mode,
        'merchant': merchant,
        'datetime': datetime.datetime.strptime(x.group(3), hdfcCreditCardInterpreterFormatStr),
        'available_balance': float(x.group(4)),
        'outstanding_amount': float(x.group(5))
    }

# Sample messages handled by hdfcCreditCardInterpreter2:
# ALERT:You've spent Rs.428.00 on CREDIT Card xx3690 at FRESHTOH3631510 in BANGALORE on 2020-02-15:16:46:51.Not you?Call 18002586161.
# ALERT:You've spent Rs.3638.00 via Debit Card xx6504 at www.lenskart.c on 2019-01-14:20:29:59.Avl Bal Rs.264666.51.Not you?Call 18002586161.
def hdfcCreditCardInterpreter2(txn):
    """Parse an HDFC "ALERT:You've spent ..." SMS (no space after "ALERT:").

    This name used to be defined twice, once for the "on <card>" wording and
    once for the "via <card>" wording. The later definition replaced the
    earlier one, so "on" messages could no longer be parsed. A single
    definition now accepts both wordings.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount``, ``payment_mode``,
        ``merchant`` and ``datetime`` when the text matches either sample
        layout above; an empty dict otherwise. ``merchant`` is ``''`` when the
        message names a card but no "at <merchant>" part.
    """
    # Raw strings keep \s and \d as regex escapes; the dot in "Rs." is escaped
    # so it only matches a literal dot.
    x = re.search(
        r"ALERT:You've spent Rs\.([0-9.]+)\s+(?:on|via)\s+([a-zA-Z0-9\s._/]+) on "
        r"(\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2})",
        txn)
    if x is None:
        return {}

    # Split on the standalone word "at". A plain substring split would also cut
    # inside any word that merely contains "at" (for example "Platinum").
    card_and_merchant = re.split(r'\s+at\s+', x.group(2).strip(), maxsplit=1)
    payment_mode = card_and_merchant[0].strip()
    merchant = card_and_merchant[1].strip() if len(card_and_merchant) > 1 else ''

    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': payment_mode,
        'merchant': merchant,
        'datetime': datetime.datetime.strptime(x.group(3), hdfcCreditCardInterpreterFormatStr)
    }

from IPython.display import HTML, display
import tabulate

def extract_transactions(i):
    """Flatten one ``(index, document)`` pair into a table row.

    Args:
        i: A pair whose second element is a document with a ``transaction``
            mapping and an ``_id``.

    Returns:
        A list holding the transaction's values in their stored order,
        followed by the ``_id`` converted to a string.
    """
    document = i[1]
    row = [*document['transaction'].values()]
    row.append(str(document['_id']))
    return row

def mark_duplicate(collection, _id):
    """Flag one stored message as a duplicate.

    Only the ``status.duplicate`` field is written. Setting the whole
    ``status`` sub-document, as was done before, erased every other status
    flag on the document (for example ``analysis_done``).

    Args:
        collection: Collection object exposing ``update_one``.
        _id: Value accepted by ``bson.ObjectId`` identifying the document.

    Returns:
        The result object returned by ``collection.update_one``.
    """
    result = collection.update_one(
        {'_id': bson.ObjectId(_id)},
        {'$set': {'status.duplicate': True}})
    print('success' if result.modified_count == 1 else 'failed')
    return result

def add_category_info(collection, _id, category='', sub_category=''):
    """Store a category and sub-category on one document's transaction.

    Args:
        collection: Collection object exposing ``update_one``.
        _id: Value accepted by ``bson.ObjectId`` identifying the document.
        category: Value written to ``transaction.category``.
        sub_category: Value written to ``transaction.sub_category``.

    Returns:
        The result object returned by ``collection.update_one``.
    """
    selector = {'_id': bson.ObjectId(_id)}
    changes = {'$set': {'transaction.category': category, 'transaction.sub_category': sub_category}}
    result = collection.update_one(selector, changes)
    if result.modified_count == 1:
        print('success')
    else:
        print('failed')
    return result

In [6]:
# The connection string embeds the database username and password, so it is
# read from the environment instead of being stored in the notebook.
# Set it before starting Jupyter, e.g.
#   export MONGODB_URI='mongodb+srv://<user>:<password>@<host>/test?retryWrites=true&w=majority'
# NOTE(review): the password that used to be written on this line has been
# exposed in the notebook and should be rotated.
collection = mongoCollection(os.environ['MONGODB_URI'], 'smsinfo', 'transactions')

In [69]:
def _analyse_messages(collection, message_regex, interpreter, only_pending=True):
    """Parse stored SMS messages and save the extracted transaction fields.

    Args:
        collection: Collection holding the message documents.
        message_regex: Regular expression applied to ``message.text`` to
            select the documents to process.
        interpreter: Function that takes the message text and returns a dict
            of transaction fields, or an empty dict when it cannot parse it.
        only_pending: When True, skip documents whose
            ``status.analysis_done`` is already True.
    """
    query = {"message.text": {'$regex': message_regex}}
    if only_pending:
        query["status.analysis_done"] = {'$ne': True}

    # Read every matching document before writing, so the updates below cannot
    # affect what the query returns.
    for _item in list(collection.find(query).sort([("message.date", 1)])):
        print("message", _item)
        _transaction = interpreter(_item['message']['text'])
        print("analysis", _transaction, "\n\n")
        if not _transaction:
            # Parsing failed: leave the document untouched so it is not
            # recorded as analysed with an empty transaction.
            continue
        # Dotted paths update only the parsed fields and the analysis flag.
        # Replacing the whole `transaction` / `status` sub-documents would
        # erase anything else stored there (category, duplicate flag, ...).
        changes = {'transaction.' + key: value for key, value in _transaction.items()}
        changes['status.analysis_done'] = True
        result = collection.update_one({'_id': _item['_id']}, {'$set': changes})
        # matched_count, not modified_count: re-parsing a document whose values
        # did not change is still a successful update.
        print('success' if result.matched_count == 1 else 'unsuccessful')


# Alerts that start with "ALERT: You've" (with a space) and report balances.
_analyse_messages(
    collection,
    'ALERT: You\'ve spent Rs.([0-9.]+)',
    hdfcCreditCardInterpreter)

# Alerts that start with "ALERT:You've" (no space). The pending-only filter is
# switched off for this pass, so every matching message is parsed again on
# each run.
_analyse_messages(
    collection,
    'ALERT:You\'ve spent Rs.([0-9.]+) on',
    hdfcCreditCardInterpreter2,
    only_pending=False)

message {'_id': ObjectId('5e26dab4d64bdc5152d03284'), 'message': {'text': "ALERT:You've spent Rs.110.00 on DEBIT/ATM Card xx6504 at SWIGGY1368042 in BANGALORE on 2019-01-21:10:40:16.Not you?Call 18002586161.", 'date': 569740222106927040, 'guid': '130D7998-C095-A2FB-ED30-DB3C3825382F'}, 'status': {'analysis_done': True}, 'transaction': {}}
analysis {'expense_amount': 110.0, 'payment_mode': 'DEBIT/ATM Card xx6504', 'merchant': 'SWIGGY1368042 in BANGALORE', 'datetime': datetime.datetime(2019, 1, 21, 10, 40, 16)} 


success
message {'_id': ObjectId('5e26dab5d64bdc5152d0328e'), 'message': {'text': "ALERT:You've spent Rs.284.00 on DEBIT/ATM Card xx6504 at SWIGGY1368042 in BANGALORE on 2019-01-22:13:06:30.Not you?Call 18002586161.", 'date': 569835401035361024, 'guid': '8B2D5BC0-A913-D303-3EBF-CB19F5347CF2'}, 'status': {'analysis_done': True}, 'transaction': {}}
analysis {'expense_amount': 284.0, 'payment_mode': 'DEBIT/ATM Card xx6504', 'merchant': 'SWIGGY1368042 in BANGALORE', 'datetime': dat

In [70]:
# Month to report on.
year = 2020
month = 2

# First day of the following month, used as the exclusive upper bound
# (December rolls over into January of the next year).
next_month = 1 if month == 12 else month + 1
next_year = year + 1 if month == 12 else year

# Dotted paths test the individual status flags. Comparing `status` with the
# literal {'analysis_done': True} only matches when that is the sole field in
# the sub-document, so any extra status flag would hide the document.
# Documents flagged as duplicates are excluded from the report.
monthly = list(enumerate(collection.find(
    {
        'status.analysis_done': True,
        'status.duplicate': {'$ne': True},
        'transaction.datetime': {
            '$gte': datetime.datetime(year, month, 1),
            '$lt': datetime.datetime(next_year, next_month, 1)},
    },
    {'transaction': 1})))

# One row per document: the transaction values followed by the document id.
monthly_transactions = list(map(extract_transactions, monthly))

In [71]:
# Render the month's rows as an HTML table in the cell output.
monthly_table_html = tabulate.tabulate(monthly_transactions, tablefmt='html')
display(HTML(monthly_table_html))

0,1,2,3,4,5,6,7,8
500.0,CREDIT Card xx3690,PAYTM,2020-02-02 20:44:58,272486.89,27513.1,5e3a783285ce1ed04ce1b6c3,,
465.5,CREDIT Card xx3690,FRESHTOH3631510,2020-02-02 20:42:47,272986.5,27013.5,5e3a783285ce1ed04ce1b6c4,,
465.5,CREDIT Card xx3690,FRESHTOH3631510 in BANGALORE,2020-02-02 20:42:48,5e3a783285ce1ed04ce1b6c5,,,,
400.0,CREDIT Card xx3690,PAYTM3852398,2020-02-02 20:49:59,272086.89,27913.1,5e3a783285ce1ed04ce1b6c6,,
500.0,CREDIT Card xx3690,PAYTM3852398,2020-02-03 23:14:18,271586.0,28414.0,5e3a783385ce1ed04ce1b6cb,,
1514.37,CREDIT Card xx3690,www.bigbasket.,2020-02-06 22:08:21,270071.63,29928.4,5e3e5b35dd2322b0ad5cccfa,,
588.0,CREDIT Card xx3690,MYNTRA72883,2020-02-06 22:50:28,269484.52,30515.5,5e3e5b35dd2322b0ad5cccfc,,
606.0,CREDIT Card xx3690,FRESHTOH3631510,2020-02-07 10:39:43,268878.0,31122.0,5e3e5b35dd2322b0ad5cccff,,
606.0,CREDIT Card xx3690,FRESHTOH3631510 in BANGALORE,2020-02-07 10:39:44,5e3e5b35dd2322b0ad5ccd00,,,,
1204.35,CREDIT Card xx3690,AJIO,2020-02-07 11:22:23,267674.17,32325.8,5e3e5b36dd2322b0ad5ccd02,,


In [58]:
# Tag one stored transaction, looked up by its document id, as a toy purchase.
add_category_info(
    collection,
    '5e497fb75bc90c80c1c82a81',
    category='purchase',
    sub_category='toys',
)

success


<pymongo.results.UpdateResult at 0x7f2dbef73aa0>

In [None]:
# TODO: helpers still to be added (assumed from these notes; confirm intent):
#   add_comment
#   set_action(Debit/Credit/Transfer)