In [56]:
from pymongo import MongoClient
import os
import bson

def mongoCollection(connstr, db, collection):
    """Return a handle to one collection of a MongoDB database.

    Args:
        connstr: Connection string passed straight to ``MongoClient``.
        db: Name of the database to select on the client.
        collection: Name of the collection to select inside that database.

    Returns:
        Whatever ``MongoClient(connstr)[db][collection]`` evaluates to.
    """
    # A fresh client is created on every call; the caller only gets the
    # collection handle back.
    return MongoClient(connstr)[db][collection]

import re
import datetime

# Shared handle to the 'transactions' collection of the 'smsinfo' database,
# used by every cell below.
# NOTE(review): os.environ.get() yields None when MONGODB_CONN_STR is unset;
# the ServerSelectionTimeoutError against localhost:27017 recorded further
# down suggests the client then falls back to a default host - confirm.
collection = mongoCollection(
    connstr=os.environ.get('MONGODB_CONN_STR'),
    db='smsinfo',
    collection='transactions')

hdfcCreditCardInterpreterFormatStr = '%Y-%m-%d:%H:%M:%S'

# ALERT: You've spent Rs.5605.00  on CREDIT Card xx3690 at AARKITHA on 2020-02-22:12:09:36.Avl bal - Rs.286107.00, curr o/s - Rs.13893.00.Not you? Call 18002586161.
def hdfcCreditCardInterpreter(txn):
    """Parse a spend-alert SMS of the form shown in the sample above.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (str), ``merchant`` (str), ``datetime`` (``datetime.datetime``),
        ``available_balance`` (float) and ``outstanding_amount`` (float),
        or ``{}`` when the text does not match the pattern.

    Raises:
        ValueError: if a captured amount or timestamp cannot be converted
            (for example an amount such as ``1.2.3``).
    """
    # Raw strings keep \s and \d as regex escapes instead of (invalid)
    # Python string escapes.
    x = re.search(
        r"ALERT: You've spent Rs.([0-9.]+)\s+on\s+([a-zA-Z0-9\s._]+)? on "
        r"(\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2}).Avl bal - Rs.([0-9.]+), "
        r"curr o/s - Rs.([0-9]+.[0-9]+).",
        txn)
    if x is None:
        return {}

    # Group 2 holds "<payment mode> at <merchant>".  Split on the standalone
    # word "at" only, so an "at" inside a word (e.g. "Platinum") is left
    # alone; the group is optional in the pattern, hence the `or ''`.
    parts = re.split(r'(?:^|\s+)at(?:\s+|$)', (x.group(2) or '').strip(), maxsplit=1)
    payment_mode = parts[0].strip()
    # Without an "at" separator there is no merchant to report.
    merchant = parts[1].strip() if len(parts) > 1 else ''

    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': payment_mode,
        'merchant': merchant,
        'datetime': datetime.datetime.strptime(x.group(3), hdfcCreditCardInterpreterFormatStr),
        'available_balance': float(x.group(4)),
        'outstanding_amount': float(x.group(5))
    }

# ALERT:You've spent Rs.428.00 on CREDIT Card xx3690 at FRESHTOH3631510 in BANGALORE on 2020-02-15:16:46:51.Not you?Call 18002586161.
def hdfcCreditCardInterpreter2(txn):
    """Parse a spend-alert SMS of the form shown in the sample above.

    This variant has no space after ``ALERT:`` and carries no balance
    figures.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (str), ``merchant`` (str) and ``datetime`` (``datetime.datetime``),
        or ``{}`` when the text does not match the pattern.

    Raises:
        ValueError: if the captured amount or timestamp cannot be converted.
    """
    # Raw strings keep \s and \d as regex escapes instead of (invalid)
    # Python string escapes.
    x = re.search(
        r"ALERT:You've spent Rs.([0-9.]+)\s+on\s+([a-zA-Z0-9\s._/]+)? on "
        r"(\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2}).",
        txn)
    if x is None:
        return {}

    # Group 2 holds "<payment mode> at <merchant>".  Split on the standalone
    # word "at" only, so an "at" inside a word (e.g. "Platinum") is left
    # alone; the group is optional in the pattern, hence the `or ''`.
    parts = re.split(r'(?:^|\s+)at(?:\s+|$)', (x.group(2) or '').strip(), maxsplit=1)
    payment_mode = parts[0].strip()
    # Without an "at" separator there is no merchant to report.
    merchant = parts[1].strip() if len(parts) > 1 else ''

    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': payment_mode,
        'merchant': merchant,
        'datetime': datetime.datetime.strptime(x.group(3), hdfcCreditCardInterpreterFormatStr)
    }

# ALERT:You've spent Rs.3638.00 via Debit Card xx6504 at www.lenskart.c on 2019-01-14:20:29:59.Avl Bal Rs.264666.51.Not you?Call 18002586161.
def hdfcCreditCardInterpreterVia(txn):
    """Parse a "spent Rs.<amount> via <card> at <merchant>" SMS (sample above).

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (str), ``merchant`` (str) and ``datetime`` (``datetime.datetime``),
        or ``{}`` when the text does not match the pattern.  The balance
        that follows the timestamp in the sample is not extracted.

    Raises:
        ValueError: if the captured amount or timestamp cannot be converted.
    """
    # Raw strings keep \s and \d as regex escapes instead of (invalid)
    # Python string escapes.
    x = re.search(
        r"ALERT:You've spent Rs.([0-9.]+)\s+via\s+([a-zA-Z0-9\s._/]+)? on "
        r"(\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2}).",
        txn)
    if x is None:
        return {}

    # Group 2 holds "<payment mode> at <merchant>".  Split on the standalone
    # word "at" only, so an "at" inside a word (e.g. "Platinum") is left
    # alone; the group is optional in the pattern, hence the `or ''`.
    parts = re.split(r'(?:^|\s+)at(?:\s+|$)', (x.group(2) or '').strip(), maxsplit=1)
    payment_mode = parts[0].strip()
    # Without an "at" separator there is no merchant to report.
    merchant = parts[1].strip() if len(parts) > 1 else ''

    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': payment_mode,
        'merchant': merchant,
        'datetime': datetime.datetime.strptime(x.group(3), hdfcCreditCardInterpreterFormatStr)
    }

from IPython.display import HTML, display
import tabulate

def extract_transactions(i):
    """Flatten one enumerated query result into a fixed-order table row.

    Args:
        i: An ``(index, document)`` pair; only ``i[1]['transaction']`` is read.

    Returns:
        A list with the transaction's expense_amount, payment_mode, merchant,
        datetime, category and sub_category, in that order.  Fields missing
        from the transaction are reported as ``None``.
    """
    transaction = i[1]['transaction']
    row = []
    for field in ('expense_amount', 'payment_mode', 'merchant',
                  'datetime', 'category', 'sub_category'):
        row.append(transaction.get(field))
    return row

def mark_duplicate(collection, _id):
    """Flag one stored message as a duplicate.

    Args:
        collection: Collection object exposing ``update_one``.
        _id: Identifier of the document; it is wrapped in ``bson.ObjectId``.

    Returns:
        The result object returned by ``collection.update_one``.  'success'
        is printed when exactly one document was modified, 'failed' otherwise.
    """
    # Set only the nested flag.  Assigning a whole {'duplicate': True}
    # document to 'status' would replace the sub-document and drop sibling
    # flags such as status.analysis_done, after which the analysis cells
    # would pick the message up again and overwrite the duplicate marker.
    result = collection.update_one(
        {'_id': bson.ObjectId(_id)},
        {'$set': {'status.duplicate': True}})
    print('success' if result.modified_count == 1 else 'failed')
    return result

def add_category_info(collection, _id, category='', sub_category=''):
    """Attach a category and sub-category to one stored transaction.

    Args:
        collection: Collection object exposing ``update_one``.
        _id: Identifier of the document; it is wrapped in ``bson.ObjectId``.
        category: Value written to ``transaction.category``.
        sub_category: Value written to ``transaction.sub_category``.

    Returns:
        The result object returned by ``collection.update_one``.  'success'
        is printed when exactly one document was modified, 'failed' otherwise.
    """
    selector = {'_id': bson.ObjectId(_id)}
    changes = {
        '$set': {
            'transaction.category': category,
            'transaction.sub_category': sub_category,
        }
    }
    result = collection.update_one(selector, changes)
    if result.modified_count == 1:
        print('success')
    else:
        print('failed')
    return result

In [36]:
paytmInterpreterFormatStr = '%b %d, %Y %H:%M:%S'
# Paid Rs. 106.76 to UBER on Feb 10, 2020 11:33:28 with Ref: 28811759158. For more details, visit https://p-y.tm/1Q-bnfM
def paytmInterpreter(txn):
    """Parse a "Paid Rs. <amount> to <merchant> on <date>" SMS (sample above).

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (always ``'PAYTM'``), ``merchant`` (str) and ``datetime``
        (``datetime.datetime``), or ``{}`` when the text does not match.

    Raises:
        ValueError: if the captured amount or timestamp cannot be converted.
    """
    # Raw strings keep \s and \d as regex escapes instead of (invalid)
    # Python string escapes.
    x = re.search(
        r"Paid Rs. ([0-9.]+) to\s+([a-zA-Z0-9\s._/]+)? on "
        r"([a-zA-Z]{3} \d{1,2}, \d{4} \d{2}:\d{2}:\d{2})",
        txn)
    if x is None:
        return {}
    return {
        'expense_amount': float(x.group(1)),
        'payment_mode': 'PAYTM',
        # The merchant group is optional in the pattern, so it can be None.
        'merchant': (x.group(2) or '').strip(),
        # %b is matched against the current locale's month abbreviations.
        'datetime': datetime.datetime.strptime(x.group(3), paytmInterpreterFormatStr)
    }

# Parse every Paytm payment SMS that has not been analysed yet, oldest first,
# and store the result on its document.  The query result is materialised
# into a list before any update is issued.
_paytm_pending = list(
    collection.find({
        "message.text": {'$regex': 'Paid Rs. ([0-9.]+) to'},
        "status.analysis_done": {'$ne': True}}
    ).sort([("message.date", 1)]))

for _doc in _paytm_pending:
    print("message", _doc)
    _transaction = paytmInterpreter(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
    if not _transaction:
        # The loose $regex pre-filter matched but the full pattern did not.
        # Leave the document untouched so it is neither stored as an empty
        # transaction nor marked as analysed.
        continue
    result = collection.update_one(
        {'_id': _doc['_id']},
        # Dotted path: set only this flag and keep any other field that is
        # already present under 'status' (e.g. status.duplicate).
        {'$set': {'transaction': _transaction, 'status.analysis_done': True}})
    print('success' if result.modified_count == 1 else 'unsuccessful')

message {'_id': ObjectId('5e46a9363fdf437c9b43e294'), 'message': {'text': 'Paid Rs. 106.73 to UBER on Oct 1, 2019 19:00:09 with Ref: 26704838359. For more details, visit https://p-y.tm/ns8-MTj', 'date': 591629411167417984, 'guid': 'A99FE6B4-4FBA-46AB-F640-A0F7C7788C05'}}
analysis {'expense_amount': 106.73, 'payment_mode': 'PAYTM', 'merchant': 'UBER', 'datetime': datetime.datetime(2019, 10, 1, 19, 0, 9)} 


message {'_id': ObjectId('5e46a9373fdf437c9b43e296'), 'message': {'text': 'Paid Rs. 104.56 to UBER on Oct 2, 2019 10:38:06 with Ref: 26713391394. For more details, visit https://p-y.tm/cqXu-oa', 'date': 591685689347947008, 'guid': '7751B722-DAE8-D7A9-5E41-0943F0DD5AB9'}}
analysis {'expense_amount': 104.56, 'payment_mode': 'PAYTM', 'merchant': 'UBER', 'datetime': datetime.datetime(2019, 10, 2, 10, 38, 6)} 


message {'_id': ObjectId('5e46a9373fdf437c9b43e297'), 'message': {'text': 'Paid Rs. 546.49 to UBER on Oct 2, 2019 18:46:58 with Ref: 26721720845. For more details, visit https://p

In [50]:
# Acct ([X0-9]+) debited with INR([0-9.]+) on (\d{1,2}-([a-zA-Z]){3}-\d{2,4}) and ([a-zA-Z0-9@])+ credited.
iciciUPIInterpreterFormatStr = '%d-%b-%y'
def iciciUPIInterpreter(txn):
    """Parse an "Acct <acct> debited with INR<amount> on <date> and <payee> credited." SMS.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (the masked account, e.g. ``'XX1983'``), ``merchant`` (the credited
        payee id) and ``datetime`` (``datetime.datetime`` at midnight; the
        message carries no time of day), or ``{}`` when the text does not
        match.

    Raises:
        ValueError: if the captured amount or date cannot be converted.
    """
    # Raw strings keep \d as a regex escape.  The amount may carry thousands
    # separators, the year is either two or four digits, and payee ids may
    # contain '.', '_' and '-' in addition to letters, digits and '@'.
    x = re.search(
        r"Acct ([X0-9]+) debited with INR([0-9,.]+) on "
        r"(\d{1,2}-[a-zA-Z]{3}-(?:\d{4}|\d{2})) and ([a-zA-Z0-9@._-]+) credited.",
        txn)
    if x is None:
        return {}

    date_text = x.group(3)
    # '%y' only accepts a two-digit year; switch to '%Y' for four digits.
    year_text = date_text.rsplit('-', 1)[-1]
    date_format = '%d-%b-%Y' if len(year_text) == 4 else iciciUPIInterpreterFormatStr

    return {
        'expense_amount': float(x.group(2).replace(',', '')),
        'payment_mode': x.group(1),
        'merchant': x.group(4).strip(),
        'datetime': datetime.datetime.strptime(date_text, date_format)
    }
#     return x

# Dry run of iciciUPIInterpreter: print each not-yet-analysed "Acct ... debited"
# message, oldest first, next to what the parser extracts from it.  The
# write-back at the bottom is commented out, so nothing is stored.
_icici_debit_query = {
    "message.text": {'$regex': 'Acct ([X0-9]+) debited with INR([0-9.]+)'},
    "status.analysis_done": {'$ne': True},
}
_icici_debit_docs = list(collection.find(_icici_debit_query).sort([("message.date",1)]))

for _doc in _icici_debit_docs:
    print("message", _doc)
    _transaction = iciciUPIInterpreter(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
#     result = collection.update_one(
#         {'_id': bson.ObjectId(str(_doc['_id']))},
#         {'$set' : {'transaction': _transaction, 'status': {'analysis_done': True}}})
#     print('success' if result.modified_count == 1 else 'unsuccessful')

message {'_id': ObjectId('5e26dacad64bdc5152d0330f'), 'message': {'text': 'Acct XX1983 debited with INR120.00 on 06-Feb-19 and q66675194@ybl credited.Info:UPI-903721690874.Call 18601207777 for dispute or SMS BLOCK 1983 to 9215676766', 'date': 571159939937314048, 'guid': '33FBC02F-A0E3-D626-1D8D-C236CDF245EC'}}
analysis {'expense_amount': 120.0, 'payment_mode': 'XX1983', 'merchant': 'q66675194@ybl', 'datetime': datetime.datetime(2019, 2, 6, 0, 0)} 


message {'_id': ObjectId('5e26dacfd64bdc5152d03332'), 'message': {'text': 'Acct XX1983 debited with INR85.00 on 10-Feb-19 and q66675194@ybl credited.Info:UPI-904121364814.Call 18601207777 for dispute or SMS BLOCK 1983 to 9215676766', 'date': 571508864698371072, 'guid': '2B905589-BDF5-D23C-6340-FB85F3FFC631'}}
analysis {'expense_amount': 85.0, 'payment_mode': 'XX1983', 'merchant': 'q66675194@ybl', 'datetime': datetime.datetime(2019, 2, 10, 0, 0)} 


message {'_id': ObjectId('5e26db1fd64bdc5152d03550'), 'message': {'text': 'Acct XXX983 debite

In [4]:
# "ALERT: You've spent ..." messages (the variant with balance figures).
# Parse every message that has not been analysed yet, oldest first, and store
# the result on its document.  The query result is materialised into a list
# before any update is issued.
_hdfc_alert_pending = list(
    collection.find({
        "message.text": {'$regex': "ALERT: You've spent Rs.([0-9.]+)"},
        "status.analysis_done": {'$ne': True}}
    ).sort([("message.date", 1)]))

for _doc in _hdfc_alert_pending:
    print("message", _doc)
    _transaction = hdfcCreditCardInterpreter(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
    # Same guard as the two loops below: when the full pattern did not match,
    # leave the document alone instead of storing an empty transaction and
    # marking it as analysed.
    if _transaction != {}:
        result = collection.update_one(
            {'_id': _doc['_id']},
            # Dotted path: set only this flag and keep any other field that
            # is already present under 'status' (e.g. status.duplicate).
            {'$set': {'transaction': _transaction, 'status.analysis_done': True}})
        print('success' if result.modified_count == 1 else 'unsuccessful')
    
# "ALERT:You've spent Rs.<amount> on ..." messages (no space after "ALERT:").
# Parse every message that has not been analysed yet, oldest first, and store
# the result on its document.  The query result is materialised into a list
# before any update is issued.
_hdfc_alert2_pending = list(
    collection.find({
        "message.text": {'$regex': "ALERT:You've spent Rs.([0-9.]+) on"},
        "status.analysis_done": {'$ne': True}}
    ).sort([("message.date", 1)]))

for _doc in _hdfc_alert2_pending:
    print("message", _doc)
    _transaction = hdfcCreditCardInterpreter2(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
    # Skip messages the full pattern could not parse.
    if _transaction != {}:
        result = collection.update_one(
            {'_id': _doc['_id']},
            # Dotted path: set only this flag and keep any other field that
            # is already present under 'status' (e.g. status.duplicate).
            {'$set': {'transaction': _transaction, 'status.analysis_done': True}})
        print('success' if result.modified_count == 1 else 'unsuccessful')
        
# "ALERT:You've spent Rs.<amount> via ..." messages.
# Parse every message that has not been analysed yet, oldest first, and store
# the result on its document.  The analysis_done filter is active again:
# without it each run re-parsed every matching message and replaced its whole
# 'transaction' sub-document, discarding the category / sub_category fields
# written by add_category_info.
_hdfc_via_pending = list(
    collection.find({
        "message.text": {'$regex': "ALERT:You've spent Rs.([0-9.]+) via"},
        "status.analysis_done": {'$ne': True}}
    ).sort([("message.date", 1)]))

for _doc in _hdfc_via_pending:
    print("message", _doc)
    _transaction = hdfcCreditCardInterpreterVia(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
    # Skip messages the full pattern could not parse.
    if _transaction != {}:
        result = collection.update_one(
            {'_id': _doc['_id']},
            # Dotted path: set only this flag and keep any other field that
            # is already present under 'status' (e.g. status.duplicate).
            {'$set': {'transaction': _transaction, 'status.analysis_done': True}})
        print('success' if result.modified_count == 1 else 'unsuccessful')

ServerSelectionTimeoutError: localhost:27017: [Errno 111] Connection refused

In [57]:
# Reporting window: one calendar month, chosen by year and month number.
year = 2020
month = 2

# First day of the following month; December rolls over into January of the
# next year.
if month == 12:
    next_month, next_year = 1, year + 1
else:
    next_month, next_year = month + 1, year

_window_start = datetime.datetime(year, month, 1)
_window_end = datetime.datetime(next_year, next_month, 1)

# 'status' is compared against the whole {'analysis_done': True} document
# here, not against a single dotted field.  Only the 'transaction' field
# (plus the default _id) is requested back.
_monthly_filter = {
    'status': {'analysis_done': True},
    'transaction.datetime': {'$gte': _window_start, '$lt': _window_end},
}
monthly = list(enumerate(collection.find(_monthly_filter, {'transaction': 1})))

# One fixed-order row per transaction, ready for tabulation.
monthly_transactions = [extract_transactions(entry) for entry in monthly]

In [53]:
# Render the month's rows as an HTML table in the cell output.
_monthly_table_html = tabulate.tabulate(monthly_transactions, tablefmt='html')
display(HTML(_monthly_table_html))

0,1,2,3,4,5,6,7,8
500.0,CREDIT Card xx3690,PAYTM,2020-02-02 20:44:58,272486.89,27513.1,5e3a783285ce1ed04ce1b6c3,,
465.5,CREDIT Card xx3690,FRESHTOH3631510,2020-02-02 20:42:47,272986.5,27013.5,5e3a783285ce1ed04ce1b6c4,,
465.5,CREDIT Card xx3690,FRESHTOH3631510 in BANGALORE,2020-02-02 20:42:48,5e3a783285ce1ed04ce1b6c5,,,,
400.0,CREDIT Card xx3690,PAYTM3852398,2020-02-02 20:49:59,272086.89,27913.1,5e3a783285ce1ed04ce1b6c6,,
199.0,Debit Card xx1827,NETFLIX ENTERTAINMENT,2020-02-03 19:28:14,5e3a783385ce1ed04ce1b6ca,,,,
500.0,CREDIT Card xx3690,PAYTM3852398,2020-02-03 23:14:18,271586.0,28414.0,5e3a783385ce1ed04ce1b6cb,,
60.0,XXX983,bharatpe90200570491@yesbankltd,2020-02-04 00:00:00,5e3a783385ce1ed04ce1b6cc,,,,
60.0,XXX983,bharatpe90200570491@yesbankltd,2020-02-06 00:00:00,5e3e5b35dd2322b0ad5cccf7,,,,
1514.37,CREDIT Card xx3690,www.bigbasket.,2020-02-06 22:08:21,270071.63,29928.4,5e3e5b35dd2322b0ad5cccfa,,
588.0,CREDIT Card xx3690,MYNTRA72883,2020-02-06 22:50:28,269484.52,30515.5,5e3e5b35dd2322b0ad5cccfc,,


In [58]:
# add_category_info(collection, '5e497fb75bc90c80c1c82a81', category='purchase', sub_category='toys')

success


<pymongo.results.UpdateResult at 0x7f2dbef73aa0>

In [63]:
# Txn of INR 970.00 done on Acct XX983 on 01-Feb-20.Info: VPS*PAY Sreen.Avbl Bal:INR 13,481.75.Call 18002662 for dispute or SMS BLOCK 983 to 9215676766
# Txn of INR ([0-9.]+) done on Acct ([X0-9]+) on (\d{1,2}-([a-zA-Z]){3}-\d{2,4}).Info: ([a-zA-Z0-9\s._//*]+)?Avbl Bal:INR ([\d,]+.[\d]{2})?.

iciciUPIInterpreterFormatStr2 = '%d-%b-%y'
def iciciUPIInterpreter2(txn):
    """Parse a "Txn of INR <amount> done on Acct <acct> on <date>.Info: <merchant>.Avbl Bal:..." SMS.

    Args:
        txn: Full text of the SMS.

    Returns:
        A dict with the keys ``expense_amount`` (float), ``payment_mode``
        (the masked account, e.g. ``'XX1983'``), ``merchant`` (the text after
        ``Info:``, including its trailing '.') and ``datetime``
        (``datetime.datetime`` at midnight; the message carries no time of
        day), or ``{}`` when the text does not match.  The available balance
        is matched but not returned.

    Raises:
        ValueError: if the captured amount or date cannot be converted.
    """
    # Raw strings keep \d and \s as regex escapes.  The amount may carry
    # thousands separators (the balance in the same message does), and the
    # year is either two or four digits, never a prefix of a longer number.
    x = re.search(
        r"Txn of INR ([0-9,.]+) done on Acct ([X0-9]+) on "
        r"(\d{1,2}-[a-zA-Z]{3}-(?:\d{4}|\d{2})(?!\d)).Info: ([a-zA-Z0-9\s._/*]+)?"
        r"Avbl Bal:INR ([\d,]+.[\d]{2})?.",
        txn)
    if x is None:
        return {}

    date_text = x.group(3)
    # '%y' only accepts a two-digit year; switch to '%Y' for four digits.
    year_text = date_text.rsplit('-', 1)[-1]
    date_format = '%d-%b-%Y' if len(year_text) == 4 else iciciUPIInterpreterFormatStr2

    return {
        'expense_amount': float(x.group(1).replace(',', '')),
        'payment_mode': x.group(2),
        # The merchant group is optional in the pattern, so it can be None.
        'merchant': (x.group(4) or '').strip(),
        'datetime': datetime.datetime.strptime(date_text, date_format)
    }
#     return x

# Parse every "Txn of INR ... done on Acct ..." SMS that has not been analysed
# yet, oldest first, and store the result on its document.  The query result
# is materialised into a list before any update is issued.
_icici_txn_pending = list(
    collection.find({
        "message.text": {'$regex': 'Txn of INR ([0-9.]+) done on Acct ([X0-9]+) on'},
        "status.analysis_done": {'$ne': True}}
    ).sort([("message.date", 1)]))

for _doc in _icici_txn_pending:
    print("message", _doc)
    _transaction = iciciUPIInterpreter2(_doc['message']['text'])
    print("analysis", _transaction, "\n\n")
    if not _transaction:
        # The loose $regex pre-filter matched but the full pattern did not.
        # Leave the document untouched so it is neither stored as an empty
        # transaction nor marked as analysed.
        continue
    result = collection.update_one(
        {'_id': _doc['_id']},
        # Dotted path: set only this flag and keep any other field that is
        # already present under 'status' (e.g. status.duplicate).
        {'$set': {'transaction': _transaction, 'status.analysis_done': True}})
    print('success' if result.modified_count == 1 else 'unsuccessful')

message {'_id': ObjectId('5e26dad5d64bdc5152d03355'), 'message': {'text': 'Txn of INR 199.00 done on Acct XX1983 on 20-Feb-19.Info: VIN*ITUNES CO.Avbl Bal:INR 12,112.07.Call 18601207777 for dispute or SMS BLOCK 1983 to 9215676766', 'date': 572324444443048960, 'guid': 'BF8E98A6-07B9-2C3C-BF33-33710568A78B'}}
analysis {'expense_amount': 199.0, 'payment_mode': 'XX1983', 'merchant': 'VIN*ITUNES CO.', 'datetime': datetime.datetime(2019, 2, 20, 0, 0)} 


message {'_id': ObjectId('5e26dadfd64bdc5152d03399'), 'message': {'text': 'Txn of INR 79.00 done on Acct XX1983 on 05-Mar-19.Info: VIN*ITUNES CO.Avbl Bal:INR 12,033.07.Call 18601207777 for dispute or SMS BLOCK 1983 to 9215676766', 'date': 573473703451655040, 'guid': 'F9659202-15E3-8160-7305-A1BF4C8ED47D'}}
analysis {'expense_amount': 79.0, 'payment_mode': 'XX1983', 'merchant': 'VIN*ITUNES CO.', 'datetime': datetime.datetime(2019, 3, 5, 0, 0)} 


message {'_id': ObjectId('5e26dae0d64bdc5152d0339c'), 'message': {'text': 'Txn of INR 75.00 done 