In [1]:
import pandas as pd
from pandas.io.json._normalize import nested_to_record  
import  math
import os
import bson
from dateutil import parser
import datetime


from pymongo import MongoClient

def mongoCollection(connstr, db, collection):
    """Return a handle to one collection of a MongoDB database.

    Args:
        connstr: Connection string handed to ``MongoClient``.
        db: Name of the database to select on the client.
        collection: Name of the collection to select inside that database.

    Returns:
        The object obtained by indexing the client with ``db`` and then
        indexing the result with ``collection``.
    """
    # Chain the two lookups instead of rebinding the ``db`` parameter.
    return MongoClient(connstr)[db][collection]

# The connection string is read from the environment so that no credentials
# are stored in the notebook; the lookup yields None when the variable is unset.
collection = mongoCollection(
    connstr=os.getenv('MONGODB_CONN_STR'),
    db='smsinfo',
    collection='transactions',
)

In [18]:
# Aggregation that yields one row per merchant (merchant, category,
# sub_category), taken from transactions whose `status.analysis_done` flag is
# true and that carry a non-null category.
pipeline = [
    {"$match": {
      "status.analysis_done": True,
      # `$exists: True` on its own also matches documents whose category is an
      # explicit null. Such a row could be the one picked by `$last` below and
      # the merchant would then be reported with no category at all.
      # `$ne: None` rejects both null and missing values.
      "transaction.category": {'$exists': True, '$ne': None}
    }},
    # Define the document order that the `$last` accumulators rely on.
    # NOTE(review): with a descending sort, `$last` selects the row with the
    # OLDEST `message.date` per merchant -- confirm this is intended rather
    # than the most recent one (`$first`).
    {"$sort": {
      'message.date': -1,
    }},
    # Flatten the nested transaction fields so the group stage can refer to
    # them by short names.
    {"$project": {
      "_id": "$transaction.merchant",
      "merchant": "$transaction.merchant",
      "category": "$transaction.category",
      "sub_category": "$transaction.sub_category"
    }},
    # Collapse to a single row per merchant.
    {"$group": {
      '_id': '$merchant',
      'merchant': {
        '$last': '$merchant'
      },
      'category': {
        '$last': '$category'
      },
      'sub_category': {
        '$last': '$sub_category'
      }
    }}
]

# NOTE: despite its name, `monthly` holds one dict per merchant, not per month.
monthly = list(collection.aggregate(pipeline))

print(monthly)

[{'_id': 'UBER', 'merchant': 'UBER', 'category': 'taxi', 'sub_category': 'uber'}, {'_id': 'Freshtohome', 'merchant': 'Freshtohome', 'category': 'grocery', 'sub_category': 'non-veg'}, {'_id': 'SWIGGY95116', 'merchant': 'SWIGGY95116', 'category': 'food', 'sub_category': 'Delivery'}, {'_id': 'SWIGGYXL3549786', 'merchant': 'SWIGGYXL3549786', 'category': 'food', 'sub_category': 'delivery'}, {'_id': 'bharatpe09600003315¡yesbankltd', 'merchant': 'bharatpe09600003315¡yesbankltd', 'category': 'food', 'sub_category': 'eating-out'}, {'_id': 'HDFCBILLPAY', 'merchant': 'HDFCBILLPAY', 'category': 'utility', 'sub_category': 'internet'}, {'_id': 'JIO Infocom', 'merchant': 'JIO Infocom', 'category': 'utility', 'sub_category': 'mobile'}, {'_id': 'NETFLIX ENTERTAINMENT', 'merchant': 'NETFLIX ENTERTAINMENT', 'category': 'subscription', 'sub_category': 'entertainment'}, {'_id': 'VPS*DROMI    .', 'merchant': 'VPS*DROMI    .', 'category': 'meet-up', 'sub_category': 'food'}, {'_id': 'Drdo', 'merchant': 'Drdo'

In [14]:
# Index the aggregation rows by their `_id` for direct lookup; if two rows
# share an `_id`, the later one wins.
merchant_dict = {row['_id']: row for row in monthly}

In [15]:
# Dump the `_id` -> row lookup as plain text for a quick visual check.
print(merchant_dict)

{'AKSHAYAK3819720': {'_id': 'AKSHAYAK3819720', 'category': 'None', 'sub_category': 'None'}, 'zerodhabroking@hdfcbank': {'_id': 'zerodhabroking@hdfcbank', 'category': 'None', 'sub_category': 'None'}, 'PEPPERFRY64213': {'_id': 'PEPPERFRY64213', 'category': 'purchase', 'sub_category': 'furniture'}, 'LIC': {'_id': 'LIC', 'category': 'insurance', 'sub_category': 'insurance'}, 'cru5ty.d3m0nx-2@okhdfcbank': {'_id': 'cru5ty.d3m0nx-2@okhdfcbank', 'category': 'others', 'sub_category': 'others'}, 'NEW FRUITLAND': {'_id': 'NEW FRUITLAND', 'category': 'grocery', 'sub_category': 'vegetables'}, 'PVR LIMITED.': {'_id': 'PVR LIMITED.', 'category': 'food', 'sub_category': 'movie'}, 'bharatpe90200570491@yesbankltd': {'_id': 'bharatpe90200570491@yesbankltd', 'category': 'meet-up', 'sub_category': 'breakout'}, 'Paratha Corner': {'_id': 'Paratha Corner', 'category': 'food', 'sub_category': 'delivery'}, 'PHOENIX MARKETCITY BAN': {'_id': 'PHOENIX MARKETCITY BAN', 'category': 'auto', 'sub_category': 'parking'}

In [22]:
def add_category_info_for_merchant(collection, merchant, category='', sub_category=''):
    result = collection.update_many(
        {'transaction.merchant': merchant, 'transaction.category': None}, 
        {'$set' : {'transaction.category': category, 'transaction.sub_category': sub_category}})
    print(str(merchant) + ' modified ' + str(result.modified_count))
    return result

In [23]:
    # Re-run the per-merchant aggregation so the backfill works from fresh data.
    merchant_category_mapping = list(collection.aggregate(pipeline))
    # For every merchant row, copy its category fields onto that merchant's
    # transactions whose category is still None (one line is printed per merchant).
    for _m in merchant_category_mapping:
        add_category_info_for_merchant(
            collection,
            _m['merchant'],
            category=_m['category'],
            sub_category=_m['sub_category'],
        )

THENEWIN1262499 modified 0
PEPPERFRY64213 modified 0
zerodhabroking@hdfcbank modified 0
AKSHAYAK3819720 modified 0
PVR LIMITED. modified 0
NEW FRUITLAND modified 0
LIC modified 0
cru5ty.d3m0nx-2@okhdfcbank modified 0
bharatpe90200570491@yesbankltd modified 0
PHOENIX MARKETCITY BAN modified 0
Paratha Corner modified 0
H M LEISURE modified 0
Instapay BBPS modified 0
VIN*APPLE COM. modified 0
swiggyupi@axisbank modified 0
Newspaper modified 0
AMAZON modified 0
..ARENA_ modified 0
Bharat Sanchar Nigam L modified 0
NEW FRUIT LAND modified 0
chinjuprakasan90@okhdfcbank modified 0
upiswiggy@icici modified 0
paytmqr281005050101j1cog2ifqf2u@paytm modified 0
q22904860@ybl modified 0
www.bigbasket. modified 0
bharatpe.9040576993@icici modified 0
FIRSTCRY11847 modified 0
SWIGGY modified 0
..MATTO COFFEA_ modified 0
BBPSBILLPAY modified 0
HORTICULTURE PRODUCERS modified 0
www.dunzo.in modified 0
UBER modified 0
zomato@hdfcbank modified 0
AARKITHA modified 0
Akshayakalpa modified 0
BHARTI AIRTEL LIM