## Getting the imports

In [2]:
import json
import os

import pandas as pd
import pandasdmx as sdmx
import pymongo

# Client bound to the 'UNSD' SDMX source; the dataflow and data requests below go through it.
unsd = sdmx.Request('UNSD')

# Two structure queries issued without an ID: dataflows first, then data structures.
unsd_flow, unsd_str = unsd.dataflow(), unsd.datastructure()

## Inspect the different dataset labels

In [None]:
# Convert the dataflow entries of the structure message to pandas and display them.
flow_entries = unsd_flow.dataflow
dataflows = sdmx.to_pandas(flow_entries)
dataflows

In [None]:
# Request the dataflow message of each dataset of interest (UNFCC first, then
# Energy Balance) and display both messages.
unfcc_msg, ebal_msg = (
    unsd.dataflow('DF_UNData_UNFCC'),
    unsd.dataflow('DF_UNData_EnergyBalance'),
)
unfcc_msg, ebal_msg

In [None]:
# Look up each dataset's data structure definition by its ID in the
# structure mapping of the corresponding dataflow message.
dsd_unfcc = unfcc_msg.structure['DSD_GHG_UNDATA']
dsd_ebal = ebal_msg.structure['DSD_ENERGY_BALANCE_UNDATA']

In [None]:
# Display the dimension components of both data structures
# (Energy Balance first, then UNFCC).
ebal_dimensions = dsd_ebal.dimensions.components
unfcc_dimensions = dsd_unfcc.dimensions.components
ebal_dimensions, unfcc_dimensions

## Getting the Data

In [2]:
# Request the data message of each dataflow; no key or parameters are passed.
resp_unfcc, resp_ebal = (
    unsd.data('DF_UNData_UNFCC'),
    unsd.data('DF_UNData_EnergyBalance'),
)

In [3]:
# Convert both data messages to pandas objects with the module-level converter
# (the same one used for the dataflow listing).
df_ebal = sdmx.to_pandas(resp_ebal)
df_unfcc = sdmx.to_pandas(resp_unfcc)

In [None]:
# Preview the first rows of each converted dataset (Energy Balance, then UNFCC).
ebal_preview = df_ebal.head()
unfcc_preview = df_unfcc.head()
ebal_preview, unfcc_preview

In [None]:
# Move the index into regular columns and write each dataset to its CSV file,
# without an extra index column.
for frame, csv_path in ((df_unfcc, 'data_unfcc.csv'), (df_ebal, 'data_ebal.csv')):
    frame.reset_index().to_csv(csv_path, index=False)

## Connecting to MongoDB and loading the data

In [18]:
# The connection string comes from the MONGODB_URI environment variable so that
# credentials (e.g. for a hosted cluster) are never written into the notebook.
# When the variable is not set, the client connects to the local server.
client = pymongo.MongoClient(os.environ.get('MONGODB_URI', 'localhost:27017'))

In [19]:
# Handle to the 'UNSD' database on the connected server.
db = client.get_database('UNSD')

In [20]:
# One collection per dataset: 'ebal' for Energy Balance, 'unfcc' for UNFCC.
col_ebal, col_unfcc = db.get_collection('ebal'), db.get_collection('unfcc')

In [21]:
# Load the 'old_data_*' CSV files; note that this rebinds df_ebal and df_unfcc.
df_ebal, df_unfcc = (
    pd.read_csv(csv_path) for csv_path in ('old_data_ebal.csv', 'old_data_unfcc.csv')
)

# Serialise each frame to a JSON array of row objects and parse it back,
# which yields a plain list of dicts per dataset.
data_json_unfcc, data_json_ebal = (
    json.loads(frame.to_json(orient='records')) for frame in (df_unfcc, df_ebal)
)

In [22]:
# Bulk-insert the records of each dataset into its own collection.
ebal_insert_result = col_ebal.insert_many(data_json_ebal)
unfcc_insert_result = col_unfcc.insert_many(data_json_unfcc)

# Show the result of the last insert as the cell output.
unfcc_insert_result

<pymongo.results.InsertManyResult at 0x7fea6138c680>

## Maintaining the server: upserting new Energy Balance rows

In [None]:
from tqdm import tqdm

# Compare the earlier Energy Balance CSV with the freshly exported one.
old_df = pd.read_csv('old_data_ebal.csv')
new_df = pd.read_csv('data_ebal.csv')

# Keep the rows of the new file whose full tuple of values does not occur in the old file.
update_df = new_df[~new_df.apply(tuple, 1).isin(old_df.apply(tuple, 1))]

db = client.get_database('UNSD')
col_ebal = db.get_collection('ebal')
update_count = 0  # number of documents newly inserted by the upserts below

# Serialise the rows the same way as the initial bulk load
# (DataFrame.to_json -> json.loads), so the values used in the upsert filter
# have the same form as the values already stored in the collection.
update_records = json.loads(update_df.to_json(orient='records'))

# NOTE(review): the filter is the complete record, so a row whose values changed
# is inserted next to its previous version instead of replacing it. Filtering on
# the identifying columns only would turn this into a real update — TODO confirm
# which columns identify an observation.
for record in tqdm(update_records):
    # The rows come from the Energy Balance files, so they are written to
    # col_ebal (not col_unfcc).
    result = col_ebal.replace_one(filter=record,       # locate the document if it exists
                                  replacement=record,  # latest document
                                  upsert=True)         # insert if no document matches
    if result.upserted_id is not None:
        update_count += 1