# Review Asset Growth

In [None]:
import pymongo as mongo
import pandas as pd

%matplotlib inline

## Define a Flattening Function

NOTE: Helpers like the following would normally be placed in a shared library so that they are readily available. It should not be necessary to define them on the fly.

In [None]:
def flatten(dct, sep=None):
    """Unnest a dictionary of arbitrarily nested dictionaries into one flat dict.

    Parameters
    ----------
    dct : dict
        Mapping whose values may themselves be dictionaries.
    sep : str, optional
        When ``None`` (the default) only the innermost key is kept, so
        ``{'a': {'x': 1}}`` becomes ``{'x': 1}``. Leaf keys that repeat in
        different branches then overwrite one another (the last one visited
        wins). When a string is given, every nested leaf key is prefixed with
        the keys of its parents joined by ``sep`` (``{'a': {'x': 1}}`` becomes
        ``{'a.x': 1}`` for ``sep='.'``), which keeps repeated leaf names apart.

    Returns
    -------
    dict
        A new dictionary; ``dct`` itself is not modified. Empty
        sub-dictionaries contribute nothing to the result.
    """
    def _go(node, flat, prefix):
        for key, value in node.items():
            # Only build a path-qualified key when the caller asked for one
            # and we are below the top level.
            if sep is None or prefix is None:
                name = key
            else:
                name = '{}{}{}'.format(prefix, sep, key)

            if isinstance(value, dict):
                # Recurse to pull the values out of the sub-dictionary.
                _go(value, flat, name)
            else:
                flat[name] = value
        return flat

    return _go(dct, dict(), None)

## Connect to DB

In [None]:
# Connect to the MongoDB server; 'mongodb-app' is presumably a hostname that
# resolves inside the notebook's environment -- confirm before running elsewhere.
client = mongo.MongoClient(host='mongodb-app')

In [None]:
# `database_names()` was deprecated in PyMongo 3.7 and removed in 4.0;
# `list_database_names()` is the supported replacement and returns the same
# list of database names.
client.list_database_names()

In [None]:
# Handle to the database that holds the report documents.
db = client.get_database(name='fdic_ffeic')

## Find the Collection

In [None]:
# `collection_names()` was deprecated in PyMongo 3.7 and removed in 4.0;
# `list_collection_names()` is the supported replacement.
db.list_collection_names()

In [None]:
# Collection queried in the rest of the notebook.
fin_collection = db.get_collection('ffeic_reports')

## Query the Data

For nested keys we use dot notation: the parent key and the child key are joined with a '.'. Below, the key 'financials.RCFD2170' is an example of this syntax.

In [None]:
# Fields to return for every document: 1 includes a field, 0 excludes it.
# The dotted key reaches into the nested 'financials' sub-document, and
# '_id' is switched off explicitly because it is not needed here.
projection = {
    'fdic_certificate_number': 1,
    'financial_institution_name': 1,
    'reporting_period_end_date': 1,
    'financials.RCFD2170': 1,
    '_id': 0,
}

# An empty filter matches every document in the collection.
cursor = fin_collection.find(filter={}, projection=projection)

In [None]:
# Flatten every document returned by the query so nested fields become
# top-level keys, collecting one flat dict per document.
observations = [flatten(document) for document in cursor]

In [None]:
# One row per flattened document, one column per key.
df = pd.DataFrame(data=observations)

## Transform the Queried Data

In [None]:
# Peek at the first two rows to see the columns the query produced.
df.head(n=2)

###### Set Column Names

In [None]:
# Rename by explicit mapping instead of assigning a positional list. The order
# of columns built from a list of dicts depends on the pandas version (sorted
# alphabetically in older releases, insertion order since 0.25), so a
# positional assignment can silently attach the wrong label to a column.
column_names = {
    'RCFD2170': 'total_assets',
    'fdic_certificate_number': 'fdic_id',
    'financial_institution_name': 'name',
    'reporting_period_end_date': 'datestring',
}

# `rename` ignores keys that are absent, so re-running this cell is harmless;
# the selection fixes the column order and fails loudly if a field is missing.
df = df.rename(columns=column_names)[list(column_names.values())]

In [None]:
# Check the column labels on the first two rows.
df.head(n=2)

###### Review 'name' column for transform

We want to use the names as column names in a table that looks like the following:

| datestring | usaa | BofA | ....|
|------------|------|------|-----|
| 2017-01-01 | 12.5 | 13.4 | ....|
| 2017-04-01 | 13.9 | 17.1 | ....|

In [None]:
# Show each distinct institution name once to see what needs cleaning up.
df['name'].drop_duplicates()

Drop everything after the first "," => lowercase all characters => replace " " with "_", then store the result in a new column "name_t".

In [None]:
# Build a column-friendly label from each name: keep the text before the first
# comma, lowercase it, and swap spaces for underscores.
df['name_t'] = df['name'].map(
    lambda raw: raw.partition(',')[0].lower().replace(' ', '_')
)

In [None]:
# Check the derived 'name_t' column on the first two rows.
df.head(n=2)

###### Pivot data for our final table

In [None]:
# Reshape from long to wide: one row per 'datestring', one column per cleaned
# institution name, each cell holding the matching 'total_assets' value.
pivoted = df.pivot(
    index='datestring',
    columns='name_t',
    values="total_assets",
)

In [None]:
# Inspect the first five rows of the wide table.
pivoted.head(n=5)

## Plot Data

Plot the data to review the result.

In [None]:
# Parse the index labels (the 'datestring' values used as the pivot index)
# into datetimes, presumably so the plot below gets a proper time axis.
pivoted.index = pd.to_datetime(pivoted.index)

In [None]:
# Draw one line per column; the trailing ';' hides the text repr of the
# object returned by the plot call.
pivoted.plot(kind='line');