In [246]:
import os
import sys
import itertools

import pydocumentdb.documents as documents
import pydocumentdb.document_client as document_client
import pydocumentdb.errors as errors

import pandas as pd;pd.options.display.max_rows = 999
import matplotlib.pyplot as plt
plt.style.use('seaborn')
import numpy as np

import view.config as cfg
import view.storage as st

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

from IPython.core.display import HTML
from IPython.display import display

In [2]:
HOST = cfg.settings.host
MASTER_KEY = cfg.settings.master_key
DATABASE_ID = cfg.settings.database_id
COLLECTION_ID = cfg.settings.collection_id

#### 1. Fetch available resources

In [3]:
res_query = "select c.resourcename from c"
res_names = st.Reader(DATABASE_ID, COLLECTION_ID, res_query, cfg.settings)
res_names = np.unique([resource["resourcename"] for resource in res_names])

#### 2. Download Data from CosmosDB

In [4]:
data = {}
dividers = {}

for res_name in res_names:
    
    doc_query = "select * from c where c.resourcename = '{0}'".format(res_name)
    
    docs = st.Reader(DATABASE_ID, COLLECTION_ID, doc_query, cfg.settings)
    
    if not len(docs) == 0:
        df = pd.DataFrame([item for doc in docs for item in doc["data"]])
        data[res_name] = df
        
        dividers[res_name] = getattr(cfg.groups, docs[0]["apiname"])
    

#### 3. Basics Statistics

In [384]:
def GetDescription( df, dividers, res_names, index_names ):
	description = GetDescribe( df, dividers, res_names )
	description_df = {}
	for key, value in description.iteritems():
		description_df[key] = GetSingletonStatistic(description[key])
	
	description_df = pd.concat(description_df)
	description_df.index.names = index_names
	return description_df

def GetStatistics( df, dividers, res_names ):
	description = {}
	
	for res_name in res_names:
	
		if not data[res_name].empty:
			if (dividers[res_name] != None 
				and any(dividers[res_name] in s for s in data[res_name].columns.values)):
				groups = data[res_name].groupby([dividers[res_name]])
				description[res_name] = groups.describe()
				description[res_name] = GetSingletonStatistic(description[res_name])
			else:
				description[res_name] = data[res_name].describe().T
				description[res_name].columns = pd.MultiIndex.from_tuples([(res_name, c) for c in description[res_name].columns])
				description[res_name] = GetSingletonStatistic(description[res_name])
	
	return description


def GetSingletonStatistic( description ):
	description = description.stack(0).reset_index(1)
	description = description.rename(index=str, columns={"level_1": "Value_Type"})
	index = pd.MultiIndex.from_tuples(GetSubResourceIndexs( description ))
	description = pd.DataFrame(description.values,index,description.columns);
	description= description.drop('Value_Type', axis=1)
	return description

def GetSubResourceIndexs( description ):
	index =[]
	if (type([description.Value_Type[i] for i in description.index][0])  is pd.core.series.Series) :
		index = pd.MultiIndex.from_tuples([(i,s) for i in description.index for s in description.Value_Type[i]])
	else:
		index = pd.MultiIndex.from_tuples([(i,description.Value_Type[i]) for i in description.index])
	
	return index.unique()



def GetDescribe( df, dividers, res_names ):
	description = {}
	
	for res_name in res_names:
	
		if not data[res_name].empty:
			if (dividers[res_name] != None 
				and any(dividers[res_name] in s for s in data[res_name].columns.values)):
				groups = data[res_name].groupby([dividers[res_name]])
				description[res_name] = groups.describe()
			else:
				description[res_name] = data[res_name].describe().T
				description[res_name].columns = pd.MultiIndex.from_tuples([(res_name, c) for c in description[res_name].columns])
	
	return description

index_names = ["Resource Type","Resource Subtype","Value Type"]
df = GetDescription2( data, dividers, res_names, index_names )
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,25%,50%,75%,count,freq,max,mean,min,std,top,unique
Resource Type,Resource Subtype,Value Type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
accepted_offers,AFRR,value,614.0,685.0,782.0,49916,,1656.0,709.639,225.0,134.89,,
accepted_offers,FCR,value,621.0,663.0,722.0,49916,,1304.0,680.648,447.0,87.2578,,
accepted_offers,MFRR,value,2499.75,4676.0,7051.0,33696,,14180.0,4843.1,0.0,3054.63,,
accepted_offers,RR,value,11834.0,14243.0,17199.0,32292,,32415.0,14357.9,0.0,5332.29,,
actual_generations_per_production_type,BIOMASS,value,194.0,209.0,221.0,3695,,225.0,207.285,156.0,11.5414,,
actual_generations_per_production_type,FOSSIL_GAS,value,1638.5,2576.0,4658.5,3695,,7928.0,3165.3,617.0,1993.99,,
actual_generations_per_production_type,FOSSIL_HARD_COAL,value,398.25,1408.0,2461.75,3694,,3534.0,1447.47,-61.0,1109.97,,
actual_generations_per_production_type,FOSSIL_OIL,value,255.0,273.0,291.0,3694,,1946.0,289.952,193.0,118.34,,
actual_generations_per_production_type,HYDRO_PUMPED_STORAGE,value,-369.0,79.0,785.0,3693,,3284.0,61.0669,-3498.0,1330.39,,
actual_generations_per_production_type,HYDRO_RUN_OF_RIVER_AND_POUNDAGE,value,4998.0,5676.5,6274.0,3694,,7496.0,5607.41,2792.0,881.234,,


In [90]:
description_df.columns.levels[0].unique

<bound method Index.unique of Index([u'available_value', u'direction', u'downward_volume', u'end_date',
       u'installed_capacity', u'load_factor', u'maximum_downward_price',
       u'maximum_upward_price', u'minimum_downward_price',
       u'minimum_upward_price', u'nature', u'price', u'required_value',
       u'start_date', u'updated_date', u'upward_volume', u'value', u'version',
       u'voltage_level_connection'],
      dtype='object')>

In [245]:

for res_name in res_names:
    
    if not data[res_name].empty:
        if (dividers[res_name] != None 
            and any(dividers[res_name] in s for s in data[res_name].columns.values)):
            groups = data[res_name].groupby([dividers[res_name]])
            groups.plot(x='start_date', y='value', marker='.',figsize=(20,10))
        else:
            data[res_name].plot(x='start_date', y='value', marker='.',figsize=(20,10))
    
    groups.plot(x='start_date', y='value', marker='.',figsize=(20,10))
    plt.xticks(rotation=30)
    plt.legend(fontsize=15)
    
plt.show()

KeyError: 'value'

In [22]:
# Provide the path to the yaml file relative to the working directory
display(HTML('''<style>
    .widget-label { min-width: 20ex !important; }
    .widget-text { min-width: 60ex !important; }
</style>'''))

#Toggle Code
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();

 } else {
 $('div.input').show();

 }
 code_show = !code_show
} 
//$( document ).ready(code_toggle);//commenting code disabling by default
</script>
<form action = "javascript:code_toggle()"><input type="submit" value="Toggle Raw Code"></form>''')