# Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import datetime

from googletrans import Translator

# MyDataFrame Class

In [2]:
class MyDataFrame:
    """Bundle a pandas DataFrame with the title, unit and footer used to present it.

    On construction the wrapped frame is cleaned: optional title/unit capture
    from multi-level column headers, flattening of the column levels, optional
    translation of the index to English, optional white-space replacement, and
    finally removal of all-NaN rows and columns.

    Note: ``drop_na`` works in place, so the DataFrame passed in by the caller
    is itself modified.
    """

    # Index labels whose English spelling is fixed by hand instead of being
    # sent to the online translator.
    _KNOWN_INDEX_LABELS = {
        'Reinjeção': 'Reinjection',
        'Espírito Santo': 'Espirito_Santo',
        'Espirito_Santo': 'Espirito_Santo',
        'Amazonas': 'Amazonas',
        'Alagoas': 'Alagoas',
        'Ceará': 'Ceara',
        'Ceara': 'Ceara',
        'Rio Grande do Norte': 'Rio_Grande_do_Norte',
        'Rio_Grande_do_Norte': 'Rio_Grande_do_Norte',
    }

    # Maps the digits 1-9 to their superscript form ('0' is left untouched).
    _SUPERSCRIPT_DIGITS = str.maketrans('123456789', '¹²³⁴⁵⁶⁷⁸⁹')

    def __init__(self, df, translate=False, translate_first_level=False, white_space=False, drop_level=True):
        """
        Wrap ``df`` and run the cleaning steps selected by the flags.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to wrap; it is stored as ``self.df`` without copying.
        translate : bool
            Translate the index labels to English.
        translate_first_level : bool
            For a MultiIndex, also translate the first level (level 0).
        white_space : bool
            Replace white space in the index labels with underscores.
        drop_level : bool
            Flatten multi-level column headers down to their last level.
        """
        self.df = df
        self.translator = Translator()
        self.title = ''
        self.unit = ''
        self.footer = ''
        self.translate = translate
        self.translate_first_level = translate_first_level
        self.white_space = white_space
        self.drop_level = drop_level

        # NOTE(review): the trigger looks at the *index* levels although the
        # title/unit are read from the *columns* - confirm this is intended.
        if self.df.index.nlevels > 1:
            self.title_unit_multiindex()

        if self.drop_level:
            self.drop_levels()

        if self.translate:
            if self.df.index.nlevels == 1:
                self.translate_index()
            else:
                self.translate_multi_index()

        if self.white_space:
            self.replace_white_space()

        self.drop_na()

    def title_unit_multiindex(self):
        """
        This method captures the title and the unit from multi-level column headers.

        The title is the first level of the first column and the unit is the
        second level of the second column. Nothing is captured when the columns
        are not multi-level or there are fewer than two of them, because those
        positions do not exist then.
        """
        columns = self.df.columns
        if columns.nlevels < 2 or len(columns) < 2:
            return
        self.title = columns[0][0]
        self.unit = columns[1][1]

    def drop_levels(self):
        """
        This method drops the upper column levels, keeping only the last one.
        """
        while self.df.columns.nlevels > 1:
            self.df.columns = self.df.columns.droplevel(0)

    def drop_na(self):
        """
        This method drops, in place, all rows and columns whose values are all NaN.
        """
        self.df.dropna(how='all', inplace=True)
        self.df.dropna(axis='columns', how='all', inplace=True)

    def translate_index(self):
        """
        This method translates a single-level index to English.

        Labels listed in ``_KNOWN_INDEX_LABELS`` get their fixed spelling; every
        other label is sent to the translator. The new labels are also kept in
        ``self.new_index``.
        """
        self.new_index = []
        for label in self.df.index:
            if label in self._KNOWN_INDEX_LABELS:
                self.new_index.append(self._KNOWN_INDEX_LABELS[label])
            else:
                self.new_index.append(self.translator.translate(label).text)
        self.df.index = self.new_index

    def _replace_in_level(self, level, old, new):
        """Literally replace ``old`` with ``new`` in every label of one index level."""
        # regex=False: labels may contain regex metacharacters such as '(' or '.'.
        labels = self.df.index.levels[level].str.replace(old, new, regex=False)
        self.df.index = self.df.index.set_levels(labels, level=level)

    def _translate_cached(self, text, cache):
        """Translate ``text`` once, remembering the result in ``cache``."""
        if text not in cache:
            cache[text] = self.translator.translate(text).text
        return cache[text]

    def translate_multi_index(self):
        """
        This method translates a MultiIndex DataFrame to English.

        Level 0 is only processed when ``translate_first_level`` is set; level 1
        is always processed. Rows are visited in order and each replacement is
        applied to the whole level, so later rows see the already replaced
        labels. Non-string labels (e.g. NaN) are skipped. Does nothing on a
        single-level index.
        """
        if self.df.index.nlevels < 2:
            return

        translations = {}  # avoids one translator call per row for repeated labels
        row_count = len(self.df.index)

        if self.translate_first_level:
            for row in range(row_count):
                label = self.df.index[row][0]
                if not isinstance(label, str):
                    continue
                if label in ('Espírito_Santo', 'Espirito_Santo'):
                    self._replace_in_level(0, 'Espírito_Santo', 'Espirito_Santo')
                elif label in ('Amazonas', 'Alagoas'):
                    # These names are spelled the same in English.
                    continue
                elif label in ('Ceará', 'Ceara'):
                    self._replace_in_level(0, 'Ceará', 'Ceara')
                elif label in ('Rio Grande do Norte', 'Rio_Grande_do_Norte'):
                    # Replaces the spaces of every label in the level, as before.
                    self._replace_in_level(0, ' ', '_')
                else:
                    self._replace_in_level(0, label, self._translate_cached(label, translations))

        for row in range(row_count):
            label = self.df.index[row][1]
            if label == 'Mar':  # the translator does not handle this word
                self._replace_in_level(1, 'Mar', 'Offshore')
            elif label == 'Terra':  # the translator does not handle this word
                self._replace_in_level(1, 'Terra', 'Onshore')
            elif isinstance(label, str):
                self._replace_in_level(1, label, self._translate_cached(label, translations))

    def _swap_in_index(self, old, new):
        """Replace ``old`` with ``new`` in the labels of every index level."""
        if self.df.index.nlevels > 1:
            for level in range(self.df.index.nlevels):
                self._replace_in_level(level, old, new)
        elif self.df.index.nlevels == 1:
            self.new_index = [label.replace(old, new) for label in self.df.index]
            self.df.index = self.new_index

    def replace_white_space(self):
        """
        This method replaces all white space in the index for underscore.

        ``__init__`` calls it when ``white_space=True``; the class did not
        define it before, so that flag raised AttributeError.
        """
        self._swap_in_index(' ', '_')

    def replace_underscore(self):
        """
        This method replaces all underscore in the index for white space.
        """
        self._swap_in_index('_', ' ')

    def drop_unnamed_column(self):
        """
        This method drops the last column if its name starts with 'Unnamed'.
        """
        if len(self.df.columns) == 0:
            return
        last = self.df.columns[-1]
        if isinstance(last, str) and last.startswith('Unnamed'):
            self.df = self.df.drop(last, axis=1)

    def index_sups(self):
        """
        This method turns the digits 1-9 of the index labels into superscripts.

        For a MultiIndex only the first level is changed.
        """
        table = self._SUPERSCRIPT_DIGITS
        if self.df.index.nlevels > 1:
            outer = self.df.index.levels[0]
            if len(outer):
                self.df.index = self.df.index.set_levels(outer.str.translate(table), level=0)
        else:
            self.df.index = self.df.index.str.translate(table)


# Load and Wrangling

## Total¹ Reserves of Natural Gas, by Location (Shore and Offshore), according to Federation² Units - 2010-2019

In [3]:
# Load table 2.5 (header rows 0, 2 and 3; two index columns) and translate
# its index labels to English.
total_reserves = MyDataFrame(
    pd.read_excel(
        r'data_set/anuario-2020-tabela-2_5.xls',
        header=[0, 2, 3],
        index_col=[0, 1],
    ),
    translate=True,
)

# Clean-up: drop the 'Unnamed' column, turn index digits into superscripts,
# then transpose.
total_reserves.drop_unnamed_column()
total_reserves.index_sups()
total_reserves.df = total_reserves.df.T

# Presentation metadata: title, unit and footer.
total_reserves.title = 'Total¹ Reserves of Natural Gas, by Location (Shore and Offshore), according to Federation² Units'
total_reserves.unit = '10⁶ m³'
total_reserves.footer = (
    'Source:\n'
    'ANP/SDP, as per Resolution ANP No. 47/2014.\n'
    '\n'
    'Notes:\n'
    '1. Reserves on 12/31 of the reference years.\n'
    '2. See the General Notes item on "Brazilian Oil and Natural Gas Reserves".\n'
    '\n'
    '¹ Including reserves whose fields Development Plans are under analysis.\n'
    '² The reserves are fully appropriated to the state in which each field has its area mostly located.\n'
    '³ The Roncador and Frade field reserves are fully appropriated in the State of Rio de Janeiro, for simplification.\n'
    '⁴ The Sapinhoá field reserves are fully appropriated in the State of São Paulo for simplification.\n'
    '⁵ The reserves in the Caravela field are fully appropriated in the State of Paraná, for simplification.\n'
    '⁶ The Tubarão field reserves are fully appropriated in the State of Santa Catarina, for simplification.\n'
)

## Evolution of Natural Gas Processing Capacity, According to Production Centers - 2010-2019

In [4]:
# Load table 2.30 (header rows 0, 2 and 3; one index column).
processing = MyDataFrame(
    pd.read_excel(
        r'data_set/anuario-2020-tabela-2_30.xls',
        header=[0, 2, 3],
        index_col=[0],
    )
)

# Transpose the wrapped frame.
processing.df = processing.df.T

# Presentation metadata: title, unit and footer.
processing.title = 'Evolution of Natural Gas Processing Capacity, According to Production Centers'
processing.unit = '10³ m³/dia'
processing.footer = (
    '¹ Volume in the gaseous state.\n'
    '² Includes the UPGNs (Natural Gas Production Units) in Catu and Bahia until 2013. From 2014, only includes Catu.'
)

## Proved natural gas reserves¹, per location (onshore and offshore), by State² – 2010-2019

In [5]:
# Load table 1.2 (header rows 0, 2 and 3; two index columns).
proved_reserves = MyDataFrame(
    pd.read_excel(r'data_set/table-1_2.xls', header=[0, 2, 3], index_col=[0, 1])
)

# Drop the 'Unnamed' column.
proved_reserves.drop_unnamed_column()

# Turn the digits in the index labels into superscripts.
proved_reserves.index_sups()

# Transpose the wrapped frame.
proved_reserves.df = proved_reserves.df.T

# Presentation metadata: unit, title and footer.
proved_reserves.unit = '10⁶ m³'
proved_reserves.title = 'Proved natural gas reserves¹, per location (onshore and offshore), by State²'
# Note 1 previously had the text of footnote ¹ fused onto its end
# ("...reference years.1Reserves related to..."); the stray duplicate is removed,
# the footnote itself is still listed below.
proved_reserves.footer = (
    'Source: \n'
    'ANP/SDP, as per Ordinance ANP No. 47/2014.\n'
    '\n'
    'Notes: \n'
    '1. Reserves on December 31 of reference years.\n'
    '2. Condensates included.\n'
    '3. See item in General Notes on "Brazilian Oil and Natural Gas Reserves".\n'
    '\n'
    '¹ Reserves related to fields whose development plans are still under analysis by ANP included.\n'
    '² The reserves are fully appropriated to the State in which each field has its area.\n'
    '³ Reserves related to Roncador and Frade fields were totally accounted to the State of Rio de Janeiro by means of simplification.\n'
    '⁴ Sapinhoá field reserves are fully appropriated in the State of São Paulo for simplification.\n'
    '⁵ Reserves related to Caravela field were totally accounted to the State of Paraná by means of simplification.\n'
    '⁶ Reserves related to Tubarão field are totally accounted to the State of Santa Catarina by means of simplification.'
)

## Natural Gas Domestic Sales by Brazilian Region and State – 2010-2019

In [6]:
# Load table 1.25 (header rows 0, 2 and 3; one index column).
domestic_sale = MyDataFrame(
    pd.read_excel(r'data_set/table-1_25.xls', header=[0, 2, 3], index_col=[0])
)

# Clean-up: drop the 'Unnamed' column, turn index digits into superscripts,
# then transpose.
domestic_sale.drop_unnamed_column()
domestic_sale.index_sups()
domestic_sale.df = domestic_sale.df.T

# Presentation metadata: title, unit and footer.
domestic_sale.title = 'Natural Gas Domestic Sales by Brazilian Region and State'
domestic_sale.unit = '10⁶ m³'
domestic_sale.footer = (
    'Sources: \n'
    'Petrobras and ANP.\n'
    '\n'
    'Note:\n'
    'Related only the States where there were sales of natural gas in the specified period.\n'
    '\n'
    '¹Include sales to thermal generation ²Sales for Nitrogen Fertilizer plants (Fafen) and sales for thermal generation.'
)

## Brazilian natural gas balance – 2010-2019

In [7]:
# Load table 1.26 (header rows 0, 2 and 3; one index column), ignoring the
# last line of the sheet.
balance = MyDataFrame(
    pd.read_excel(
        r'data_set/table-1_26.xls',
        header=[0, 2, 3],
        index_col=[0],
        skipfooter=1,
    )
)

# Clean-up: drop the 'Unnamed' column and turn index digits into superscripts.
balance.drop_unnamed_column()
balance.index_sups()

# Keep the first nine rows only (the original note says this skips a
# duplicate row) and transpose.
balance.df = balance.df.iloc[list(range(9))].T

# Presentation metadata: title, unit and footer.
balance.title = 'Brazilian Natural Gas Balance'
balance.unit = '10⁶ m³'
balance.footer = (
    'Sources: \n'
    'ANP/SIM, as per Ordinance ANP No. 43/98, for imports data; ANP/SDP, as per Decree No. 2.705/98, for\n'
    'production, reinjection, gas flaring and losses data; Petrobras, for own consumption, NGL and sales data.\n'
    '\n'
    '¹ Refers to Petrobras own consumption in production areas, refineries, NGPP (Natural Gas Power Plant),\n'
    ' transportation and storage. \n'
    '² Volume of gas absorbed in NGPPs. \n'
    '³ Sales to distributors, nitrofertilizers plants (Fafen) and electricity generation.'
)

## Brazilian Natural Gas Demand – 2010-2019

In [8]:
# Demand frame: the demand-side columns of the balance table.
demand = MyDataFrame(
    balance.df[
        [
            'Import',
            'Reinjection',
            'Gas flaring',
            'Own consumption¹',
            'NGL²',
            'Sales³',
            'Adjustments and losses',
        ]
    ]
)

# Presentation metadata: title, unit and footer.
demand.title = 'Brazilian Natural Gas Demand'
demand.unit = '10⁶ m³'
demand.footer = (
    'Sources: \n'
    'ANP/SIM, as per Ordinance ANP No. 43/98, for imports data; ANP/SDP, as per Decree No. 2.705/98, for\n'
    'production, reinjection, gas flaring and losses data; Petrobras, for own consumption, NGL and sales data.\n'
    '\n'
    '¹ Refers to Petrobras own consumption in production areas, refineries, NGPP (Natural Gas Power Plant),\n'
    ' transportation and storage. \n'
    '² Volume of gas absorbed in NGPPs. \n'
    '³ Sales to distributors, nitrofertilizers plants (Fafen) and electricity generation.'
)

In [9]:
# Title and unit for the total-demand series.
demand_total_title = 'Brazilian Natural Gas Total Demand – 2010-2019'
demand_total_unit = '10⁶ m³'

# Total demand: row-wise sum over all demand columns.
demand_total = demand.df.sum(axis='columns')

## Natural gas production, per location (onshore and offshore, pre-salt and post-salt), by State – 2010-2019 

In [10]:
# Load table 1.5 (header rows 0, 2 and 3; two index columns).
production = MyDataFrame(
    pd.read_excel(r'data_set/table-1_5.xls', header=[0, 2, 3], index_col=[0, 1])
)

# Clean-up: drop the 'Unnamed' column, turn index digits into superscripts,
# then transpose.
production.drop_unnamed_column()
production.index_sups()
production.df = production.df.T

# Presentation metadata: title, unit and footer.
production.title = 'Natural gas production, per location (onshore and offshore, pre-salt and post-salt), by State'
production.unit = '10⁶ m³'
production.footer = (
    'Source: \n'
    'ANP/SDP, as per Decree No. 2.705/1998.\n'
    '\n'
    'Note: \n'
    'Total production includes reinjection, gas flaring, losses and own consumption.'
)

## Production of associated and non-associated natural gas, by State – 2010-2019

In [11]:
# Load table 1.6 (header rows 0, 2 and 3; two index columns).
gas_type = MyDataFrame(
    pd.read_excel(r'data_set/table-1_6.xls', header=[0, 2, 3], index_col=[0, 1])
)

# Clean-up: drop the 'Unnamed' column, then transpose.
gas_type.drop_unnamed_column()
gas_type.df = gas_type.df.T

# Presentation metadata: title, unit and footer.
gas_type.title = 'Production of associated and non-associated natural gas, by State'
gas_type.unit = '10⁶ m³'
gas_type.footer = (
    'Source: \n'
    'ANP/SDP, as per Decree No. 2.705/1998.\n'
    '\n'
    'Note: \n'
    'Total production volume includes reinjection, flared gas, losses, own consumption.'
)

## Average Reference Prices for Natural Gas, According to Federation Units - 2010-2019

In [12]:
# Load table 2.24 (header rows 0, 2, 3 and 4; one index column).
avg_price = MyDataFrame(
    pd.read_excel(
        r'data_set/anuario-2020-tabela-2_24.xls',
        header=[0, 2, 3, 4],
        index_col=[0],
    )
)

# Keep columns 10-19 (the 'US$/mil m³' block, per the original note) and
# transpose the result.
avg_price.df = avg_price.df.iloc[:, 10:20].T

# Presentation metadata: title, unit and footer.
avg_price.title = 'Average Reference Prices for Natural Gas, According to Federation Units'
avg_price.unit = 'US$/mil m³'
avg_price.footer = (
    'Source: \n'
    'ANP / SPG, according to Law No. 9,478 / 1997, Decree No. 2,705 / 1998 and \n'
    'ANP Ordinance No. 206/2000. \n'
    '\n'
    'Notes: \n'
    '1. Prices in current values. \n'
    '2. Only the Federation units that presented natural gas production in the \n'
    'indicated period are listed. \n'
    '3. The above prices do not serve as a basis for calculating government\n'
    'stakes,since they are weighted averages only for production volumes per field \n'
    'and do not consider royalty and special participation rates per \n'
    'producing field.'
)

## Electric Energy by Source

In [13]:
# Raw generation records; the column names are translated by hand.
energy_source = pd.read_csv(r'data_set/GeracaoFonte.csv')
energy_source.columns = ['ID', 'Energy Type', 'GWh', 'Month', 'Year', 'dthProx']

# Pivot to one row per energy type and one column per (Year, Month), then wrap
# the result without flattening the column levels.
energy_source = MyDataFrame(
    energy_source.pivot_table(values='GWh', index=['Energy Type'], columns=['Year', 'Month']),
    drop_level=False,
)

# Index labels translated by hand (assigned positionally).
energy_source.df.index = [
    'Biomass',
    'Coal',
    'Energy Produced Out of SIN',
    'Eolic',
    'Natural Gas',
    'Hidrelectric',
    'Itaipu',
    'Nuclear',
    'Diesel/Oil',
    'Waste Industrial Processes',
    'Solar',
]

# Transpose the wrapped frame.
energy_source.df = energy_source.df.T

# Merge both hydro sources into a single column and drop the originals.
energy_source.df['Hydroelectric'] = energy_source.df['Itaipu'].add(energy_source.df['Hidrelectric'])
energy_source.df.drop(columns=['Hidrelectric', 'Itaipu'], inplace=True)
                

In [14]:
# Raw generation records; the column names are translated by hand.
energy_source = pd.read_csv(r'data_set/GeracaoFonte.csv')
energy_source.columns = ['ID', 'Energy Type', 'GWh', 'Month', 'Year', 'dthProx']

# Pivot to one row per energy type and one column per (Year, Month), then wrap
# the result without flattening the column levels.
energy_source = MyDataFrame(
    energy_source.pivot_table(values='GWh', index=['Energy Type'], columns=['Year', 'Month']),
    drop_level=False,
)

# Index labels translated by hand (assigned positionally).
energy_source.df.index = [
    'Biomass',
    'Coal',
    'Energy Produced Out of SIN',
    'Eolic',
    'Natural Gas',
    'Hidrelectric',
    'Itaipu',
    'Nuclear',
    'Diesel/Oil',
    'Waste Industrial Processes',
    'Solar',
]

# Transpose the wrapped frame.
energy_source.df = energy_source.df.T

# Merge both hydro sources into a single column and drop the originals.
energy_source.df['Hydroelectric'] = energy_source.df['Itaipu'].add(energy_source.df['Hidrelectric'])
energy_source.df.drop(columns=['Hidrelectric', 'Itaipu'], inplace=True)

# Drop the rows of 2019.
energy_source.df.drop(index=2019, inplace=True)

# Presentation metadata: title, unit and footer.
energy_source.title = 'Electric Energy by Source'
energy_source.unit = 'GWh'
energy_source.footer = (
    'Source: ANEEL\n'
    'Notes: History of the electric energy volume produced in the country in GWh, expressed by the values of energy load \n'
    'dispatched in the National Interconnected System - SIN, classified by renewable sources or not and the volume \n'
    'produced by the generators not yet interconnected.'
)

## Brazilian Indicators

In [15]:
# Indicators file; the first four lines of the CSV are skipped.
bra_ind = pd.read_csv(
    r'data_set/API_BRA_DS2_en_csv_v2_1221399.csv',
    skiprows=list(range(4)),
)

## Total own consumption of natural gas, according to Regions and Federation Units - 2009-2018

In [16]:
# Forward slash keeps the path portable and consistent with the other cells
# (the backslash form only resolves on Windows).
consumption = MyDataFrame(pd.read_csv(r'data_set/anuario-2019-abertos-tabela3_31.csv'))

# Column names translated by hand.
consumption.df.columns = ['Region', 'Federation Unit', 'Consume 10³ m³', 'Year']

# Rearrange: one row per year, one column per (Region, Federation Unit).
consumption.df = pd.pivot_table(
    consumption.df,
    values='Consume 10³ m³',
    index='Year',
    columns=['Region', 'Federation Unit'],
)

# Region names translated by hand, applied in this order to the first column
# level. 'Nordeste' is the Northeast (it was mistranslated as 'Northwest').
for _pt, _en in (
    ('Nordeste', 'Northeast'),
    ('Norte', 'North'),
    ('Sudeste', 'Southeast'),
    ('Sul', 'South'),
):
    consumption.df.columns = consumption.df.columns.set_levels(
        consumption.df.columns.levels[0].str.replace(_pt, _en), level=0
    )

# Presentation metadata: unit, title and footer. These belong to the wrapper,
# like in every other cell; they were previously set on the inner DataFrame,
# leaving consumption.unit/title/footer empty.
consumption.unit = '10³ m³'
consumption.title = 'Natural Gas Consumption by Region and Federation Unit'
consumption.footer = 'Source: \nANP Statistical Yearbook 2020'

## Demand by Segment Scraped From Scratch

Source: http://www.mme.gov.br/documents/36216/1119340/06+-+Boletim+Mensal+de+Acompanhamento+da+Ind%C3%BAstria+de+G%C3%A1s+Natural+Junho+2020/4ecd27ca-bd64-bfa7-3510-03799045f87f

In [17]:

# Values copied by hand from the MME report. The rows are labelled with the
# years 2015-2019, then the months 1-6, then 2020.
sales_segment_ = pd.DataFrame(
    data={
        'Industrial¹': [43.61, 40.82, 40.77, 39.75, 36.97, 36.34, 37.17, 35.70, 28.16, 31.22, 34.61, 33.87],
        'Automotive': [4.82, 4.96, 5.40, 6.06, 6.26, 5.87, 6.29, 4.83, 3.36, 3.63, 4.34, 4.72],
        'Residencial': [0.97, 1.11, 1.18, 1.26, 1.27, 1.00, 1.14, 1.30, 1.38, 1.49, 1.64, 1.33],
        'Comercial': [0.79, 0.83, 0.78, 0.84, 0.91, 0.86, 0.87, 0.84, 0.51, 0.32, 0.46, 0.64],
        'Electric Generation': [45.90, 29.59, 34.25, 27.69, 29.03, 40.46, 25.63, 19.52, 17.26, 15.70, 18.12, 22.78],
        'Cogenaration': [2.50, 2.37, 2.65, 2.84, 2.65, 2.30, 2.12, 2.26, 2.22, 1.65, 2.07, 2.10],
        'Others (including GNC)': [0.04, 0.58, 0.53, 0.40, 0.83, 0.42, 0.35, 0.36, 1.22, 0.76, 0.65, 0.63],
    },
    index=[2015, 2016, 2017, 2018, 2019, 1, 2, 3, 4, 5, 6, 2020],
)

# Monthly rows (labels 1-6) become their own frame, indexed by month name.
sales_seg_covid_ = sales_segment_.loc[[1, 2, 3, 4, 5, 6]].copy()
sales_seg_covid_.index = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# The yearly frame keeps 2015-2019 only.
sales_segment_.drop([1, 2, 3, 4, 5, 6, 2020], inplace=True)


In [18]:
# Wrap the yearly sales frame.
sales_segment = MyDataFrame(sales_segment_)

# Presentation metadata: title, unit and footer.
sales_segment.title = 'Brazilian Sales of Natural Gas by Segment'
sales_segment.unit = '10⁶ m³/day'
sales_segment.footer = (
    'Source:\n'
    'MME, Monthly Industry Follow-up Bulletin of Natural Gas - June 2020\n'
    '\n'
    '¹ Includes consumption by refineries, fertilizer factories and use of gas as raw material.'
)

In [19]:
# Wrap the monthly (Covid period) sales frame.
sales_seg_covid = MyDataFrame(sales_seg_covid_)

# Presentation metadata: title, unit and footer.
sales_seg_covid.title = 'Brazilian Sales of Natural Gas 1º Semester 2020'
sales_seg_covid.unit = '10⁶ m³/day'
sales_seg_covid.footer = (
    'Source:\n'
    'MME, Monthly Industry Follow-up Bulletin of Natural Gas - June 2020\n'
    '\n'
    '¹ Includes consumption by refineries, fertilizer factories and use of gas as raw material.'
)


### Demand by Segment %

In [20]:
sales_per = sales_segment.df.copy()

# Each row becomes the share of every segment in that row's total. The loop
# covers whatever rows are present instead of the hard-coded 2015-2019 labels
# and positions 0-4.
for _year, _row in sales_segment.df.iterrows():
    sales_per.loc[_year] = _row.div(_row.sum())

# Wrap the shares frame.
sales_per = MyDataFrame(sales_per)

# Presentation metadata: unit, title and footer.
# NOTE(review): the values are fractions (0-1) while the unit says '%';
# confirm the plotting code formats them as percentages.
sales_per.unit = '%'
sales_per.title = 'Brazilian Sales of Natural Gas by Segment'
sales_per.footer = (
    'Source:\n'
    'MME, Monthly Industry Follow-up Bulletin of Natural Gas - June 2020\n'
    '\n'
    '¹ Includes consumption by refineries, fertilizer factories and use of gas as raw material.'
)


## Precipitation DataFrame

In [21]:
# The first 21 lines of the CSV are skipped.
skip = np.arange(21)

# Forward slash keeps the path portable and consistent with the other cells
# (the backslash form only resolves on Windows).
precip = MyDataFrame(pd.read_csv(r'data_set/precip.csv', skiprows=skip))

# Column names: the year followed by the twelve months.
precip.df.columns = ['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Ago', 'Sep', 'Oct', 'Nov', 'Dec']

# Use the year as the index and remove it from the columns.
precip.df.index = precip.df.loc[:, 'Year']
precip.df.drop('Year', axis=1, inplace=True)

# Presentation metadata: unit, title and footer.
precip.unit = 'mm/month'
precip.title = 'Average Preciptation Rate Monthly'
precip.footer = (
    'Source: \n'
    'The data used in this visualization were produced with the Giovanni online data system, developed and maintained by the NASA GES DISC.'
)

## Creating Energy, Hydro Energy and Precipitation Time Series

In [22]:
# Month-end dates: 228 periods starting in January 2000.
dt = pd.date_range(start='1/1/2000', periods=228, freq='M')

# Total generation per (Year, Month) row, as a one-column frame.
ts_energy = energy_source.df.sum(axis=1).copy().to_frame('GWh')

# Attach the dates, promote them to the index and discard the Year/Month
# columns produced by resetting the old index.
ts_energy.insert(0, 'Date', dt)
ts_energy = ts_energy.reset_index().set_index('Date').drop(columns=['Year', 'Month'])

# Blank index name.
ts_energy.index.name = ''

In [23]:
# Month-end dates: 228 periods starting in January 2000.
dt = pd.date_range(start='1/1/2000', periods=228, freq='M')

# Hydroelectric generation per (Year, Month) row, as a one-column frame.
ts_hydro = energy_source.df.loc[:, 'Hydroelectric'].copy().to_frame('GWh')

# Attach the dates, promote them to the index and discard the Year/Month
# columns produced by resetting the old index.
ts_hydro.insert(0, 'Date', dt)
ts_hydro = ts_hydro.reset_index().set_index('Date').drop(columns=['Year', 'Month'])

# Blank index name.
ts_hydro.index.name = ''

In [24]:
# Month-end dates: 228 periods starting in January 2000.
dt = pd.date_range(start='1/1/2000', periods=228, freq='M')

# Move the year out of the index, discard it, and stack the month columns
# into a single Series.
ts_precip = precip.df.copy().reset_index().drop(columns='Year').stack()

# Replace the stacked index with the dates.
ts_precip.index = dt

# Blank index name.
ts_precip.index.name = ''

## Carbon Dioxide Emissions Coefficients

In [25]:
# CO2 coefficients table (header rows 0, 1 and 2; one index column).
# Forward slash keeps the path portable and consistent with the other cells
# (the backslash form only resolves on Windows).
co2_em = MyDataFrame(
    pd.read_excel(r'data_set/co2_vol_mass_updated.xls', header=[0, 1, 2], index_col=[0])
)


In [46]:
# Wrap a copy of the last column of the CO2 table as a one-column frame.
co2_emission = MyDataFrame(
    co2_em.df.iloc[:, -1].to_frame().copy()
)

# Presentation metadata: title, unit and footer.
co2_emission.title = 'Carbon Dioxide Emissions Coefficients by Fuel'
co2_emission.unit = 'Kg/10⁶ BTU'
co2_emission.footer = (
    'Source: \n'
    'U.S. Energy Information Administration estimates.\n'
    '\n'
    'Note: \n'
    'To convert to carbon equivalents multiply by 12/44. Coefficients may vary slightly with estimation method and across time.'
)

# Disabled steps: blank the index/columns names and sort by value.
#co2_emission.df.index.name = ''
#co2_emission.df.columns.name=''
#co2_emission.df.sort_values('Per Million Btu', inplace=True)

In [47]:
# Display the frame wrapped by co2_emission.
co2_emission.df

Unnamed: 0_level_0,Per Million Btu
For homes and businesses,Unnamed: 1_level_1
Propane,63.071458
Butane,64.953729
Butane/Propane Mix,64.012594
Home Heating and Diesel Fuel (Distillate),73.164536
Kerosene,72.30271
Coal (All types),95.34523
Natural Gas,53.07037
Gasoline,71.3
Residual Heating Fuel (Businesses only),78.789088
Jet Fuel,70.896572


In [48]:
# Display the last column of co2_em.df as a one-column DataFrame.
co2_em.df.iloc[:,-1].to_frame()

Unnamed: 0_level_0,Per Million Btu
For homes and businesses,Unnamed: 1_level_1
Propane,63.071458
Butane,64.953729
Butane/Propane Mix,64.012594
Home Heating and Diesel Fuel (Distillate),73.164536
Kerosene,72.30271
Coal (All types),95.34523
Natural Gas,53.07037
Gasoline,71.3
Residual Heating Fuel (Businesses only),78.789088
Jet Fuel,70.896572
