### Import Libraries

In [1]:
#### Data Processing ####
import pandas as pd

#### Scraping ####
import requests
from lxml import html

#### Plot ####
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Bar, Scatter, Figure, Layout, Marker, Box
import colorlover as cl

#### Math ####
import math as math

#### Date ####
from datetime import date, timedelta

### Initiate Notebook Mode

In [2]:
init_notebook_mode(connected=True)

### Define Parser

In [3]:
def eco2mix_parser(url):
    
    # Load the page contening the informatiosns
    page = requests.get(url)
    tree = html.fromstring(page.content)
    
    # The website prints values with a variable step (default 15 min) for different date ranges
    m = 120 # for a month range, data print is done step 2H step
    
    # Create the data frame and fill the Date&Time column from the url request
    sdata = pd.DataFrame()
    #sdata.loc[:,'Date&Time'] = pd.date_range(tree.xpath("//date_debut/text()")[0], periods=len(tree.xpath("//type[@v='Nucléaire' and @granularite='Global']/valeur/text()")), freq='15Min')
    sdata.loc[:,'Date&Time'] = pd.date_range(tree.xpath("//date_debut/text()")[0], periods=len(tree.xpath("//type[@v='Nucléaire' and @granularite='Global']/valeur/text()")),freq=(str(m)+'Min'))
        
    # Parse the information about the Energy Production
    types = tree.xpath("//type")
    for t in types:
        sdata.loc[:,t.get('v') + '-' + t.get('granularite')] = pd.Series(tree.xpath("//type[@v='" + t.get('v') + "' and @granularite='" + t.get('granularite') + "']/valeur/text()"))
    
    return sdata

### Settings

In [4]:
# Change the start date, dateDeb, and the end date, dateFin, as following: 
dateStr = date(2016,9,1)   # format yyyy,mm,dd
dateEnd = date(2017,8,31) # format yyyy,mm,dd

### Main

In [5]:
#Check the data period (Split it into months)

period = timedelta()
month = timedelta(days=30)
day = timedelta(days=1)

if dateEnd > dateStr:
    period = dateEnd - dateStr
else:
    print("Set a correct date format")

if period > month:
    timeSlotsNo = math.ceil(period.days / 30);
else:
    timeSlotsNo = 1
#print("Time slots = " + str(timeSlotsNo))
    
data = pd.DataFrame()

for i in range(0,timeSlotsNo):
    if i == 0:
        st = dateStr
    else:
        st = ed + day

    if i == (timeSlotsNo-1):
        ed = dateEnd
    else:
        ed = st + month
        
    if st.day < 10:
        sday = "0" + str(st.day)
    else:
        sday = str(st.day)
        
    if st.month < 10:
        smonth  = "0" + str(st.month)
    else:
        smonth = str(st.month)
        
    if ed.day < 10:
        eday = "0" + str(ed.day)
    else:
        eday = str(ed.day)
        
    if ed.month < 10:
        emonth  = "0" + str(ed.month)
    else:
        emonth = str(ed.month)
        
    start = str(sday) + "/" + str(smonth) + "/" + str(st.year)
    end = str(eday) + "/" + str(emonth) + "/" + str(ed.year)

    url = "http://www.rte-france.com/getEco2MixXml.php?type=mix&&dateDeb=" + start +"&dateFin=" + end +"&mode=NORM"
    #print(url)
    
    if i == 0:
        data = eco2mix_parser(url);
    else:
        data = data.append(eco2mix_parser(url))

print(data.shape)
data.head()

(4464, 26)


Unnamed: 0,Autres-BGA,Autres-BMA,Autres-DEC,Autres-Global,Charbon-Global,Date&Time,Eolien-Global,Fioul + Pointe-Global,Fioul-AUT,Fioul-COG,...,Gaz-TAC,Hydraulique-FEE,Hydraulique-Global,Hydraulique-LAC,Hydraulique-STT,Nucléaire-Global,Pointe-Global,Pompage-Global,Solaire-Global,Solde-Global
0,212,262,486,960,1339,2016-09-01 00:00:00,1225,ND,36,71,...,0,2991,4624,1316,316,37045,ND,-32,0,-1456
1,217,278,497,992,1259,2016-09-01 02:00:00,891,ND,41,71,...,0,2633,3688,950,105,36118,ND,-2067,0,-3557
2,217,271,503,991,1035,2016-09-01 04:00:00,738,ND,41,71,...,0,2611,3412,803,0,35608,ND,-2637,0,-4807
3,217,277,496,989,1298,2016-09-01 06:00:00,589,ND,41,71,...,0,2363,3597,1234,0,36238,ND,-2552,1,-4075
4,213,276,494,984,1487,2016-09-01 08:00:00,528,ND,46,71,...,0,2627,5100,1960,513,37153,ND,-22,203,-2091


### Plot

In [6]:
#### Plot

iplot({
    'data': [
        Scatter(x=data['Date&Time'],y=data['Fioul-Global'],mode='lines',name = 'Fioul (MW)'),
        Scatter(x=data['Date&Time'],y=data['Charbon-Global'],mode='lines',name = 'Charbon (MW)'),
        Scatter(x=data['Date&Time'],y=data['Gaz-Global'],mode='lines',name = 'Gaz (MW)'),
        Scatter(x=data['Date&Time'],y=data['Hydraulique-Global'],mode='lines',name = 'Hydraulique (MW)'),
        Scatter(x=data['Date&Time'],y=data['Nucléaire-Global'],mode='lines',name = 'Nucléaire (MW)'),
        Scatter(x=data['Date&Time'],y=data['Solaire-Global'],mode='lines',name = 'Solaire (MW)'),
        Scatter(x=data['Date&Time'],y=data['Eolien-Global'],mode='lines',name = 'Eolien (MW)'),
        Scatter(x=data['Date&Time'],y=data['Autres-Global'],mode='lines',name = 'Bioénergies (MW)'),
        Scatter(x=data['Date&Time'],y=data['Pompage-Global'],mode='lines',name = 'Pompage (MW)'),
        Scatter(x=data['Date&Time'],y=data['Solde-Global'],mode='lines',name = 'Exports (MW)'),  
    ],
    'layout': Layout(title="""Production Nationale Française d'Électricité entre le """+str(dateStr) +' et le '+str(dateEnd))
}, show_link=False)