### Import libraries

In [1]:
#### Data Processing ####
import pandas as pd

#### Scraping ####
import requests
from lxml import html

#### Plot ####
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Bar, Scatter, Figure, Layout, Marker, Box
import colorlover as cl

#### Math ####
import math as math

#### Date ####
from datetime import date, timedelta

### Initialize notebook mode

In [2]:
# Set up plotly's offline mode so the iplot() call further down can render
# its figure inline in this notebook.
# NOTE(review): per the plotly offline documentation, connected=True loads
# plotly.js from the CDN instead of embedding it in the notebook, so rendering
# presumably needs internet access -- confirm for offline use.
init_notebook_mode(connected=True)

### Define Parser

In [3]:
def eco2mix_parser(url, step_minutes=120, timeout=30):
    """Download an eco2mix XML export and return its values as a DataFrame.

    Parameters
    ----------
    url : str
        Full URL of the eco2mix XML export to fetch.
    step_minutes : int, optional
        Spacing, in minutes, assumed between two consecutive values when the
        'Date&Time' column is rebuilt. The website publishes values with a
        variable step (15 min by default); the default of 120 matches the
        2-hour step it uses for a month-long range.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        A 'Date&Time' column followed by one '<v>-<granularite>' column per
        distinct ``<type>`` element. Cells hold the raw text of the
        ``<valeur>`` nodes (strings), not numbers.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    ValueError
        If the response contains no ``<date_debut>`` element.
    """
    # Without a timeout the request can block forever, and without the status
    # check an HTTP error page would be parsed as if it were data.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    tree = html.fromstring(page.content)

    start_nodes = tree.xpath("//date_debut/text()")
    if not start_nodes:
        raise ValueError("No <date_debut> element found in the response from " + url)

    # The number of 'Nucléaire'/'Global' values sets the length of the time axis.
    # NOTE(review): the timestamps are only correct if step_minutes matches the
    # step the server actually used for the requested range -- confirm for
    # ranges shorter than a month.
    n_points = len(tree.xpath("//type[@v='Nucléaire' and @granularite='Global']/valeur/text()"))
    sdata = pd.DataFrame({
        'Date&Time': pd.date_range(start_nodes[0], periods=n_points, freq=(str(step_minutes) + 'Min')),
    })

    # One column per distinct (v, granularite) pair. The values are collected
    # tree-wide, so a pair carried by several <type> nodes is queried only once.
    seen = set()
    for node in tree.xpath("//type"):
        v, granularite = node.get('v'), node.get('granularite')
        if (v, granularite) in seen:
            continue
        seen.add((v, granularite))
        column = v + '-' + granularite
        # XPath variables avoid building the expression by string concatenation,
        # which would break on attribute values containing a quote.
        values = tree.xpath("//type[@v=$v and @granularite=$g]/valeur/text()", v=v, g=granularite)
        sdata[column] = pd.Series(values)

    return sdata

### Settings

In [8]:
# Period to download: edit the start date (dateStr) and the end date (dateEnd).
# date() takes the year, the month and the day, in that order.
dateStr = date(year=2016, month=9, day=1)
dateEnd = date(year=2016, month=11, day=30)

### Main

In [9]:
# Download the requested period in consecutive windows of at most one "month"
# (30 days after the window start) and stack the results into one DataFrame.

month = timedelta(days=30)
day = timedelta(days=1)

# Stop here instead of querying the server with an empty or reversed range.
if dateEnd <= dateStr:
    raise ValueError("dateEnd (" + str(dateEnd) + ") must be later than dateStr (" + str(dateStr) + ")")

frames = []
st = dateStr
while st <= dateEnd:
    # Clamp every window to dateEnd so no request runs past the requested
    # period and no window ever starts after it ends.
    ed = min(st + month, dateEnd)

    # The service expects dd/mm/yyyy dates.
    start = st.strftime("%d/%m/%Y")
    end = ed.strftime("%d/%m/%Y")

    url = "http://www.rte-france.com/getEco2MixXml.php?type=mix&&dateDeb=" + start + "&dateFin=" + end + "&mode=NORM"
    #print(url)

    # NOTE(review): eco2mix_parser assumes a 2-hour step; a short final window
    # may be served at a finer step -- confirm.
    frames.append(eco2mix_parser(url))

    # The next window starts the day after this one ends.
    st = ed + day

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# ignore_index gives the stacked frame a single continuous row index.
data = pd.concat(frames, ignore_index=True)

print(data.shape)
data.head()

http://www.rte-france.com/getEco2MixXml.php?type=mix&&dateDeb=01/09/2016&dateFin=01/10/2016&mode=NORM
http://www.rte-france.com/getEco2MixXml.php?type=mix&&dateDeb=02/10/2016&dateFin=01/11/2016&mode=NORM
http://www.rte-france.com/getEco2MixXml.php?type=mix&&dateDeb=02/11/2016&dateFin=30/11/2016&mode=NORM
(1092, 26)


Unnamed: 0,Date&Time,Nucléaire-Global,Charbon-Global,Gaz-Global,Fioul-Global,Pointe-Global,Fioul + Pointe-Global,Hydraulique-Global,Eolien-Global,Solde-Global,...,Gaz-TAC,Gaz-COG,Gaz-CCG,Gaz-AUT,Hydraulique-FEE,Hydraulique-LAC,Hydraulique-STT,Autres-DEC,Autres-BMA,Autres-BGA
0,2016-09-01 00:00:00,37045,1339,4437,117,ND,ND,4624,1225,-1456,...,0,432,3989,17,2991,1316,316,486,262,212
1,2016-09-01 02:00:00,36118,1259,4019,121,ND,ND,3688,891,-3557,...,0,446,3560,15,2633,950,105,497,278,217
2,2016-09-01 04:00:00,35608,1035,3350,120,ND,ND,3412,738,-4807,...,0,358,2978,15,2611,803,0,503,271,217
3,2016-09-01 06:00:00,36238,1298,4247,121,ND,ND,3597,589,-4075,...,0,419,3813,15,2363,1234,0,496,277,217
4,2016-09-01 08:00:00,37153,1487,4859,127,ND,ND,5100,528,-2091,...,0,479,4364,17,2627,1960,513,494,276,213


In [10]:
#### Plot
# One line trace per column of `data`, all sharing the 'Date&Time' axis.
# Each pair below is (column in `data`, legend label).

iplot({
    'data': [
        Scatter(x=data['Date&Time'], y=data[column], mode='lines', name=label)
        for column, label in (
            ('Fioul-Global', 'Fioul (MW)'),
            ('Charbon-Global', 'Charbon (MW)'),
            ('Gaz-Global', 'Gaz (MW)'),
            ('Hydraulique-Global', 'Hydraulique (MW)'),
            ('Nucléaire-Global', 'Nucléaire (MW)'),
            ('Solaire-Global', 'Solaire (MW)'),
            ('Eolien-Global', 'Eolien (MW)'),
            ('Autres-Global', 'Bioénergies (MW)'),
            ('Pompage-Global', 'Pompage (MW)'),
            ('Solde-Global', 'Exports (MW)'),
        )
    ],
    'layout': Layout(
        title="""Production Nationale Française d'Électricité entre le """ + str(dateStr) + ' et le ' + str(dateEnd)
    ),
}, show_link=False)