In [None]:
import pandas as pd

In [None]:
# Read data from website
col_names = ['ENTIDAD', 'AÑO', 'ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
             'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE', 'UNIDAD']
temperature = pd.read_csv("http://201.116.60.46/DatosAbiertos/Temperatura_maxima.csv",
                          header = 0,
                          names = col_names
                         )

In [None]:
# Check data was correctly loaded
temperature.head()

In [None]:
# Quick info on the dataset
temperature.info()

In [None]:
# Delete the 'UNIDAD' column
temperature.drop('UNIDAD', axis = 1, inplace = True)

In [None]:
# Check dimensions
temperature.shape

In [None]:
# Gather the dataset
temperature = pd.melt(temperature, 
        id_vars=['ENTIDAD', 'AÑO'], 
        value_vars = ['ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
                      'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE'],
        var_name = 'MES',
        value_name = 'TEMPERATURA')

In [None]:
# Drop NaNs
temperature.dropna(how='any', inplace=True)

In [None]:
# Change data type of AÑO
temperature['AÑO'] = temperature['AÑO'].astype(int)

In [None]:
# Convert MES to integer
def convertidorMES(mes):
    meses = ['ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
             'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE']
    return meses.index(mes) + 1

temperature['MES_int'] = temperature.MES.apply(convertidorMES)

In [None]:
# Create FECHA column
from datetime import datetime

x = []
for year, month in zip(temperature['AÑO'], temperature['MES_int']):
    x.append(datetime(year = year, month = month, day = 1))

temperature['FECHA'] = x

In [None]:
temperature.head()

In [None]:
temperature.groupby(by = 'ENTIDAD')

In [None]:
import plotly as py
import plotly.graph_objs as go

py.offline.init_notebook_mode() # run at the start of every ipython notebook so that we can work offline

In [None]:
scatter_plot_trace = go.Scatter(
    x = temperature['FECHA'],  
    y = temperature['TEMPERATURA'],
    mode = "markers",  # we want the observations to be points
    marker = dict(  # this object describes what the points will look like
        size = 4  # this is the size of the points; we could also change the colors
    )
)

traces = [scatter_plot_trace]  # the traces that make up our plot have to be stored in a list, even there is only one

scatter_plot_layout = go.Layout(
    xaxis = dict(title = 'Fecha'),  # we reuse the names
    yaxis = dict(title = 'Temperatura')
)

fig = go.Figure(data = traces, layout = scatter_plot_layout)  # we build a figure object that takes the traces as an input

py.offline.iplot(fig)  # finally we plot the figure object

In [None]:
estados = sorted(list(set(temperature.ENTIDAD)))

estados_nombres = ['Aguascalientes',
                   'Baja California',
                   'Baja California Sur',
                   'Campeche',
                   'Chiapas',
                   'Chihuahua',
                   'Coahuila',
                   'Colima',
                   'DF',
                   'Durango',
                   'Guanajuato',
                   'Guerrero',
                   'Hidalgo',
                   'Jalisco',
                   'Michoacán',
                   'Morelos',
                   'México',
                   'Nayarit',
                   'Nuevo León',
                   'Oaxaca',
                   'Puebla',
                   'Querétaro',
                   'Quintana Roo',
                   'San Luis Potosí',
                   'Sinaloa',
                   'Sonora',
                   'Tabasco',
                   'Tamaulipas',
                   'Tlaxcala',
                   'Veracruz',
                   'Yucatán',
                   'Zacatecas']

title = 'Variacion de Temperatura'

def get_spaced_colors(n):
    max_value = 16581375 #255**3
    interval = int(max_value / n)
    colors = [hex(I)[2:].zfill(6) for I in range(0, max_value, interval)]
    
    return ['rgb' + str((int(i[:2], 16), int(i[2:4], 16), int(i[4:], 16))) for i in colors]

colors = get_spaced_colors(31)

line_size = [2] * 32

traces = []

for i in range(len(estados)):
    temporal = temperature[temperature.ENTIDAD == estados[i]].sort_values('FECHA')
    traces.append(go.Scatter(
        x=temporal.FECHA,
        y=temporal.TEMPERATURA,
        mode='lines+markers',
        line=dict(color=colors[i], width=line_size[i]),
        showlegend=True,
        name=estados_nombres[i],
    ))
    
scatter_plot_layout = go.Layout(
    xaxis = dict(title = 'Fecha'),  # we reuse the names
    yaxis = dict(title = 'Temperatura')
)

fig = go.Figure(data = traces, layout = scatter_plot_layout)  # we build a figure object that takes the traces as an input

py.offline.iplot(fig)

In [17]:
temperature.columns

Index([u'ENTIDAD', u'AÑO', u'MES', u'TEMPERATURA', u'MES_int', u'FECHA'], dtype='object')

In [19]:
temperature.ENTIDAD

0                         AGUASCALIENTES
1                        BAJA CALIFORNIA
2                    BAJA CALIFORNIA SUR
3                               CAMPECHE
4                  COAHUILA  DE ZARAGOZA
5                                 COLIMA
6                                CHIAPAS
7                              CHIHUAHUA
8                       DISTRITO FEDERAL
9                                DURANGO
10                            GUANAJUATO
11                              GUERRERO
12                               HIDALGO
13                               JALISCO
14                                M�XICO
15                   MICHOAC�N DE OCAMPO
16                               MORELOS
17                               NAYARIT
18                            NUEVO LE�N
19                                OAXACA
20                                PUEBLA
21                             QUER�TARO
22                          QUINTANA ROO
23                       SAN LUIS POTOS�
24              

In [23]:
anio =temperature.columns[1]

In [27]:
temperature['AÑO']

0        1985
1        1985
2        1985
3        1985
4        1985
5        1985
6        1985
7        1985
8        1985
9        1985
10       1985
11       1985
12       1985
13       1985
14       1985
15       1985
16       1985
17       1985
18       1985
19       1985
20       1985
21       1985
22       1985
23       1985
24       1985
25       1985
26       1985
27       1985
28       1985
29       1985
         ... 
11853    2014
11854    2014
11855    2014
11856    2014
11857    2014
11858    2014
11859    2014
11860    2014
11861    2014
11862    2014
11863    2014
11864    2014
11865    2014
11866    2014
11867    2014
11868    2014
11869    2014
11870    2014
11871    2014
11872    2014
11873    2014
11874    2014
11875    2014
11876    2014
11877    2014
11878    2014
11879    2014
11880    2014
11881    2014
11882    2014
Name: AÑO, dtype: int64

In [28]:
temperature.to_pickle('temperature_mex_gov.p')