In [2]:
import pandas as pd

In [3]:
# Read the monthly maximum-temperature table directly from the publisher's CSV.
# header=0 discards the file's own header row so that col_names replaces it.
col_names = ['ENTIDAD', 'AÑO', 'ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
             'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE', 'UNIDAD']
temperature = pd.read_csv("http://201.116.60.46/DatosAbiertos/Temperatura_maxima.csv",
                          header = 0,
                          names = col_names,
                          # The UNIDAD values rendered with an undecodable (replacement)
                          # character, so the file is not UTF-8. Latin-1 is assumed here
                          # because it decodes every byte -- TODO confirm with the publisher.
                          encoding = 'latin-1'
                         )

In [4]:
# Preview the first rows to confirm the custom column names line up with the values read from the CSV
temperature.head()

Unnamed: 0,ENTIDAD,AÑO,ENERO,FEBRERO,MARZO,ABRIL,MAYO,JUNIO,JULIO,AGOSTO,SEPTIEMBRE,OCTUBRE,NOVIEMBRE,DICIEMBRE,UNIDAD
0,AGUASCALIENTES,1985.0,20.712779,23.740903,26.517556,26.513333,30.245694,26.440566,24.571646,25.713025,26.628718,25.651055,24.206026,21.895806,�C
1,BAJA CALIFORNIA,1985.0,17.562808,19.548684,20.531768,26.056186,26.951639,32.019627,34.452996,34.598735,29.792175,27.084412,21.310611,20.509112,�C
2,BAJA CALIFORNIA SUR,1985.0,22.659097,24.271991,27.016454,28.987758,31.579313,33.379501,35.283168,35.566273,34.789445,31.879994,27.713117,26.151145,�C
3,CAMPECHE,1985.0,29.954718,31.793426,34.539973,34.748942,35.790283,34.990676,34.373901,34.654897,33.816518,32.921266,32.362377,30.606336,�C
4,COAHUILA DE ZARAGOZA,1985.0,15.060987,19.340677,25.644739,27.624163,31.286655,31.87383,31.997727,32.132009,31.033711,27.792773,25.690451,20.999594,�C


In [5]:
# Summarize dtypes and non-null counts per column to spot missing values before reshaping
temperature.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 993 entries, 0 to 992
Data columns (total 15 columns):
ENTIDAD       992 non-null object
AÑO           992 non-null float64
ENERO         992 non-null float64
FEBRERO       992 non-null float64
MARZO         992 non-null float64
ABRIL         992 non-null float64
MAYO          992 non-null float64
JUNIO         992 non-null float64
JULIO         992 non-null float64
AGOSTO        992 non-null float64
SEPTIEMBRE    992 non-null float64
OCTUBRE       960 non-null float64
NOVIEMBRE     960 non-null float64
DICIEMBRE     960 non-null float64
UNIDAD        992 non-null object
dtypes: float64(13), object(2)
memory usage: 116.4+ KB


In [6]:
# UNIDAD is not used by the rest of the analysis, so remove it.
# Rebinding (rather than inplace=True) keeps the step a plain expression.
temperature = temperature.drop('UNIDAD', axis=1)

In [7]:
# Inspect (rows, columns) after dropping the UNIDAD column
temperature.shape

(993, 14)

In [8]:
# Reshape from wide (one column per month) to long: one row per
# (ENTIDAD, AÑO, MES) with the reading stored in TEMPERATURA.
temperature = pd.melt(
    frame=temperature,
    id_vars=['ENTIDAD', 'AÑO'],
    value_vars=[
        'ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
        'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE',
    ],
    var_name='MES',
    value_name='TEMPERATURA',
)

In [9]:
# Discard every row that has a missing value in any column.
temperature = temperature.dropna(how='any')

In [10]:
# AÑO was read as float; store it as an integer year.
temperature = temperature.astype({'AÑO': int})

In [11]:
# Convert MES to integer
# Month-name -> month-number table, built once instead of on every call.
_MESES = {
    nombre: numero
    for numero, nombre in enumerate(
        ('ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO',
         'JULIO', 'AGOSTO', 'SEPTIEMBRE', 'OCTUBRE', 'NOVIEMBRE', 'DICIEMBRE'),
        start=1)
}


def convertidorMES(mes):
    """Return the calendar month number (1-12) for a Spanish month name.

    Parameters
    ----------
    mes : str
        Month name such as 'ENERO'. Surrounding whitespace and letter case
        are ignored, so ' enero ' is accepted as well.

    Returns
    -------
    int
        1 for ENERO through 12 for DICIEMBRE.

    Raises
    ------
    ValueError
        If ``mes`` is not one of the twelve known month names.
    """
    # Only strings are normalized; anything else is looked up as-is so that
    # it fails with the same exception type as an unknown name.
    clave = mes.strip().upper() if isinstance(mes, str) else mes
    try:
        return _MESES[clave]
    except (KeyError, TypeError):
        raise ValueError('Unknown month name: %r' % (mes,))

# Element-wise translation of each month name into its 1-12 number.
temperature['MES_int'] = temperature['MES'].map(convertidorMES)

In [12]:
# Create FECHA column
from datetime import datetime

# Assemble the first-of-month timestamp for every row in a single vectorized
# call. pd.to_datetime builds dates from columns named year / month / day,
# so the two source columns are renamed and a constant day=1 is added.
temperature['FECHA'] = pd.to_datetime(
    temperature[['AÑO', 'MES_int']]
    .rename(columns={'AÑO': 'year', 'MES_int': 'month'})
    .assign(day=1)
)

In [13]:
# Preview the long-format frame, now including the MES_int and FECHA helper columns
temperature.head()

Unnamed: 0,ENTIDAD,AÑO,MES,TEMPERATURA,MES_int,FECHA
0,AGUASCALIENTES,1985,ENERO,20.712779,1,1985-01-01
1,BAJA CALIFORNIA,1985,ENERO,17.562808,1,1985-01-01
2,BAJA CALIFORNIA SUR,1985,ENERO,22.659097,1,1985-01-01
3,CAMPECHE,1985,ENERO,29.954718,1,1985-01-01
4,COAHUILA DE ZARAGOZA,1985,ENERO,15.060987,1,1985-01-01


In [14]:
# Group rows by ENTIDAD.
# NOTE(review): the GroupBy object is only displayed here, never assigned or aggregated -- confirm whether this cell is still needed.
temperature.groupby(by = 'ENTIDAD')

<pandas.core.groupby.DataFrameGroupBy object at 0x7ffb82cabb50>

In [None]:
import plotly as py
import plotly.graph_objs as go

# Switch plotly to offline notebook mode; run this once at the start of every
# IPython notebook so that figures can be drawn without going online.
py.offline.init_notebook_mode()


Matplotlib is building the font cache using fc-list. This may take a moment.



In [44]:
# Pooled scatter: every monthly reading (all entities together) against its date.
scatter_plot_trace = go.Scatter(
    x=temperature['FECHA'],
    y=temperature['TEMPERATURA'],
    mode="markers",          # draw observations as individual points
    marker={'size': 4},      # point size; colors could be set here as well
)

# A figure takes its traces as a list, even when there is only one.
traces = [scatter_plot_trace]

scatter_plot_layout = go.Layout(
    xaxis={'title': 'Fecha'},
    yaxis={'title': 'Temperatura'},
)

# Combine traces and layout into a figure and render it inline.
fig = go.Figure(data=traces, layout=scatter_plot_layout)
py.offline.iplot(fig)

In [102]:
# Entity keys exactly as they appear in the data, de-duplicated and sorted.
estados = sorted(set(temperature.ENTIDAD))

# Human-readable legend labels.
# NOTE(review): these are paired with `estados` purely by position, so this
# order has to match the sorted order of the ENTIDAD values -- confirm against the data.
estados_nombres = [
    'Aguascalientes', 'Baja California', 'Baja California Sur', 'Campeche',
    'Chiapas', 'Chihuahua', 'Coahuila', 'Colima',
    'DF', 'Durango', 'Guanajuato', 'Guerrero',
    'Hidalgo', 'Jalisco', 'Michoacán', 'Morelos',
    'México', 'Nayarit', 'Nuevo León', 'Oaxaca',
    'Puebla', 'Querétaro', 'Quintana Roo', 'San Luis Potosí',
    'Sinaloa', 'Sonora', 'Tabasco', 'Tamaulipas',
    'Tlaxcala', 'Veracruz', 'Yucatán', 'Zacatecas',
]

title = 'Variacion de Temperatura'

def get_spaced_colors(n):
    """Return 'rgb(r, g, b)' strings for integers stepped evenly through [0, 255**3).

    Each integer is split into three bytes (high, middle, low) that become
    the red, green and blue components.

    n -- requested number of colors; the step between integers is int(255**3 / n).

    Returns a list of strings. Because the step is truncated and the upper
    bound is exclusive, the list can hold n + 1 entries rather than n
    (for example n=31 yields 32). For positive n the first entry is
    always 'rgb(0, 0, 0)'.

    Raises ZeroDivisionError when n is 0.
    """
    upper = 16581375  # 255**3
    step = int(upper / n)

    palette = []
    for value in range(0, upper, step):
        # value < 2**24, so it fits exactly in three bytes.
        red = (value >> 16) & 0xFF
        green = (value >> 8) & 0xFF
        blue = value & 0xFF
        palette.append('rgb' + str((red, green, blue)))
    return palette

# Legend labels are paired with entities by position, so a count mismatch
# would silently mislabel traces; fail loudly instead.
if len(estados) != len(estados_nombres):
    raise ValueError(
        'estados_nombres has {} labels but the data contains {} entities'.format(
            len(estados_nombres), len(estados)))

# Size the palette and the widths from the data rather than hard-coding the
# number of entities. get_spaced_colors can return more entries than
# requested, so keep exactly one per entity.
colors = get_spaced_colors(len(estados))[:len(estados)]
line_size = [2] * len(estados)

# One line+marker trace per entity, ordered by date.
traces = []
for estado, nombre, color, width in zip(estados, estados_nombres, colors, line_size):
    temporal = temperature[temperature.ENTIDAD == estado].sort_values('FECHA')
    traces.append(go.Scatter(
        x=temporal.FECHA,
        y=temporal.TEMPERATURA,
        mode='lines+markers',
        line=dict(color=color, width=width),
        showlegend=True,
        name=nombre,
    ))

scatter_plot_layout = go.Layout(
    title = title,  # defined earlier in this cell but previously never used
    xaxis = dict(title = 'Fecha'),
    yaxis = dict(title = 'Temperatura')
)

# Build the figure from the traces and render it inline.
fig = go.Figure(data = traces, layout = scatter_plot_layout)

py.offline.iplot(fig)