In [50]:
# Pandas es el Excel de Python.
# Obtenido de: https://pandas.pydata.org/docs/user_guide/10min.html

import pandas as pd
import numpy as np

In [51]:
# A single pandas column is called a Series.
# np.nan stands for a missing value.
# Every element is labelled by an index entry.

s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [52]:
# The index can also be made of dates: six consecutive days from 2013-01-01.

fechas = pd.date_range(start="20130101", periods=6, freq="D")

print(fechas)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [53]:
# Same values as before, now labelled by the dates in `fechas` and named "datos".
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
    index=fechas,
    name="datos",
)

print(s)

2013-01-01    1.0
2013-01-02    3.0
2013-01-03    5.0
2013-01-04    NaN
2013-01-05    6.0
2013-01-06    8.0
Freq: D, Name: datos, dtype: float64


In [54]:
# A DataFrame is a collection of columns that share one index.

# Built here from a 6x4 NumPy array of random normal samples,
# with the dates as row labels and A-D as column labels.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=fechas,
    columns=list("ABCD"),
)

print(df)

                   A         B         C         D
2013-01-01  0.721528  2.163466  0.197465  0.956179
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493
2013-01-05  1.007495  0.902794  1.910276 -0.572103
2013-01-06  1.814723  0.509839  1.940401 -1.389314


In [55]:
# head() and tail() return the top and bottom rows
# of the DataFrame, respectively.

print(df.head(), df.tail(), sep="\n")

                   A         B         C         D
2013-01-01  0.721528  2.163466  0.197465  0.956179
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493
2013-01-05  1.007495  0.902794  1.910276 -0.572103
                   A         B         C         D
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493
2013-01-05  1.007495  0.902794  1.910276 -0.572103
2013-01-06  1.814723  0.509839  1.940401 -1.389314


In [56]:
# The row labels (index) and the column labels are both accessible.

print(df.index, df.columns, sep="\n")

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')


In [57]:
# Transpose: rows become columns and vice versa.

print(df.transpose())

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.721528    0.211874    0.003530   -0.587499    1.007495    1.814723
B    2.163466    0.341063    1.188484   -1.181399    0.902794    0.509839
C    0.197465   -0.479328    1.072230    0.866636    1.910276    1.940401
D    0.956179   -0.219198   -1.035879   -0.293493   -0.572103   -1.389314


In [58]:
# Convert the DataFrame to a NumPy array (row and column labels are dropped).

arreglo = df.to_numpy()
print(arreglo)

[[ 0.7215285   2.16346566  0.197465    0.95617857]
 [ 0.21187426  0.3410633  -0.47932769 -0.21919774]
 [ 0.00353044  1.18848374  1.07222958 -1.03587885]
 [-0.58749949 -1.18139862  0.86663573 -0.29349287]
 [ 1.00749487  0.9027945   1.91027648 -0.57210265]
 [ 1.81472285  0.50983949  1.94040128 -1.38931445]]


In [59]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.

resumen = df.describe()
print(resumen)

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.528609  0.654041  0.917947 -0.425635
std    0.841708  1.105754  0.951634  0.811420
min   -0.587499 -1.181399 -0.479328 -1.389314
25%    0.055616  0.383257  0.364758 -0.919935
50%    0.466701  0.706317  0.969433 -0.432798
75%    0.936003  1.117061  1.700765 -0.237772
max    1.814723  2.163466  1.940401  0.956179


In [60]:
# Sorting data.

# By index: rows ordered by their labels, latest date first.

print(df.sort_index(axis=0, ascending=False))

                   A         B         C         D
2013-01-06  1.814723  0.509839  1.940401 -1.389314
2013-01-05  1.007495  0.902794  1.910276 -0.572103
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-01  0.721528  2.163466  0.197465  0.956179


In [61]:
# By column: rows ordered by the values in column B (ascending).
print(df.sort_values("B"))

                   A         B         C         D
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-06  1.814723  0.509839  1.940401 -1.389314
2013-01-05  1.007495  0.902794  1.910276 -0.572103
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-01  0.721528  2.163466  0.197465  0.956179


In [62]:
# Select one column; the result is a Series.

print(df.loc[:, "A"])

2013-01-01    0.721528
2013-01-02    0.211874
2013-01-03    0.003530
2013-01-04   -0.587499
2013-01-05    1.007495
2013-01-06    1.814723
Freq: D, Name: A, dtype: float64


In [63]:
# Select one row by its label (here, the first label in the index).

print(df.loc[df.index[0], :])

A    0.721528
B    2.163466
C    0.197465
D    0.956179
Name: 2013-01-01 00:00:00, dtype: float64


In [64]:
# Select one row by its integer position.

print(df.iloc[0, :])

A    0.721528
B    2.163466
C    0.197465
D    0.956179
Name: 2013-01-01 00:00:00, dtype: float64


In [65]:
# Select a single value by row label and column label.

print(df.at[df.index[2], "B"])

1.1884837417990106


In [66]:
# Select a single value by row position and column position.

print(df.iat[2, 1])

1.1884837417990106


In [67]:
# Select a rectangular sub-section of the DataFrame by position:
# rows 2-3 and columns 1-2 (the stop of each slice is excluded).

filas, columnas = slice(2, 4), slice(1, 3)
df_2 = df.iloc[filas, columnas]
print(df_2)

                   B         C
2013-01-03  1.188484  1.072230
2013-01-04 -1.181399  0.866636


In [68]:
# Rows 2-3 with every column.
# NOTE: do not rely on NumPy's view/copy rules here. pandas does not promise
# that a slice like this is a view of `df`; take an explicit .copy() before
# modifying df_2 if it must be independent of df.
df_2 = df.iloc[2:4]
print(df_2)

                   A         B         C         D
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493


In [69]:
# Filtering by condition.

# On one column: keep only the rows where A is positive.
print(df.loc[df["A"] > 0])

                   A         B         C         D
2013-01-01  0.721528  2.163466  0.197465  0.956179
2013-01-02  0.211874  0.341063 -0.479328 -0.219198
2013-01-03  0.003530  1.188484  1.072230 -1.035879
2013-01-05  1.007495  0.902794  1.910276 -0.572103
2013-01-06  1.814723  0.509839  1.940401 -1.389314


In [70]:
# On the whole DataFrame: entries that fail the condition become NaN.

print(df.where(df > 0))

                   A         B         C         D
2013-01-01  0.721528  2.163466  0.197465  0.956179
2013-01-02  0.211874  0.341063       NaN       NaN
2013-01-03  0.003530  1.188484  1.072230       NaN
2013-01-04       NaN       NaN  0.866636       NaN
2013-01-05  1.007495  0.902794  1.910276       NaN
2013-01-06  1.814723  0.509839  1.940401       NaN


In [71]:
# Adding columns.
# All elements within one column share the same type.
# Different columns may hold different types.

df["E"] = [
    "uno",
    "uno",
    "dos",
    "tres",
    "cuatro",
    "tres",
]

print(df)

                   A         B         C         D       E
2013-01-01  0.721528  2.163466  0.197465  0.956179     uno
2013-01-02  0.211874  0.341063 -0.479328 -0.219198     uno
2013-01-03  0.003530  1.188484  1.072230 -1.035879     dos
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493    tres
2013-01-05  1.007495  0.902794  1.910276 -0.572103  cuatro
2013-01-06  1.814723  0.509839  1.940401 -1.389314    tres


In [72]:
# A new column can also come from an array: six evenly spaced values from 1 to 5.

df["F"] = np.linspace(start=1, stop=5, num=6)
print(df.head())

                   A         B         C         D       E    F
2013-01-01  0.721528  2.163466  0.197465  0.956179     uno  1.0
2013-01-02  0.211874  0.341063 -0.479328 -0.219198     uno  1.8
2013-01-03  0.003530  1.188484  1.072230 -1.035879     dos  2.6
2013-01-04 -0.587499 -1.181399  0.866636 -0.293493    tres  3.4
2013-01-05  1.007495  0.902794  1.910276 -0.572103  cuatro  4.2


In [73]:
# Look up specific values with isin(): keep rows whose E is "dos" or "cuatro".

print(df.loc[df["E"].isin(["dos", "cuatro"])])

                   A         B         C         D       E    F
2013-01-03  0.003530  1.188484  1.072230 -1.035879     dos  2.6
2013-01-05  1.007495  0.902794  1.910276 -0.572103  cuatro  4.2


In [74]:
# A single value can be overwritten in place.
# iloc takes (row position, column position): [0, 1] is the first row of the second column.

df.iloc[0, 1] = 10
print(df)

                   A          B         C         D       E    F
2013-01-01  0.721528  10.000000  0.197465  0.956179     uno  1.0
2013-01-02  0.211874   0.341063 -0.479328 -0.219198     uno  1.8
2013-01-03  0.003530   1.188484  1.072230 -1.035879     dos  2.6
2013-01-04 -0.587499  -1.181399  0.866636 -0.293493    tres  3.4
2013-01-05  1.007495   0.902794  1.910276 -0.572103  cuatro  4.2
2013-01-06  1.814723   0.509839  1.940401 -1.389314    tres  5.0


In [75]:
# Remove a column; drop() returns a new DataFrame, which is bound back to df.

df = df.drop(columns="E")
print(df)

                   A          B         C         D    F
2013-01-01  0.721528  10.000000  0.197465  0.956179  1.0
2013-01-02  0.211874   0.341063 -0.479328 -0.219198  1.8
2013-01-03  0.003530   1.188484  1.072230 -1.035879  2.6
2013-01-04 -0.587499  -1.181399  0.866636 -0.293493  3.4
2013-01-05  1.007495   0.902794  1.910276 -0.572103  4.2
2013-01-06  1.814723   0.509839  1.940401 -1.389314  5.0


In [76]:
# Assign values through a condition: every entry of df below zero is replaced by 0.

df[df<0] = 0
print(df)

                   A          B         C         D    F
2013-01-01  0.721528  10.000000  0.197465  0.956179  1.0
2013-01-02  0.211874   0.341063  0.000000  0.000000  1.8
2013-01-03  0.003530   1.188484  1.072230  0.000000  2.6
2013-01-04  0.000000   0.000000  0.866636  0.000000  3.4
2013-01-05  1.007495   0.902794  1.910276  0.000000  4.2
2013-01-06  1.814723   0.509839  1.940401  0.000000  5.0


In [77]:
# Introduce some missing values: every entry greater than 2 becomes NaN.

df[df>2] = np.nan
print(df)

                   A         B         C         D    F
2013-01-01  0.721528       NaN  0.197465  0.956179  1.0
2013-01-02  0.211874  0.341063  0.000000  0.000000  1.8
2013-01-03  0.003530  1.188484  1.072230  0.000000  NaN
2013-01-04  0.000000  0.000000  0.866636  0.000000  NaN
2013-01-05  1.007495  0.902794  1.910276  0.000000  NaN
2013-01-06  1.814723  0.509839  1.940401  0.000000  NaN


In [78]:
# Drop every row that contains at least one missing value.
print(df.dropna(axis=0, how="any"))

# Fill missing values with a fixed number.
print(df.fillna(value=-100))

# Boolean mask marking which entries are missing.
print(df.isna())

                   A         B    C    D    F
2013-01-02  0.211874  0.341063  0.0  0.0  1.8
                   A           B         C         D      F
2013-01-01  0.721528 -100.000000  0.197465  0.956179    1.0
2013-01-02  0.211874    0.341063  0.000000  0.000000    1.8
2013-01-03  0.003530    1.188484  1.072230  0.000000 -100.0
2013-01-04  0.000000    0.000000  0.866636  0.000000 -100.0
2013-01-05  1.007495    0.902794  1.910276  0.000000 -100.0
2013-01-06  1.814723    0.509839  1.940401  0.000000 -100.0
                A      B      C      D      F
2013-01-01  False   True  False  False  False
2013-01-02  False  False  False  False  False
2013-01-03  False  False  False  False   True
2013-01-04  False  False  False  False   True
2013-01-05  False  False  False  False   True
2013-01-06  False  False  False  False   True


In [79]:
# Operations.

# Between columns: G is the element-wise sum of A and C.
df["G"] = df["A"].add(df["C"])

print(df)

# Mean of each column, then mean of each row.
print(df.mean(axis=0))
print(df.mean(axis=1))

                   A         B         C         D    F         G
2013-01-01  0.721528       NaN  0.197465  0.956179  1.0  0.918993
2013-01-02  0.211874  0.341063  0.000000  0.000000  1.8  0.211874
2013-01-03  0.003530  1.188484  1.072230  0.000000  NaN  1.075760
2013-01-04  0.000000  0.000000  0.866636  0.000000  NaN  0.866636
2013-01-05  1.007495  0.902794  1.910276  0.000000  NaN  2.917771
2013-01-06  1.814723  0.509839  1.940401  0.000000  NaN  3.755124
A    0.626525
B    0.588436
C    0.997835
D    0.159363
F    1.400000
G    1.624360
dtype: float64
2013-01-01    0.758833
2013-01-02    0.427469
2013-01-03    0.668001
2013-01-04    0.346654
2013-01-05    1.347667
2013-01-06    1.604018
Freq: D, dtype: float64


In [80]:
# Apply a function to each column.
# np.cumsum() computes a running (cumulative) sum.
df.apply(np.cumsum, axis=0)

Unnamed: 0,A,B,C,D,F,G
2013-01-01,0.721528,,0.197465,0.956179,1.0,0.918993
2013-01-02,0.933403,0.341063,0.197465,0.956179,2.8,1.130868
2013-01-03,0.936933,1.529547,1.269695,0.956179,,2.206628
2013-01-04,0.936933,1.529547,2.13633,0.956179,,3.073264
2013-01-05,1.944428,2.432342,4.046607,0.956179,,5.991035
2013-01-06,3.759151,2.942181,5.987008,0.956179,,9.746159


In [81]:
# Element-wise natural logarithm of column C, stored as the new column H.
# C holds exact zeros (negative entries were replaced by 0 in an earlier cell),
# so log(0) evaluates to -inf for those rows. That result is expected here, so
# NumPy's divide-by-zero RuntimeWarning is silenced for this one computation only.
with np.errstate(divide="ignore"):
    df["H"] = np.log(df["C"])

df

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,A,B,C,D,F,G,H
2013-01-01,0.721528,,0.197465,0.956179,1.0,0.918993,-1.622194
2013-01-02,0.211874,0.341063,0.0,0.0,1.8,0.211874,-inf
2013-01-03,0.00353,1.188484,1.07223,0.0,,1.07576,0.06974
2013-01-04,0.0,0.0,0.866636,0.0,,0.866636,-0.143137
2013-01-05,1.007495,0.902794,1.910276,0.0,,2.917771,0.647248
2013-01-06,1.814723,0.509839,1.940401,0.0,,3.755124,0.662895


In [82]:
# A histogram of values can be obtained.
# Start from ten random integers drawn from 0..6 (the upper bound 7 is excluded).

s = pd.Series(data=np.random.randint(low=0, high=7, size=10))
print(s)


0    2
1    2
2    3
3    5
4    3
5    0
6    0
7    3
8    3
9    6
dtype: int64


In [83]:
# Count how many times each distinct value occurs in s.
conteo = s.value_counts()
print(conteo)

3    4
2    2
0    2
5    1
6    1
dtype: int64


In [84]:
# Grouping data.
# Start from 200 consecutive days of random normal samples in four columns.

fechas = pd.date_range(start="20130101", periods=200, freq="D")
df = pd.DataFrame(
    data=np.random.randn(200, 4),
    index=fechas,
    columns=list("ABCD"),
)

print(df)

                   A         B         C         D
2013-01-01 -0.081012  0.782166  0.168883 -2.235812
2013-01-02  0.478676  0.530940  0.613818 -0.029573
2013-01-03 -1.006465 -0.173889 -1.770311 -1.271142
2013-01-04 -2.722963 -2.505809  0.044979  0.556915
2013-01-05 -0.192212 -0.478922  0.368318 -0.257449
...              ...       ...       ...       ...
2013-07-15 -1.343711  0.096377 -0.113509  0.303902
2013-07-16 -0.333740 -1.249941 -1.649008 -1.829500
2013-07-17 -0.895110 -0.308012 -1.106519 -0.527387
2013-07-18  0.697204 -0.097088 -0.494006  0.346064
2013-07-19 -1.802230  0.830730  0.289162 -0.851615

[200 rows x 4 columns]


In [85]:
# Group the rows by the month of their date and sum each group.
df.groupby(by=df.index.month).sum()

Unnamed: 0,A,B,C,D
1,-0.798077,3.83856,-4.049747,-8.885975
2,-1.430027,-14.533154,6.573088,2.720362
3,-3.524114,1.508642,-3.23955,8.713066
4,-9.598943,9.44128,-5.596464,4.617217
5,1.009982,4.907997,-1.932292,-1.692895
6,-4.890213,-1.907895,2.934414,-3.595034
7,0.453305,-5.370138,3.398496,-4.566965


In [86]:
# Group the rows by day of the month and average each group.
df.groupby(by=df.index.day).mean()

Unnamed: 0,A,B,C,D
1,0.080776,0.035915,0.437792,-0.264141
2,0.486945,-0.227749,0.862887,-0.703551
3,-0.097854,-0.315475,-0.523858,0.041969
4,-0.425877,-0.463429,-0.36333,0.577864
5,0.108469,-0.191223,0.325115,-0.173312
6,0.382845,0.390719,0.273312,-0.225439
7,0.043699,-0.040151,-0.394495,0.022033
8,0.141149,-0.064651,-0.279638,0.456184
9,0.006065,0.143651,-0.145783,-0.329834
10,-0.286142,-0.002587,0.528855,-0.485697


In [87]:
# Dates are handled very flexibly for time series.
# Build 200 timestamps spaced one second apart and attach a random integer
# in 0..499 to each of them.
# The lowercase "s" alias is used for seconds: the uppercase "S" spelling is
# deprecated in recent pandas releases, while "s" is accepted by old and new ones.

rango = pd.date_range("1/1/2012", periods = 200, freq = "s")
ts = pd.Series(np.random.randint( 0, 500, len(rango) ), index = rango)

print(ts)

2012-01-01 00:00:00    229
2012-01-01 00:00:01    470
2012-01-01 00:00:02    442
2012-01-01 00:00:03    403
2012-01-01 00:00:04    317
                      ... 
2012-01-01 00:03:15     42
2012-01-01 00:03:16    220
2012-01-01 00:03:17    460
2012-01-01 00:03:18    324
2012-01-01 00:03:19    161
Freq: S, Length: 200, dtype: int64


In [88]:
# Resample: aggregate the per-second values into one-minute bins and sum each bin.

print(ts.resample(rule="1Min").sum())

2012-01-01 00:00:00    15192
2012-01-01 00:01:00    13268
2012-01-01 00:02:00    13035
2012-01-01 00:03:00     5574
Freq: T, dtype: int64


In [89]:
# Reading and writing files is straightforward with
# the CSV format, which Excel can open.

# Write: label the index and the values so the file gets a header row, then save.
ts.index.name = "Timestamp"
ts.name = "Datos"
ts.to_csv(path_or_buf="time_series.csv")


In [90]:
# Read the file back, using the "Timestamp" column as the index.

ts_2 = pd.read_csv(
    filepath_or_buffer="time_series.csv",
    index_col="Timestamp",
)

print(ts_2)

                     Datos
Timestamp                 
2012-01-01 00:00:00    229
2012-01-01 00:00:01    470
2012-01-01 00:00:02    442
2012-01-01 00:00:03    403
2012-01-01 00:00:04    317
...                    ...
2012-01-01 00:03:15     42
2012-01-01 00:03:16    220
2012-01-01 00:03:17    460
2012-01-01 00:03:18    324
2012-01-01 00:03:19    161

[200 rows x 1 columns]


In [91]:
# Inspect the index as it was read from the CSV file.
indice_leido = ts_2.index
print(indice_leido)

Index(['2012-01-01 00:00:00', '2012-01-01 00:00:01', '2012-01-01 00:00:02',
       '2012-01-01 00:00:03', '2012-01-01 00:00:04', '2012-01-01 00:00:05',
       '2012-01-01 00:00:06', '2012-01-01 00:00:07', '2012-01-01 00:00:08',
       '2012-01-01 00:00:09',
       ...
       '2012-01-01 00:03:10', '2012-01-01 00:03:11', '2012-01-01 00:03:12',
       '2012-01-01 00:03:13', '2012-01-01 00:03:14', '2012-01-01 00:03:15',
       '2012-01-01 00:03:16', '2012-01-01 00:03:17', '2012-01-01 00:03:18',
       '2012-01-01 00:03:19'],
      dtype='object', name='Timestamp', length=200)


In [92]:
# Convert the index to datetimes and put it back on the DataFrame.
indice_fechas = pd.to_datetime(ts_2.index)
ts_2.index = indice_fechas

print(ts_2.index)

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               ...
               '2012-01-01 00:03:10', '2012-01-01 00:03:11',
               '2012-01-01 00:03:12', '2012-01-01 00:03:13',
               '2012-01-01 00:03:14', '2012-01-01 00:03:15',
               '2012-01-01 00:03:16', '2012-01-01 00:03:17',
               '2012-01-01 00:03:18', '2012-01-01 00:03:19'],
              dtype='datetime64[ns]', name='Timestamp', length=200, freq=None)


In [93]:
# Correlation

# Build a random DataFrame: 1000 rows, two independent normal columns.
df = pd.DataFrame(
    data=np.random.randn(1000, 2),
    columns=["Col_1", "Col_2"],
)

# Each print is followed by one blank line.
print(df.head(), end="\n\n")

# Correlation matrix
print(df.corr(), end="\n\n")

# Covariance matrix
print(df.cov(), end="\n\n")

      Col_1     Col_2
0 -0.167635 -0.801782
1  0.276739 -0.511667
2 -1.114099 -0.640085
3  0.308471  0.300934
4 -0.328477 -1.706633

          Col_1     Col_2
Col_1  1.000000  0.000733
Col_2  0.000733  1.000000

          Col_1     Col_2
Col_1  0.994865  0.000739
Col_2  0.000739  1.021994



In [94]:
# Concatenate DataFrames row-wise (stacking one below the other).

# Build two random DataFrames with the same columns.
df_1 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_1", "Col_2"])
df_2 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_1", "Col_2"])

# Show the first rows of each, each followed by a blank line.
print(df_1.head(), end="\n\n")
print(df_2.head(), end="\n\n")

# Concatenate, then reset the index.
# ignore_index=True already renumbers the rows 0..199, so the reset_index
# call that follows leaves the index unchanged.
df = (
    pd.concat([df_1, df_2], axis="index", ignore_index=True)
    .reset_index(drop=True)
)

print(df)

      Col_1     Col_2
0 -0.433675  0.998925
1 -0.669555 -1.652491
2  1.749122 -0.284538
3 -2.284783 -0.566064
4 -0.317420  0.722254

      Col_1     Col_2
0  0.439650 -0.099283
1 -1.303901  0.082103
2 -0.472249 -1.031095
3 -0.625879  0.306549
4  1.665371 -0.214742

        Col_1     Col_2
0   -0.433675  0.998925
1   -0.669555 -1.652491
2    1.749122 -0.284538
3   -2.284783 -0.566064
4   -0.317420  0.722254
..        ...       ...
195 -0.179022 -1.057300
196  0.849659  2.526274
197  0.382693  0.230633
198  0.592083  1.801855
199 -0.237396 -0.451695

[200 rows x 2 columns]


In [95]:
# Concatenate DataFrames column-wise (placing them side by side).

# Build two random DataFrames with different column names.
df_1 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_1", "Col_2"])
df_2 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_3", "Col_4"])

# Show the first rows of each, each followed by a blank line.
print(df_1.head(), end="\n\n")
print(df_2.head(), end="\n\n")

# Concatenate along the column axis; rows are matched by index label.
df = pd.concat([df_1, df_2], axis="columns")

print(df)

      Col_1     Col_2
0  0.248384 -1.469891
1  0.540045  1.232635
2 -0.564560  0.252825
3  0.316136 -0.528789
4 -0.680130 -0.401939

      Col_3     Col_4
0 -0.106692 -0.456279
1  0.329974  0.279194
2 -0.288887  1.436253
3  0.097060  0.837448
4  1.001060  0.496942

       Col_1     Col_2     Col_3     Col_4
0   0.248384 -1.469891 -0.106692 -0.456279
1   0.540045  1.232635  0.329974  0.279194
2  -0.564560  0.252825 -0.288887  1.436253
3   0.316136 -0.528789  0.097060  0.837448
4  -0.680130 -0.401939  1.001060  0.496942
..       ...       ...       ...       ...
95  1.138090  1.540096 -0.652882  0.841344
96 -1.801081  1.029975  0.509095 -0.503295
97 -0.172826 -1.329791 -2.439784 -0.758918
98 -0.815126  1.157758  1.551319  0.300145
99 -1.878599 -0.406990  0.450245  1.996175

[100 rows x 4 columns]


In [96]:
# Cargar el archivo Electrico_residencial_2010_2017
# Selecciona las entradas correspondientes a Baja California.
# Ordena los datos de menor a mayor consumo en 2010.
# Selecciona los municipios con una cantidad de usuarios en 2012 menor a 100
# Encuentra el municipio con mayor consumo en tarifa DAC.
# Selecciona solo la tarifa DAC.
# Encuentra la correlación entre el consumo de 2012 y 2013