In [1]:
# Pandas es el Excel de python.
# Obtenido de: https://pandas.pydata.org/docs/user_guide/10min.html

import pandas as pd
import numpy as np

In [2]:
# A single pandas column is called a Series.
# np.nan stands for a missing value.
# Every element is labelled by an index entry.

s = pd.Series(data = [1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [3]:
# The index can be made of dates: six consecutive days starting 2013-01-01.

fechas = pd.date_range(start = "20130101", periods = 6, freq = "D")

print(fechas)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [4]:
# Same values as before, now labelled by the dates and given a name.
s = pd.Series(data = [1, 3, 5, np.nan, 6, 8], index = fechas, name = "datos")

print(s)

2013-01-01    1.0
2013-01-02    3.0
2013-01-03    5.0
2013-01-04    NaN
2013-01-05    6.0
2013-01-06    8.0
Freq: D, Name: datos, dtype: float64


In [5]:
# A DataFrame is a collection of columns that share one index.

# Built here from a 6x4 array of random normal values, one column per letter.
df = pd.DataFrame(
    data = np.random.randn(6, 4),
    index = fechas,
    columns = list("ABCD"),
)

print(df)

                   A         B         C         D
2013-01-01  0.700287  0.106060 -0.866704  0.400799
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-04 -0.499327  1.055084  0.578908 -0.466273
2013-01-05  0.245457 -1.152776  1.365896  0.660772
2013-01-06 -0.295343  0.412956 -0.286058 -1.674446


In [6]:
# head() and tail() show the top and bottom rows of the DataFrame
# (five rows each, spelled out explicitly here).

print(df.head(n = 5))
print(df.tail(n = 5))

                   A         B         C         D
2013-01-01  0.700287  0.106060 -0.866704  0.400799
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-04 -0.499327  1.055084  0.578908 -0.466273
2013-01-05  0.245457 -1.152776  1.365896  0.660772
                   A         B         C         D
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-04 -0.499327  1.055084  0.578908 -0.466273
2013-01-05  0.245457 -1.152776  1.365896  0.660772
2013-01-06 -0.295343  0.412956 -0.286058 -1.674446


In [7]:
# The row index and the column labels are both directly accessible.

print(df.index, df.columns, sep = "\n")

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')


In [8]:
# Transpose: rows become columns and vice versa.

print(df.transpose())

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.700287    0.248242    0.670712   -0.499327    0.245457   -0.295343
B    0.106060    0.704857   -0.312124    1.055084   -1.152776    0.412956
C   -0.866704   -0.788719    1.103823    0.578908    1.365896   -0.286058
D    0.400799    1.310796   -1.951334   -0.466273    0.660772   -1.674446


In [9]:
# Convert the DataFrame into a NumPy array (index and column labels are dropped).

valores = df.to_numpy()
print(valores)

[[ 0.70028669  0.10606049 -0.86670352  0.40079927]
 [ 0.24824246  0.70485705 -0.78871936  1.3107959 ]
 [ 0.67071218 -0.31212374  1.10382288 -1.9513342 ]
 [-0.49932747  1.0550844   0.57890758 -0.46627343]
 [ 0.2454575  -1.15277639  1.36589592  0.66077218]
 [-0.2953426   0.41295553 -0.28605829 -1.67444574]]


In [10]:
# Descriptive statistics per column: count, mean, std, min, quartiles, max.

print(df.describe(percentiles = [0.25, 0.5, 0.75]))

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.178338  0.135676  0.184524 -0.286614
std    0.491499  0.788375  0.966439  1.315404
min   -0.499327 -1.152776 -0.866704 -1.951334
25%   -0.160143 -0.207578 -0.663054 -1.372403
50%    0.246850  0.259508  0.146425 -0.032737
75%    0.565095  0.631882  0.972594  0.595779
max    0.700287  1.055084  1.365896  1.310796


In [11]:
# Sorting data.

# By index (row labels), newest date first.

print(df.sort_index(axis = 0, ascending = False))

                   A         B         C         D
2013-01-06 -0.295343  0.412956 -0.286058 -1.674446
2013-01-05  0.245457 -1.152776  1.365896  0.660772
2013-01-04 -0.499327  1.055084  0.578908 -0.466273
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-01  0.700287  0.106060 -0.866704  0.400799


In [12]:
# By the values of one column, smallest first.
print(df.sort_values(by = "B", ascending = True))

                   A         B         C         D
2013-01-05  0.245457 -1.152776  1.365896  0.660772
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-01  0.700287  0.106060 -0.866704  0.400799
2013-01-06 -0.295343  0.412956 -0.286058 -1.674446
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-04 -0.499327  1.055084  0.578908 -0.466273


In [13]:
# Select a single column; the result is a Series.

print(df.loc[:, "A"])

2013-01-01    0.700287
2013-01-02    0.248242
2013-01-03    0.670712
2013-01-04   -0.499327
2013-01-05    0.245457
2013-01-06   -0.295343
Freq: D, Name: A, dtype: float64


In [14]:
# Select a single row by its index label (here, the first date).

print(df.loc[df.index[0], :])

A    0.700287
B    0.106060
C   -0.866704
D    0.400799
Name: 2013-01-01 00:00:00, dtype: float64


In [15]:
# Select a single row by its integer position.

print(df.iloc[0, :])

A    0.700287
B    0.106060
C   -0.866704
D    0.400799
Name: 2013-01-01 00:00:00, dtype: float64


In [16]:
# Select one value by row label and column label.

fecha = df.index[2]
print(df.loc[fecha, "B"])

-0.3121237440838935


In [17]:
# Select one value by integer row position and integer column position.

fila, columna = 2, 1
print(df.iloc[fila, columna])

-0.3121237440838935


In [18]:
# Select a rectangular sub-section of the DataFrame by position:
# rows 2-3 and columns 1-2 (the stop of each slice is excluded).

filas = slice(2, 4)
columnas = slice(1, 3)
df_2 = df.iloc[filas, columnas]
print(df_2)

                   B         C
2013-01-03 -0.312124  1.103823
2013-01-04  1.055084  0.578908


In [19]:
# Rows 2-3 with every column.
df_2 = df.iloc[2:4]
print(df_2)

# NOTE: copy/view behaviour is NOT the same as in NumPy. pandas does not
# guarantee whether a slice like this is a view or a copy of df, and with
# Copy-on-Write enabled (optional in pandas 2.x, the default in pandas 3)
# it always behaves like a copy. Call .copy() when an independent object
# is required.

                   A         B         C         D
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-04 -0.499327  1.055084  0.578908 -0.466273


In [20]:
# Filtering by condition.

# On one column: keep only the rows where "A" is positive.
print(df.loc[df["A"].gt(0)])

                   A         B         C         D
2013-01-01  0.700287  0.106060 -0.866704  0.400799
2013-01-02  0.248242  0.704857 -0.788719  1.310796
2013-01-03  0.670712 -0.312124  1.103823 -1.951334
2013-01-05  0.245457 -1.152776  1.365896  0.660772


In [21]:
# On the whole DataFrame: entries that fail the condition show up as NaN.

print(df.where(df.gt(0)))

                   A         B         C         D
2013-01-01  0.700287  0.106060       NaN  0.400799
2013-01-02  0.248242  0.704857       NaN  1.310796
2013-01-03  0.670712       NaN  1.103823       NaN
2013-01-04       NaN  1.055084  0.578908       NaN
2013-01-05  0.245457       NaN  1.365896  0.660772
2013-01-06       NaN  0.412956       NaN       NaN


In [22]:
# Adding columns.
# All elements within one column share the same type.
# Different columns may hold different types.

etiquetas = ["uno", "uno", "dos", "tres", "cuatro", "tres"]
df["E"] = etiquetas

print(df)

                   A         B         C         D       E
2013-01-01  0.700287  0.106060 -0.866704  0.400799     uno
2013-01-02  0.248242  0.704857 -0.788719  1.310796     uno
2013-01-03  0.670712 -0.312124  1.103823 -1.951334     dos
2013-01-04 -0.499327  1.055084  0.578908 -0.466273    tres
2013-01-05  0.245457 -1.152776  1.365896  0.660772  cuatro
2013-01-06 -0.295343  0.412956 -0.286058 -1.674446    tres


In [23]:
# A new column can also come from a NumPy array:
# six evenly spaced values from 1 to 5.

df["F"] = np.linspace(start = 1, stop = 5, num = 6)
print(df.head(n = 5))

                   A         B         C         D       E    F
2013-01-01  0.700287  0.106060 -0.866704  0.400799     uno  1.0
2013-01-02  0.248242  0.704857 -0.788719  1.310796     uno  1.8
2013-01-03  0.670712 -0.312124  1.103823 -1.951334     dos  2.6
2013-01-04 -0.499327  1.055084  0.578908 -0.466273    tres  3.4
2013-01-05  0.245457 -1.152776  1.365896  0.660772  cuatro  4.2


In [24]:
# Look up specific values with isin(): rows whose "E" is one of the listed labels.

print(df.loc[df["E"].isin(values = ["dos", "cuatro"])])

                   A         B         C         D       E    F
2013-01-03  0.670712 -0.312124  1.103823 -1.951334     dos  2.6
2013-01-05  0.245457 -1.152776  1.365896  0.660772  cuatro  4.2


In [25]:
# A single value can be overwritten: first row, second column ("B").

fila, columna = 0, 1
df.iloc[fila, columna] = 10
print(df)

                   A          B         C         D       E    F
2013-01-01  0.700287  10.000000 -0.866704  0.400799     uno  1.0
2013-01-02  0.248242   0.704857 -0.788719  1.310796     uno  1.8
2013-01-03  0.670712  -0.312124  1.103823 -1.951334     dos  2.6
2013-01-04 -0.499327   1.055084  0.578908 -0.466273    tres  3.4
2013-01-05  0.245457  -1.152776  1.365896  0.660772  cuatro  4.2
2013-01-06 -0.295343   0.412956 -0.286058 -1.674446    tres  5.0


In [26]:
# Remove a column; drop() returns a new DataFrame, which is bound back to df.

df = df.drop(columns = "E")
print(df)

                   A          B         C         D    F
2013-01-01  0.700287  10.000000 -0.866704  0.400799  1.0
2013-01-02  0.248242   0.704857 -0.788719  1.310796  1.8
2013-01-03  0.670712  -0.312124  1.103823 -1.951334  2.6
2013-01-04 -0.499327   1.055084  0.578908 -0.466273  3.4
2013-01-05  0.245457  -1.152776  1.365896  0.660772  4.2
2013-01-06 -0.295343   0.412956 -0.286058 -1.674446  5.0


In [27]:
# Assign values through a condition: every negative entry becomes 0.

df[df.lt(0)] = 0
print(df)

                   A          B         C         D    F
2013-01-01  0.700287  10.000000  0.000000  0.400799  1.0
2013-01-02  0.248242   0.704857  0.000000  1.310796  1.8
2013-01-03  0.670712   0.000000  1.103823  0.000000  2.6
2013-01-04  0.000000   1.055084  0.578908  0.000000  3.4
2013-01-05  0.245457   0.000000  1.365896  0.660772  4.2
2013-01-06  0.000000   0.412956  0.000000  0.000000  5.0


In [28]:
# Introduce some null values: every entry greater than 2 becomes NaN.

df[df.gt(2)] = np.nan
print(df)

                   A         B         C         D    F
2013-01-01  0.700287       NaN  0.000000  0.400799  1.0
2013-01-02  0.248242  0.704857  0.000000  1.310796  1.8
2013-01-03  0.670712  0.000000  1.103823  0.000000  NaN
2013-01-04  0.000000  1.055084  0.578908  0.000000  NaN
2013-01-05  0.245457  0.000000  1.365896  0.660772  NaN
2013-01-06  0.000000  0.412956  0.000000  0.000000  NaN


In [29]:
# Drop every row that contains at least one null value.
print(df.dropna(axis = 0, how = "any"))

# Replace null values with a fill value.
print(df.fillna(value = -100))

# Boolean mask marking where the null values are.
print(df.isna())

                   A         B    C         D    F
2013-01-02  0.248242  0.704857  0.0  1.310796  1.8
                   A           B         C         D      F
2013-01-01  0.700287 -100.000000  0.000000  0.400799    1.0
2013-01-02  0.248242    0.704857  0.000000  1.310796    1.8
2013-01-03  0.670712    0.000000  1.103823  0.000000 -100.0
2013-01-04  0.000000    1.055084  0.578908  0.000000 -100.0
2013-01-05  0.245457    0.000000  1.365896  0.660772 -100.0
2013-01-06  0.000000    0.412956  0.000000  0.000000 -100.0
                A      B      C      D      F
2013-01-01  False   True  False  False  False
2013-01-02  False  False  False  False  False
2013-01-03  False  False  False  False   True
2013-01-04  False  False  False  False   True
2013-01-05  False  False  False  False   True
2013-01-06  False  False  False  False   True


In [30]:
# Operations.

# Between columns: element-wise sum of "A" and "C".
df["G"] = df["A"].add(df["C"])

print(df)

# Mean of each column (axis = 0), then of each row (axis = 1).
# NaN entries are skipped.
print(df.mean(axis = 0))
print(df.mean(axis = 1))

                   A         B         C         D    F         G
2013-01-01  0.700287       NaN  0.000000  0.400799  1.0  0.700287
2013-01-02  0.248242  0.704857  0.000000  1.310796  1.8  0.248242
2013-01-03  0.670712  0.000000  1.103823  0.000000  NaN  1.774535
2013-01-04  0.000000  1.055084  0.578908  0.000000  NaN  0.578908
2013-01-05  0.245457  0.000000  1.365896  0.660772  NaN  1.611353
2013-01-06  0.000000  0.412956  0.000000  0.000000  NaN  0.000000
A    0.310783
B    0.434579
C    0.508104
D    0.395395
F    1.400000
G    0.818888
dtype: float64
2013-01-01    0.560275
2013-01-02    0.718690
2013-01-03    0.709814
2013-01-04    0.442580
2013-01-05    0.776696
2013-01-06    0.082591
Freq: D, dtype: float64


In [31]:
# apply() runs a function on every column.
# np.cumsum() computes a running (cumulative) sum.
df.apply(func = np.cumsum, axis = 0)

Unnamed: 0,A,B,C,D,F,G
2013-01-01,0.700287,,0.0,0.400799,1.0,0.700287
2013-01-02,0.948529,0.704857,0.0,1.711595,2.8,0.948529
2013-01-03,1.619241,0.704857,1.103823,1.711595,,2.723064
2013-01-04,1.619241,1.759941,1.68273,1.711595,,3.301972
2013-01-05,1.864699,1.759941,3.048626,2.372367,,4.913325
2013-01-06,1.864699,2.172897,3.048626,2.372367,,4.913325


In [32]:
# Natural logarithm of every element of column "C".
# "C" contains zeros, and log(0) evaluates to -inf; NumPy reports this with
# "RuntimeWarning: divide by zero encountered in log". The -inf results are
# kept on purpose, so the warning is silenced for this computation only.
with np.errstate(divide = "ignore"):
    df["H"] = np.log( df["C"] )

df

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,A,B,C,D,F,G,H
2013-01-01,0.700287,,0.0,0.400799,1.0,0.700287,-inf
2013-01-02,0.248242,0.704857,0.0,1.310796,1.8,0.248242,-inf
2013-01-03,0.670712,0.0,1.103823,0.0,,1.774535,0.098779
2013-01-04,0.0,1.055084,0.578908,0.0,,0.578908,-0.546612
2013-01-05,0.245457,0.0,1.365896,0.660772,,1.611353,0.311811
2013-01-06,0.0,0.412956,0.0,0.0,,0.0,-inf


In [33]:
# Shift a column down by a given number of rows;
# the vacated rows at the top are filled with NaN.

df["I"] = df["C"].shift(periods = 2)

df

Unnamed: 0,A,B,C,D,F,G,H,I
2013-01-01,0.700287,,0.0,0.400799,1.0,0.700287,-inf,
2013-01-02,0.248242,0.704857,0.0,1.310796,1.8,0.248242,-inf,
2013-01-03,0.670712,0.0,1.103823,0.0,,1.774535,0.098779,0.0
2013-01-04,0.0,1.055084,0.578908,0.0,,0.578908,-0.546612,0.0
2013-01-05,0.245457,0.0,1.365896,0.660772,,1.611353,0.311811,1.103823
2013-01-06,0.0,0.412956,0.0,0.0,,0.0,-inf,0.578908


In [34]:
# A histogram (frequency count) can be obtained.
# Start from ten random integers between 0 and 6.

s = pd.Series(data = np.random.randint(low = 0, high = 7, size = 10))
print(s)


0    0
1    0
2    2
3    4
4    4
5    3
6    0
7    4
8    6
9    3
dtype: int64


In [35]:
# Number of occurrences of each distinct value, most frequent first.
print(s.value_counts(sort = True, ascending = False))

0    3
4    3
3    2
2    1
6    1
dtype: int64


In [36]:
# Grouping data.
# Build a larger example first: 200 consecutive days, four random columns.

fechas = pd.date_range(start = "20130101", periods = 200, freq = "D")
df = pd.DataFrame(
    data = np.random.randn(200, 4),
    index = fechas,
    columns = list("ABCD"),
)

print(df)

                   A         B         C         D
2013-01-01  0.378988  0.114256 -0.183128 -0.436035
2013-01-02 -0.432302 -0.051768 -0.030623 -2.160883
2013-01-03  0.305397  0.207042 -0.218787  0.598337
2013-01-04  1.396758  0.397797  0.263895  0.081114
2013-01-05  0.682928  0.123097 -0.801697  0.546532
...              ...       ...       ...       ...
2013-07-15  1.995105  0.442287 -2.161827  0.909218
2013-07-16  0.392429 -0.790886 -1.246562 -0.177115
2013-07-17 -0.649010  1.769364  0.362228  1.229272
2013-07-18 -0.151900  0.519274 -0.966547 -0.037173
2013-07-19  0.075832  0.116789 -0.302677  0.364927

[200 rows x 4 columns]


In [37]:
# Group the rows by the calendar month of their index, then sum each group.
df.groupby(by = df.index.month).sum()

Unnamed: 0,A,B,C,D
1,4.457227,2.115958,3.977852,-0.53273
2,-0.253663,-2.988084,-4.857697,3.531936
3,7.86105,6.727287,9.296932,-4.609883
4,1.462494,1.627626,6.857631,8.142079
5,0.36719,4.232605,-4.819755,-7.730979
6,-1.202336,3.874938,-2.020953,-12.974006
7,5.102971,-2.502698,-4.742694,7.078165


In [38]:
# Group the rows by day of the month, then average each group.
df.groupby(by = df.index.day).mean()

Unnamed: 0,A,B,C,D
1,-0.124838,-0.883067,0.031716,-0.108427
2,0.023397,-0.088267,-0.108538,-0.358604
3,-0.141949,0.926936,-0.647855,-0.060228
4,0.733734,-0.105589,0.486748,0.503345
5,0.062603,-0.223528,0.233028,-0.046292
6,0.002999,0.371651,-0.220372,-0.467411
7,0.12545,0.479706,0.560692,0.046907
8,0.305257,0.364743,-0.749827,0.427963
9,0.075706,-0.95015,-0.387214,-0.544318
10,0.173953,-0.184918,0.313973,-0.386691


In [39]:
# pandas is very flexible when handling dates in time series.

# 200 timestamps spaced one second apart. The lowercase alias "s" is used
# because the uppercase "S" is deprecated since pandas 2.2.
rango = pd.date_range("1/1/2012", periods = 200, freq = "s")
# One random integer in [0, 500) per timestamp.
ts = pd.Series(np.random.randint( 0, 500, len(rango) ), index = rango)

print(ts)

2012-01-01 00:00:00     90
2012-01-01 00:00:01    354
2012-01-01 00:00:02    194
2012-01-01 00:00:03     42
2012-01-01 00:00:04    250
                      ... 
2012-01-01 00:03:15    280
2012-01-01 00:03:16    441
2012-01-01 00:03:17     79
2012-01-01 00:03:18    352
2012-01-01 00:03:19    243
Freq: S, Length: 200, dtype: int64


In [40]:
# Resample: collect the per-second values into one-minute bins and sum each bin.

print(ts.resample(rule = "1Min").sum())

2012-01-01 00:00:00    13934
2012-01-01 00:01:00    13624
2012-01-01 00:02:00    13904
2012-01-01 00:03:00     4916
Freq: T, dtype: int64


In [41]:
# Reading and writing files is straightforward with
# the CSV format, which Excel can open.

# Write: label the index and the series so the file gets column headers.
ts.index.name = "Timestamp"
ts.name = "Datos"
ts.to_csv(path_or_buf = "time_series.csv")


In [42]:
# Read the file back, using its "Timestamp" column as the row index.

ts_2 = pd.read_csv(filepath_or_buffer = "time_series.csv", index_col = "Timestamp")

print(ts_2)

                     Datos
Timestamp                 
2012-01-01 00:00:00     90
2012-01-01 00:00:01    354
2012-01-01 00:00:02    194
2012-01-01 00:00:03     42
2012-01-01 00:00:04    250
...                    ...
2012-01-01 00:03:15    280
2012-01-01 00:03:16    441
2012-01-01 00:03:17     79
2012-01-01 00:03:18    352
2012-01-01 00:03:19    243

[200 rows x 1 columns]


In [43]:
# Inspect the index read back from the CSV: it holds plain strings
# (dtype object), not timestamps.
print( ts_2.index )

Index(['2012-01-01 00:00:00', '2012-01-01 00:00:01', '2012-01-01 00:00:02',
       '2012-01-01 00:00:03', '2012-01-01 00:00:04', '2012-01-01 00:00:05',
       '2012-01-01 00:00:06', '2012-01-01 00:00:07', '2012-01-01 00:00:08',
       '2012-01-01 00:00:09',
       ...
       '2012-01-01 00:03:10', '2012-01-01 00:03:11', '2012-01-01 00:03:12',
       '2012-01-01 00:03:13', '2012-01-01 00:03:14', '2012-01-01 00:03:15',
       '2012-01-01 00:03:16', '2012-01-01 00:03:17', '2012-01-01 00:03:18',
       '2012-01-01 00:03:19'],
      dtype='object', name='Timestamp', length=200)


In [44]:
# Parse the string labels into timestamps so the index becomes a DatetimeIndex.
indice_fechas = pd.to_datetime(ts_2.index)
ts_2.index = indice_fechas

print(ts_2.index)

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               ...
               '2012-01-01 00:03:10', '2012-01-01 00:03:11',
               '2012-01-01 00:03:12', '2012-01-01 00:03:13',
               '2012-01-01 00:03:14', '2012-01-01 00:03:15',
               '2012-01-01 00:03:16', '2012-01-01 00:03:17',
               '2012-01-01 00:03:18', '2012-01-01 00:03:19'],
              dtype='datetime64[ns]', name='Timestamp', length=200, freq=None)


In [45]:
# Correlation.

# Random DataFrame: 1000 rows, two columns of standard-normal draws.
df = pd.DataFrame(data = np.random.randn(1000, 2), columns = ["Col_1", "Col_2"])

print(df.head(), end = "\n\n")

# Correlation matrix.
print(df.corr(), end = "\n\n")

# Covariance matrix.
print(df.cov(), end = "\n\n")

# Autocorrelation of "Col_1" at a lag of one row.
print(df["Col_1"].autocorr(lag = 1))

      Col_1     Col_2
0  1.688429 -0.716003
1  0.779820 -0.015046
2 -0.054194  0.615313
3 -0.566583  0.291915
4  1.121626  0.131290

          Col_1     Col_2
Col_1  1.000000  0.001794
Col_2  0.001794  1.000000

          Col_1     Col_2
Col_1  1.048496  0.001826
Col_2  0.001826  0.988756

-0.01563882483724106


In [46]:
# Concatenate DataFrames vertically (stack rows).

# Two random DataFrames with the same columns.
df_1 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])
df_2 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])

print(df_1.head())
print()
print(df_2.head())
print()

# axis = 0 appends the rows of df_2 below those of df_1.
# ignore_index = True discards the original labels (0-99 in both inputs)
# and numbers the result 0-199, so no separate reset_index() call is needed.
df = pd.concat([df_1, df_2], axis = 0, ignore_index = True)

print(df)

      Col_1     Col_2
0  0.895022 -0.445394
1  0.531045  1.289390
2  1.603603 -1.283008
3 -0.268152  1.826182
4 -1.418714 -0.490321

      Col_1     Col_2
0 -0.857374  1.035646
1  0.146367 -0.064369
2  1.717349 -1.075337
3 -0.233492 -1.554533
4 -0.252572 -0.980573

        Col_1     Col_2
0    0.895022 -0.445394
1    0.531045  1.289390
2    1.603603 -1.283008
3   -0.268152  1.826182
4   -1.418714 -0.490321
..        ...       ...
195  1.295268  1.048177
196  0.498974 -1.396414
197 -0.595539 -1.120585
198  0.359538  0.387093
199 -1.472849  0.181375

[200 rows x 2 columns]


In [47]:
# Concatenate DataFrames side by side (add columns).

# Two random DataFrames with different column names.
df_1 = pd.DataFrame(data = np.random.randn(100, 2), columns = ["Col_1", "Col_2"])
df_2 = pd.DataFrame(data = np.random.randn(100, 2), columns = ["Col_3", "Col_4"])

print(df_1.head(), end = "\n\n")
print(df_2.head(), end = "\n\n")

# axis = 1 places the columns of df_2 to the right of those of df_1,
# matching rows by index label.
df = pd.concat(objs = [df_1, df_2], axis = 1)

print(df)

      Col_1     Col_2
0 -2.625500 -0.393720
1  1.190954 -0.362425
2  0.463621 -0.476837
3 -0.401781 -0.778725
4 -1.164925 -1.041746

      Col_3     Col_4
0 -0.715134 -0.336828
1 -0.725068  1.011219
2 -0.407877  3.204771
3  0.756902  1.291022
4  1.296974 -0.527047

       Col_1     Col_2     Col_3     Col_4
0  -2.625500 -0.393720 -0.715134 -0.336828
1   1.190954 -0.362425 -0.725068  1.011219
2   0.463621 -0.476837 -0.407877  3.204771
3  -0.401781 -0.778725  0.756902  1.291022
4  -1.164925 -1.041746  1.296974 -0.527047
..       ...       ...       ...       ...
95 -0.750328  1.520645 -0.941781  1.584569
96 -0.247350 -0.684965  0.074305  1.498597
97  0.265750  1.755498 -1.913953 -0.944842
98  0.134145  0.515333  0.314255  0.026398
99 -0.003530 -0.884255 -1.093853  1.223163

[100 rows x 4 columns]


In [48]:
# Cargar el archivo Electrico_residencial_2010_2017
# Selecciona las entradas correspondientes a Baja California.
# Ordena los datos de menor a mayor consumo en 2010.
# Selecciona los municipios con una cantidad de usuarios en 2012 menor a 100
# Encuentra el municipio con mayor consumo en tarifa DAC.
# Selecciona solo la tarifa DAC.
# Encuentra la correlación entre el consumo de 2012 y 2013