In [1]:
# Pandas es el Excel de python.
# Obtenido de: https://pandas.pydata.org/docs/user_guide/10min.html

import pandas as pd
import numpy as np

In [2]:
# Una columna en pandas se llama una serie
# np.nan es un valor faltante
# Cada elemento tiene un índice.

s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [3]:
# El índice puede ser una fecha

fechas = pd.date_range("20130101", periods = 6, freq = "D")

print(fechas)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [4]:
s = pd.Series([1, 3, 5, np.nan, 6, 8], index = fechas, name = "datos")

print(s)

2013-01-01    1.0
2013-01-02    3.0
2013-01-03    5.0
2013-01-04    NaN
2013-01-05    6.0
2013-01-06    8.0
Freq: D, Name: datos, dtype: float64


In [5]:
# Un dataframe es un conjunto de columnas.

# creado a partir de series.
df = pd.DataFrame( np.random.randn(6, 4), index = fechas,
    columns = ["A", "B", "C", "D"] )

print(df)

                   A         B         C         D
2013-01-01  3.007878 -0.909170 -0.859399  0.182561
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
2013-01-06  0.083203  2.232580 -0.200007  1.491007


In [6]:
# head() y tail() muestran la parte 
# superior e inferior del Dataframe.

print(df.head())
print(df.tail())

                   A         B         C         D
2013-01-01  3.007878 -0.909170 -0.859399  0.182561
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
                   A         B         C         D
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
2013-01-06  0.083203  2.232580 -0.200007  1.491007


In [7]:
# Podemos acceder al índice y las columnas.

print(df.index)
print(df.columns)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')


In [8]:
# Trasponer.

print(df.T)

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    3.007878    1.131851    1.342641   -1.975218    0.643082    0.083203
B   -0.909170    0.734660   -1.051362    0.442906   -1.543886    2.232580
C   -0.859399   -0.022333   -1.170588   -1.090930    0.219861   -0.200007
D    0.182561    0.211655   -0.531547   -1.357888   -0.552904    1.491007


In [9]:
# Convertir a un arreglo de numpy.

print( df.to_numpy() )

[[ 3.0078783  -0.90916954 -0.85939853  0.18256081]
 [ 1.1318511   0.73466008 -0.02233323  0.21165544]
 [ 1.34264083 -1.05136249 -1.17058846 -0.53154656]
 [-1.975218    0.4429058  -1.09092988 -1.35788841]
 [ 0.64308197 -1.54388628  0.21986133 -0.55290415]
 [ 0.08320303  2.23257962 -0.20000655  1.49100659]]


In [10]:
# Estadística descriptiva.

print( df.describe() )

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.705573 -0.015712 -0.520566 -0.092853
std    1.641101  1.416641  0.593612  0.967637
min   -1.975218 -1.543886 -1.170588 -1.357888
25%    0.223173 -1.015814 -1.033047 -0.547565
50%    0.887467 -0.233132 -0.529703 -0.174493
75%    1.289943  0.661722 -0.066752  0.204382
max    3.007878  2.232580  0.219861  1.491007


In [11]:
# Ordenar datos.

# Por índice.

print( df.sort_index(ascending = False) )

                   A         B         C         D
2013-01-06  0.083203  2.232580 -0.200007  1.491007
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-01  3.007878 -0.909170 -0.859399  0.182561


In [12]:
# Por columna.
print( df.sort_values(by = "B") )

                   A         B         C         D
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-01  3.007878 -0.909170 -0.859399  0.182561
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-06  0.083203  2.232580 -0.200007  1.491007


In [13]:
# Escoger una columna.

print( df["A"] )

2013-01-01    3.007878
2013-01-02    1.131851
2013-01-03    1.342641
2013-01-04   -1.975218
2013-01-05    0.643082
2013-01-06    0.083203
Freq: D, Name: A, dtype: float64


In [14]:
# Escoger una fila.

print( df.loc[df.index[0]] )

A    3.007878
B   -0.909170
C   -0.859399
D    0.182561
Name: 2013-01-01 00:00:00, dtype: float64


In [15]:
# Escoger por número de fila.

print( df.iloc[0] )

A    3.007878
B   -0.909170
C   -0.859399
D    0.182561
Name: 2013-01-01 00:00:00, dtype: float64


In [16]:
# Escoger por fila y columna.

print( df.loc[df.index[2], "B"] )

-1.0513624913877697


In [17]:
# Escoger por número de fila y columna.

print( df.iloc[2, 1] )

-1.0513624913877697


In [18]:
# Escoger una subsección del dataframe.

df_2 = df.iloc[2:4, 1:3]
print( df_2 )

                   B         C
2013-01-03 -1.051362 -1.170588
2013-01-04  0.442906 -1.090930


In [19]:
df_2 = df.iloc[2:4, :]
print( df_2 )

# Las copias y vistas funcionan igual que en numpy!!

                   A         B         C         D
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888


In [20]:
# Búsqueda por condiciones.

# En una columna.
print( df[df["A"] > 0] )

                   A         B         C         D
2013-01-01  3.007878 -0.909170 -0.859399  0.182561
2013-01-02  1.131851  0.734660 -0.022333  0.211655
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547
2013-01-05  0.643082 -1.543886  0.219861 -0.552904
2013-01-06  0.083203  2.232580 -0.200007  1.491007


In [21]:
# En todo el dataframe.

print( df[df>0] )

                   A         B         C         D
2013-01-01  3.007878       NaN       NaN  0.182561
2013-01-02  1.131851  0.734660       NaN  0.211655
2013-01-03  1.342641       NaN       NaN       NaN
2013-01-04       NaN  0.442906       NaN       NaN
2013-01-05  0.643082       NaN  0.219861       NaN
2013-01-06  0.083203  2.232580       NaN  1.491007


In [22]:
# Agregar columnas.
# Todos los elementos de una columna son del mismo tipo.
# Las columnas pueden tener distintos tipos.

df["E"] = ["uno", "uno", "dos", "tres", "cuatro", "tres"]

print(df)

                   A         B         C         D       E
2013-01-01  3.007878 -0.909170 -0.859399  0.182561     uno
2013-01-02  1.131851  0.734660 -0.022333  0.211655     uno
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547     dos
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888    tres
2013-01-05  0.643082 -1.543886  0.219861 -0.552904  cuatro
2013-01-06  0.083203  2.232580 -0.200007  1.491007    tres


In [23]:
# Nueva columna con arreglos.

df["F"] = np.linspace(1, 5, 6)
print(df.head())

                   A         B         C         D       E    F
2013-01-01  3.007878 -0.909170 -0.859399  0.182561     uno  1.0
2013-01-02  1.131851  0.734660 -0.022333  0.211655     uno  1.8
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547     dos  2.6
2013-01-04 -1.975218  0.442906 -1.090930 -1.357888    tres  3.4
2013-01-05  0.643082 -1.543886  0.219861 -0.552904  cuatro  4.2


In [24]:
# Buscar valores específicos con isin()

print( df[df["E"].isin(["dos", "cuatro"])] )

                   A         B         C         D       E    F
2013-01-03  1.342641 -1.051362 -1.170588 -0.531547     dos  2.6
2013-01-05  0.643082 -1.543886  0.219861 -0.552904  cuatro  4.2


In [25]:
# Podemos cambiar un valor específico.

df.iloc[0, 1] = 10
print(df)

                   A          B         C         D       E    F
2013-01-01  3.007878  10.000000 -0.859399  0.182561     uno  1.0
2013-01-02  1.131851   0.734660 -0.022333  0.211655     uno  1.8
2013-01-03  1.342641  -1.051362 -1.170588 -0.531547     dos  2.6
2013-01-04 -1.975218   0.442906 -1.090930 -1.357888    tres  3.4
2013-01-05  0.643082  -1.543886  0.219861 -0.552904  cuatro  4.2
2013-01-06  0.083203   2.232580 -0.200007  1.491007    tres  5.0


In [26]:
# Quitar columna.

df = df.drop("E", axis = 1)
print(df)

                   A          B         C         D    F
2013-01-01  3.007878  10.000000 -0.859399  0.182561  1.0
2013-01-02  1.131851   0.734660 -0.022333  0.211655  1.8
2013-01-03  1.342641  -1.051362 -1.170588 -0.531547  2.6
2013-01-04 -1.975218   0.442906 -1.090930 -1.357888  3.4
2013-01-05  0.643082  -1.543886  0.219861 -0.552904  4.2
2013-01-06  0.083203   2.232580 -0.200007  1.491007  5.0


In [27]:
# Establecer valores con condicional.

df[df<0] = 0
print(df)

                   A          B         C         D    F
2013-01-01  3.007878  10.000000  0.000000  0.182561  1.0
2013-01-02  1.131851   0.734660  0.000000  0.211655  1.8
2013-01-03  1.342641   0.000000  0.000000  0.000000  2.6
2013-01-04  0.000000   0.442906  0.000000  0.000000  3.4
2013-01-05  0.643082   0.000000  0.219861  0.000000  4.2
2013-01-06  0.083203   2.232580  0.000000  1.491007  5.0


In [28]:
# Agregamos algunos valores nulos.

df[df>2] = np.nan
print(df)

                   A         B         C         D    F
2013-01-01       NaN       NaN  0.000000  0.182561  1.0
2013-01-02  1.131851  0.734660  0.000000  0.211655  1.8
2013-01-03  1.342641  0.000000  0.000000  0.000000  NaN
2013-01-04  0.000000  0.442906  0.000000  0.000000  NaN
2013-01-05  0.643082  0.000000  0.219861  0.000000  NaN
2013-01-06  0.083203       NaN  0.000000  1.491007  NaN


In [29]:
# Quitar filas con valores nulos.
print(df.dropna())

# Rellenar valores nulos.
print(df.fillna(-100))

# Determinar valores nulos.
print(df.isna())

                   A        B    C         D    F
2013-01-02  1.131851  0.73466  0.0  0.211655  1.8
                     A           B         C         D      F
2013-01-01 -100.000000 -100.000000  0.000000  0.182561    1.0
2013-01-02    1.131851    0.734660  0.000000  0.211655    1.8
2013-01-03    1.342641    0.000000  0.000000  0.000000 -100.0
2013-01-04    0.000000    0.442906  0.000000  0.000000 -100.0
2013-01-05    0.643082    0.000000  0.219861  0.000000 -100.0
2013-01-06    0.083203 -100.000000  0.000000  1.491007 -100.0
                A      B      C      D      F
2013-01-01   True   True  False  False  False
2013-01-02  False  False  False  False  False
2013-01-03  False  False  False  False   True
2013-01-04  False  False  False  False   True
2013-01-05  False  False  False  False   True
2013-01-06  False   True  False  False   True


In [30]:
# operaciones.

# Entre columnas.
df["G"] = df["A"] + df["C"]

print(df)

# promedio
print(df.mean())
print(df.mean(1))

                   A         B         C         D    F         G
2013-01-01       NaN       NaN  0.000000  0.182561  1.0       NaN
2013-01-02  1.131851  0.734660  0.000000  0.211655  1.8  1.131851
2013-01-03  1.342641  0.000000  0.000000  0.000000  NaN  1.342641
2013-01-04  0.000000  0.442906  0.000000  0.000000  NaN  0.000000
2013-01-05  0.643082  0.000000  0.219861  0.000000  NaN  0.862943
2013-01-06  0.083203       NaN  0.000000  1.491007  NaN  0.083203
A    0.640155
B    0.294391
C    0.036644
D    0.314204
F    1.400000
G    0.684128
dtype: float64
2013-01-01    0.394187
2013-01-02    0.835003
2013-01-03    0.537056
2013-01-04    0.088581
2013-01-05    0.345177
2013-01-06    0.414353
Freq: D, dtype: float64


In [31]:
# Aplicar operación.
# np.cumsum() hace una suma acumulada.
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F,G
2013-01-01,,,0.0,0.182561,1.0,
2013-01-02,1.131851,0.73466,0.0,0.394216,2.8,1.131851
2013-01-03,2.474492,0.73466,0.0,0.394216,,2.474492
2013-01-04,2.474492,1.177566,0.0,0.394216,,2.474492
2013-01-05,3.117574,1.177566,0.219861,0.394216,,3.337435
2013-01-06,3.200777,,0.219861,1.885223,,3.420638


In [32]:
# Logaritmo a cada elemento.
df["H"] = np.log( df["C"] )

df

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,A,B,C,D,F,G,H
2013-01-01,,,0.0,0.182561,1.0,,-inf
2013-01-02,1.131851,0.73466,0.0,0.211655,1.8,1.131851,-inf
2013-01-03,1.342641,0.0,0.0,0.0,,1.342641,-inf
2013-01-04,0.0,0.442906,0.0,0.0,,0.0,-inf
2013-01-05,0.643082,0.0,0.219861,0.0,,0.862943,-1.514758
2013-01-06,0.083203,,0.0,1.491007,,0.083203,-inf


In [33]:
# Recorrer una columna una cierta cantidad de filas.

df["I"] = df["C"].shift(2)

df

Unnamed: 0,A,B,C,D,F,G,H,I
2013-01-01,,,0.0,0.182561,1.0,,-inf,
2013-01-02,1.131851,0.73466,0.0,0.211655,1.8,1.131851,-inf,
2013-01-03,1.342641,0.0,0.0,0.0,,1.342641,-inf,0.0
2013-01-04,0.0,0.442906,0.0,0.0,,0.0,-inf,0.0
2013-01-05,0.643082,0.0,0.219861,0.0,,0.862943,-1.514758,0.0
2013-01-06,0.083203,,0.0,1.491007,,0.083203,-inf,0.0


In [34]:
# Se puede obtener un histograma.

s = pd.Series(np.random.randint(0, 7, size = 10))
print (s)


0    6
1    6
2    3
3    3
4    5
5    6
6    6
7    4
8    5
9    1
dtype: int64


In [35]:
print(s.value_counts())

6    4
3    2
5    2
4    1
1    1
dtype: int64


In [36]:
# Agrupar datos

fechas = pd.date_range("20130101", periods = 200, freq = "D")
df = pd.DataFrame( np.random.randn(200, 4), index = fechas,
    columns = ["A", "B", "C", "D"] )

print(df)

                   A         B         C         D
2013-01-01  0.225040  0.032689  2.000264  0.158546
2013-01-02 -0.253067 -0.823060  0.732808  0.562186
2013-01-03  0.036158  0.295104  0.053556  1.207154
2013-01-04 -0.253271  0.030089 -0.637101 -0.051489
2013-01-05 -0.131738  0.758541 -0.039777 -0.023115
...              ...       ...       ...       ...
2013-07-15 -2.872059 -1.330135 -0.094820  0.419263
2013-07-16  0.687347 -0.553696 -0.052474 -0.673264
2013-07-17  0.258002 -0.008737 -0.570560  0.691876
2013-07-18 -0.151862 -0.008466 -0.499816  0.315654
2013-07-19  0.866259  0.295905  0.173873 -0.268414

[200 rows x 4 columns]


In [37]:
# Agrupamos y realizamos una operación.
df.groupby(df.index.month).sum()

Unnamed: 0,A,B,C,D
1,4.522922,-2.086457,2.816427,0.388388
2,5.330515,4.658638,6.617321,-2.6171
3,4.838794,9.183483,3.340863,3.722874
4,1.976721,0.704714,-0.237758,-13.615722
5,13.202225,-2.934071,-4.014157,3.179861
6,6.86017,-6.064772,-5.256009,-1.281053
7,-0.925903,-1.605635,-4.546258,-0.568339


In [38]:
df.groupby(df.index.day).mean()

Unnamed: 0,A,B,C,D
1,-0.251839,-0.11892,0.041703,0.022354
2,0.158499,0.205744,0.301463,-0.352446
3,0.243061,0.107841,-0.317754,0.469486
4,0.236687,0.617257,0.056408,-0.227536
5,-0.105659,-0.528514,0.376754,0.173485
6,0.644699,-0.462295,0.587274,0.141083
7,0.630558,0.079664,-0.687527,0.069174
8,0.501805,-0.267741,0.158739,0.504529
9,-0.06104,0.086594,0.172205,1.026687
10,0.333744,1.161529,0.034939,-0.400194


In [39]:
# Hay mucha flexibilidad en el manejo de las fechas para series de tiempo.

rango = pd.date_range("1/1/2012", periods = 200, freq = "S")
ts = pd.Series(np.random.randint( 0, 500, len(rango) ), index = rango)

print(ts)

2012-01-01 00:00:00    184
2012-01-01 00:00:01    448
2012-01-01 00:00:02    323
2012-01-01 00:00:03     62
2012-01-01 00:00:04    405
                      ... 
2012-01-01 00:03:15    496
2012-01-01 00:03:16    225
2012-01-01 00:03:17    361
2012-01-01 00:03:18     95
2012-01-01 00:03:19    255
Freq: S, Length: 200, dtype: int64


In [40]:
# Hacemos un resampleo.

print( ts.resample("1Min").sum() )

2012-01-01 00:00:00    15286
2012-01-01 00:01:00    14741
2012-01-01 00:02:00    15024
2012-01-01 00:03:00     5231
Freq: T, dtype: int64


In [41]:
# Leer y escribir archivos es muy fácil con 
# el formato csv, compatible con Excel.

# Escribir.
ts.name = "Datos"
ts.index.name = "Timestamp"
ts.to_csv("time_series.csv")


In [42]:
# Leer archivo.

ts_2 = pd.read_csv("time_series.csv", index_col = "Timestamp")

print(ts_2)

                     Datos
Timestamp                 
2012-01-01 00:00:00    184
2012-01-01 00:00:01    448
2012-01-01 00:00:02    323
2012-01-01 00:00:03     62
2012-01-01 00:00:04    405
...                    ...
2012-01-01 00:03:15    496
2012-01-01 00:03:16    225
2012-01-01 00:03:17    361
2012-01-01 00:03:18     95
2012-01-01 00:03:19    255

[200 rows x 1 columns]


In [43]:
print( ts_2.index )

Index(['2012-01-01 00:00:00', '2012-01-01 00:00:01', '2012-01-01 00:00:02',
       '2012-01-01 00:00:03', '2012-01-01 00:00:04', '2012-01-01 00:00:05',
       '2012-01-01 00:00:06', '2012-01-01 00:00:07', '2012-01-01 00:00:08',
       '2012-01-01 00:00:09',
       ...
       '2012-01-01 00:03:10', '2012-01-01 00:03:11', '2012-01-01 00:03:12',
       '2012-01-01 00:03:13', '2012-01-01 00:03:14', '2012-01-01 00:03:15',
       '2012-01-01 00:03:16', '2012-01-01 00:03:17', '2012-01-01 00:03:18',
       '2012-01-01 00:03:19'],
      dtype='object', name='Timestamp', length=200)


In [44]:
ts_2.index = pd.to_datetime( ts_2.index )

print(ts_2.index)

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               ...
               '2012-01-01 00:03:10', '2012-01-01 00:03:11',
               '2012-01-01 00:03:12', '2012-01-01 00:03:13',
               '2012-01-01 00:03:14', '2012-01-01 00:03:15',
               '2012-01-01 00:03:16', '2012-01-01 00:03:17',
               '2012-01-01 00:03:18', '2012-01-01 00:03:19'],
              dtype='datetime64[ns]', name='Timestamp', length=200, freq=None)


In [45]:
# Correlación

# Creamos un dataframe aleatorio.
df = pd.DataFrame(np.random.randn(1000,2), columns = ["Col_1", "Col_2"])

print(df.head())
print()

# Matriz de correlación
print( df.corr() )
print()

# Matriz de covarianza
print( df.cov() )
print()

      Col_1     Col_2
0  1.562925  0.437307
1  0.688374  1.092793
2  0.783194  0.046905
3 -0.237644 -0.509770
4 -0.255502  0.105617

          Col_1     Col_2
Col_1  1.000000  0.001737
Col_2  0.001737  1.000000

          Col_1     Col_2
Col_1  0.989122  0.001745
Col_2  0.001745  1.019854



In [46]:
# Concatenar dataframes.

# Creamos un dataframe aleatorio.
df_1 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])
df_2 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])

print(df_1.head())
print()
print(df_2.head())
print()

# Concatenamos.
df = pd.concat([df_1, df_2], axis = 0, ignore_index = True)

# Reiniciamos el índice.
df = df.reset_index(drop = True)

print(df)

      Col_1     Col_2
0 -1.201299  1.438965
1  2.364967 -0.051359
2  0.808376  0.257757
3 -0.918288  0.881175
4  0.598928 -1.399259

      Col_1     Col_2
0  0.686800  0.770382
1  0.462271  1.135926
2 -0.137846 -0.072558
3 -0.202865 -0.526763
4  1.795157 -1.419180

        Col_1     Col_2
0   -1.201299  1.438965
1    2.364967 -0.051359
2    0.808376  0.257757
3   -0.918288  0.881175
4    0.598928 -1.399259
..        ...       ...
195 -0.526915 -1.070878
196  0.268223  1.236221
197 -1.026228  1.399747
198  0.732366 -1.404635
199  0.012224  0.717642

[200 rows x 2 columns]


In [47]:
# Concatenar dataframes.

# Creamos un dataframe aleatorio.
df_1 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])
df_2 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_3", "Col_4"])

print(df_1.head())
print()
print(df_2.head())
print()

# Concatenamos.
df = pd.concat([df_1, df_2], axis = 1)

print(df)

      Col_1     Col_2
0 -1.311240 -1.072674
1 -0.294746 -1.617547
2  0.143844  0.010000
3  0.560983 -1.356749
4  0.355116 -0.193275

      Col_3     Col_4
0  0.977635  0.210739
1  0.119943 -0.261642
2 -0.082919 -0.776785
3  0.941000 -0.698441
4 -0.070891 -0.781094

       Col_1     Col_2     Col_3     Col_4
0  -1.311240 -1.072674  0.977635  0.210739
1  -0.294746 -1.617547  0.119943 -0.261642
2   0.143844  0.010000 -0.082919 -0.776785
3   0.560983 -1.356749  0.941000 -0.698441
4   0.355116 -0.193275 -0.070891 -0.781094
..       ...       ...       ...       ...
95 -0.174311  1.430512  1.523382  0.283030
96 -0.180748 -0.106633 -0.868043  0.873607
97 -1.074421 -0.122811 -1.185270 -1.392931
98 -0.872951  0.960660 -0.612654 -1.164091
99 -1.247217 -0.906712 -0.989300 -1.537168

[100 rows x 4 columns]


In [48]:
# Cargar el archivo Electrico_residencial_2010_2017
# Selecciona las entradas correspondientes a Baja California.
# Ordena los datos de menor a mayor consumo en 2010.
# Selecciona los municipios con una cantidad de usiarios en 2012 menor a 100
# Encuentra el municipio con mayor consumo en tarifa DAC.
# Selecciona solo la tarifa DAC.
# Encuentra la correlación entre el consumo de 2012 y 2013