In [1]:
# Pandas es el Excel de python.
# Obtenido de: https://pandas.pydata.org/docs/user_guide/10min.html

import pandas as pd
import numpy as np

In [2]:
# A single pandas column is called a Series.
# np.nan marks a missing value.
# Every element is paired with an index label (0, 1, 2, ... by default).

s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [3]:
# The index can also be made of dates: here, six consecutive days.

fechas = pd.date_range(start="20130101", periods=6, freq="D")

print(fechas)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [4]:
# The same values, now labelled by date and given a name.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
    index=fechas,
    name="datos",
)

print(s)

2013-01-01    1.0
2013-01-02    3.0
2013-01-03    5.0
2013-01-04    NaN
2013-01-05    6.0
2013-01-06    8.0
Freq: D, Name: datos, dtype: float64


In [5]:
# A DataFrame is a collection of columns.

# Built here from a NumPy array of random values (6 rows x 4 columns),
# indexed by the dates created earlier.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=fechas,
    columns=["A", "B", "C", "D"],
)

print(df)

                   A         B         C         D
2013-01-01 -0.029955  0.791315 -0.959357  0.013228
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652
2013-01-05 -0.160273  0.981198 -0.643166  1.539274
2013-01-06 -0.633479  2.077087  1.043861  0.565009


In [6]:
# head() and tail() return the top and bottom rows of the DataFrame
# (five rows each by default). A blank line separates the two printouts.

print(df.head(), df.tail(), sep="\n\n")

                   A         B         C         D
2013-01-01 -0.029955  0.791315 -0.959357  0.013228
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652
2013-01-05 -0.160273  0.981198 -0.643166  1.539274

                   A         B         C         D
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652
2013-01-05 -0.160273  0.981198 -0.643166  1.539274
2013-01-06 -0.633479  2.077087  1.043861  0.565009


In [7]:
# The row index and the column labels are both directly accessible.

print(df.index, df.columns, sep="\n\n")

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

Index(['A', 'B', 'C', 'D'], dtype='object')


In [8]:
# Transpose: rows become columns and vice versa.

print(df.transpose())

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A   -0.029955   -0.639850    1.009577    0.096465   -0.160273   -0.633479
B    0.791315   -1.603372   -1.083771   -0.786210    0.981198    2.077087
C   -0.959357   -2.298525    1.311173   -1.281606   -0.643166    1.043861
D    0.013228   -1.416965    0.154061    0.036652    1.539274    0.565009


In [9]:
# Convert the DataFrame to a NumPy array (index and column labels are dropped).

print(df.to_numpy(copy=False))

[[-0.029955    0.79131548 -0.95935675  0.01322846]
 [-0.63985013 -1.60337196 -2.29852478 -1.41696521]
 [ 1.00957736 -1.08377099  1.31117346  0.15406127]
 [ 0.09646463 -0.78620961 -1.28160561  0.03665236]
 [-0.16027327  0.98119827 -0.64316575  1.53927377]
 [-0.63347862  2.07708742  1.04386096  0.56500893]]


In [10]:
# Descriptive statistics per column: count, mean, std, min, quartiles, max.

print(df.describe(percentiles=[0.25, 0.5, 0.75]))

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean  -0.059586  0.062708 -0.471270  0.148543
std    0.607378  1.431301  1.395422  0.958045
min   -0.639850 -1.603372 -2.298525 -1.416965
25%   -0.515177 -1.009381 -1.201043  0.019084
50%   -0.095114  0.002553 -0.801261  0.095357
75%    0.064860  0.933728  0.622104  0.462272
max    1.009577  2.077087  1.311173  1.539274


In [11]:
# Sorting data.

# By index, from the latest date to the earliest.

print(df.sort_index(axis=0, ascending=False))

                   A         B         C         D
2013-01-06 -0.633479  2.077087  1.043861  0.565009
2013-01-05 -0.160273  0.981198 -0.643166  1.539274
2013-01-04  0.096465 -0.786210 -1.281606  0.036652
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965
2013-01-01 -0.029955  0.791315 -0.959357  0.013228


In [12]:
# By the values of one column, smallest first.
print(df.sort_values(by="B", ascending=True))

                   A         B         C         D
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652
2013-01-01 -0.029955  0.791315 -0.959357  0.013228
2013-01-05 -0.160273  0.981198 -0.643166  1.539274
2013-01-06 -0.633479  2.077087  1.043861  0.565009


In [13]:
# Select a single column; the result is a Series.

print(df.loc[:, "A"])

2013-01-01   -0.029955
2013-01-02   -0.639850
2013-01-03    1.009577
2013-01-04    0.096465
2013-01-05   -0.160273
2013-01-06   -0.633479
Freq: D, Name: A, dtype: float64


In [14]:
# Select a single row by its index label:
# first show the label itself, then the row it identifies.

print(df.index[0], df.loc[df.index[0]], sep="\n")

2013-01-01 00:00:00
A   -0.029955
B    0.791315
C   -0.959357
D    0.013228
Name: 2013-01-01 00:00:00, dtype: float64


In [15]:
# Select a row by its integer position.

print(df.iloc[0, :])

A   -0.029955
B    0.791315
C   -0.959357
D    0.013228
Name: 2013-01-01 00:00:00, dtype: float64


In [16]:
# Select one value by row label and column label.

print(df.at[df.index[2], "B"])

-1.083770990817644


In [17]:
# Select one value by integer row and column positions.

print(df.iat[2, 1])

-1.083770990817644


In [18]:
# Select a rectangular sub-section of the DataFrame by position:
# rows 2-3 and columns 1-2 (the end of each slice is exclusive).

df_2 = df.iloc[slice(2, 4), slice(1, 3)]
print(df_2)

                   B         C
2013-01-03 -1.083771  1.311173
2013-01-04 -0.786210 -1.281606


In [19]:
# Rows 2-3 with every column.
df_2 = df.iloc[2:4, :]
print( df_2 )

# NOTE(review): the original remark here claimed that copies and views behave
# exactly as in NumPy. Do not rely on that: whether a pandas slice shares
# memory with its parent depends on the pandas version and its Copy-on-Write
# setting -- TODO confirm for the version in use. Call .copy() whenever an
# independent DataFrame is required.

                   A         B         C         D
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652


In [20]:
# Filtering by conditions.

# Condition on one column: keep the rows where "A" is positive.
print(df.loc[df["A"] > 0])

                   A         B         C         D
2013-01-03  1.009577 -1.083771  1.311173  0.154061
2013-01-04  0.096465 -0.786210 -1.281606  0.036652


In [21]:
# Condition on the whole DataFrame: entries that fail it show up as NaN.

print(df.where(df > 0))

                   A         B         C         D
2013-01-01       NaN  0.791315       NaN  0.013228
2013-01-02       NaN       NaN       NaN       NaN
2013-01-03  1.009577       NaN  1.311173  0.154061
2013-01-04  0.096465       NaN       NaN  0.036652
2013-01-05       NaN  0.981198       NaN  1.539274
2013-01-06       NaN  2.077087  1.043861  0.565009


In [22]:
# Adding columns.
# All elements within one column share a single type,
# but different columns may hold different types.

df["E"] = [
    "uno", "uno", "dos",
    "tres", "cuatro", "tres",
]

print(df)

                   A         B         C         D       E
2013-01-01 -0.029955  0.791315 -0.959357  0.013228     uno
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965     uno
2013-01-03  1.009577 -1.083771  1.311173  0.154061     dos
2013-01-04  0.096465 -0.786210 -1.281606  0.036652    tres
2013-01-05 -0.160273  0.981198 -0.643166  1.539274  cuatro
2013-01-06 -0.633479  2.077087  1.043861  0.565009    tres


In [23]:
# A new column can also come from a NumPy array:
# six evenly spaced values from 1 to 5.

df["F"] = np.linspace(start=1, stop=5, num=6)
print(df.head(n=5))

                   A         B         C         D       E    F
2013-01-01 -0.029955  0.791315 -0.959357  0.013228     uno  1.0
2013-01-02 -0.639850 -1.603372 -2.298525 -1.416965     uno  1.8
2013-01-03  1.009577 -1.083771  1.311173  0.154061     dos  2.6
2013-01-04  0.096465 -0.786210 -1.281606  0.036652    tres  3.4
2013-01-05 -0.160273  0.981198 -0.643166  1.539274  cuatro  4.2


In [24]:
# Keep the rows whose "E" value is one of a given set, using isin().

print(df.loc[df["E"].isin(["dos", "cuatro"])])

                   A         B         C         D       E    F
2013-01-03  1.009577 -1.083771  1.311173  0.154061     dos  2.6
2013-01-05 -0.160273  0.981198 -0.643166  1.539274  cuatro  4.2


In [25]:
# A single cell can be overwritten by position (row 0, column 1).

df.iat[0, 1] = 10
print(df)

                   A          B         C         D       E    F
2013-01-01 -0.029955  10.000000 -0.959357  0.013228     uno  1.0
2013-01-02 -0.639850  -1.603372 -2.298525 -1.416965     uno  1.8
2013-01-03  1.009577  -1.083771  1.311173  0.154061     dos  2.6
2013-01-04  0.096465  -0.786210 -1.281606  0.036652    tres  3.4
2013-01-05 -0.160273   0.981198 -0.643166  1.539274  cuatro  4.2
2013-01-06 -0.633479   2.077087  1.043861  0.565009    tres  5.0


In [26]:
# Remove a column. drop() returns a new DataFrame, which is rebound to df.

df = df.drop(columns="E")
print(df)

                   A          B         C         D    F
2013-01-01 -0.029955  10.000000 -0.959357  0.013228  1.0
2013-01-02 -0.639850  -1.603372 -2.298525 -1.416965  1.8
2013-01-03  1.009577  -1.083771  1.311173  0.154061  2.6
2013-01-04  0.096465  -0.786210 -1.281606  0.036652  3.4
2013-01-05 -0.160273   0.981198 -0.643166  1.539274  4.2
2013-01-06 -0.633479   2.077087  1.043861  0.565009  5.0


In [27]:
# Assign through a boolean condition: every negative entry becomes 0.

df[df.lt(0)] = 0
print(df)

                   A          B         C         D    F
2013-01-01  0.000000  10.000000  0.000000  0.013228  1.0
2013-01-02  0.000000   0.000000  0.000000  0.000000  1.8
2013-01-03  1.009577   0.000000  1.311173  0.154061  2.6
2013-01-04  0.096465   0.000000  0.000000  0.036652  3.4
2013-01-05  0.000000   0.981198  0.000000  1.539274  4.2
2013-01-06  0.000000   2.077087  1.043861  0.565009  5.0


In [28]:
# Introduce some missing values: every entry greater than 2 becomes NaN.

df[df.gt(2)] = np.nan
print(df)

                   A         B         C         D    F
2013-01-01  0.000000       NaN  0.000000  0.013228  1.0
2013-01-02  0.000000  0.000000  0.000000  0.000000  1.8
2013-01-03  1.009577  0.000000  1.311173  0.154061  NaN
2013-01-04  0.096465  0.000000  0.000000  0.036652  NaN
2013-01-05  0.000000  0.981198  0.000000  1.539274  NaN
2013-01-06  0.000000       NaN  1.043861  0.565009  NaN


In [29]:
# Drop every row that contains at least one missing value.
print(df.dropna(axis=0, how="any"), end="\n\n")

# Replace missing values with a fill value.
print(df.fillna(value=-100), end="\n\n")

# Boolean mask marking where the missing values are.
print(df.isna())

              A    B    C    D    F
2013-01-02  0.0  0.0  0.0  0.0  1.8

                   A           B         C         D      F
2013-01-01  0.000000 -100.000000  0.000000  0.013228    1.0
2013-01-02  0.000000    0.000000  0.000000  0.000000    1.8
2013-01-03  1.009577    0.000000  1.311173  0.154061 -100.0
2013-01-04  0.096465    0.000000  0.000000  0.036652 -100.0
2013-01-05  0.000000    0.981198  0.000000  1.539274 -100.0
2013-01-06  0.000000 -100.000000  1.043861  0.565009 -100.0

                A      B      C      D      F
2013-01-01  False   True  False  False  False
2013-01-02  False  False  False  False  False
2013-01-03  False  False  False  False   True
2013-01-04  False  False  False  False   True
2013-01-05  False  False  False  False   True
2013-01-06  False   True  False  False   True


In [30]:
# Operations.

# Between columns: element-wise sum stored as a new column.
df["G"] = df["A"].add(df["C"])

print(df, end="\n\n")

# Mean of every column (axis=0), then mean of every row (axis=1).
print(df.mean(axis=0), df.mean(axis=1), sep="\n\n")

                   A         B         C         D    F         G
2013-01-01  0.000000       NaN  0.000000  0.013228  1.0  0.000000
2013-01-02  0.000000  0.000000  0.000000  0.000000  1.8  0.000000
2013-01-03  1.009577  0.000000  1.311173  0.154061  NaN  2.320751
2013-01-04  0.096465  0.000000  0.000000  0.036652  NaN  0.096465
2013-01-05  0.000000  0.981198  0.000000  1.539274  NaN  0.000000
2013-01-06  0.000000       NaN  1.043861  0.565009  NaN  1.043861

A    0.184340
B    0.245300
C    0.392506
D    0.384704
F    1.400000
G    0.576846
dtype: float64

2013-01-01    0.202646
2013-01-02    0.300000
2013-01-03    0.959113
2013-01-04    0.045916
2013-01-05    0.504094
2013-01-06    0.663183
Freq: D, dtype: float64


In [31]:
# Apply a function to every column of the DataFrame.
# np.cumsum() computes a cumulative (running) sum.
df.apply(np.cumsum, axis=0)

Unnamed: 0,A,B,C,D,F,G
2013-01-01,0.0,,0.0,0.013228,1.0,0.0
2013-01-02,0.0,0.0,0.0,0.013228,2.8,0.0
2013-01-03,1.009577,0.0,1.311173,0.16729,,2.320751
2013-01-04,1.106042,0.0,1.311173,0.203942,,2.417215
2013-01-05,1.106042,0.981198,1.311173,1.743216,,2.417215
2013-01-06,1.106042,,2.355034,2.308225,,3.461076


In [32]:
# Natural logarithm of every element of column "C".
# "C" contains zeros at this point, and log(0) evaluates to -inf while NumPy
# emits a divide-by-zero RuntimeWarning. The -inf entries are the expected
# result of this demonstration, so only the warning is silenced; the values
# stored in "H" are unchanged.
with np.errstate(divide="ignore"):
    df["H"] = np.log( df["C"] )

df

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,A,B,C,D,F,G,H
2013-01-01,0.0,,0.0,0.013228,1.0,0.0,-inf
2013-01-02,0.0,0.0,0.0,0.0,1.8,0.0,-inf
2013-01-03,1.009577,0.0,1.311173,0.154061,,2.320751,0.270923
2013-01-04,0.096465,0.0,0.0,0.036652,,0.096465,-inf
2013-01-05,0.0,0.981198,0.0,1.539274,,0.0,-inf
2013-01-06,0.0,,1.043861,0.565009,,1.043861,0.042926


In [33]:
# Shift a column down by a number of rows (two here);
# the first rows of the shifted column are left as missing values.

df["I"] = df["C"].shift(periods=2)

df

Unnamed: 0,A,B,C,D,F,G,H,I
2013-01-01,0.0,,0.0,0.013228,1.0,0.0,-inf,
2013-01-02,0.0,0.0,0.0,0.0,1.8,0.0,-inf,
2013-01-03,1.009577,0.0,1.311173,0.154061,,2.320751,0.270923,0.0
2013-01-04,0.096465,0.0,0.0,0.036652,,0.096465,-inf,0.0
2013-01-05,0.0,0.981198,0.0,1.539274,,0.0,-inf,1.311173
2013-01-06,0.0,,1.043861,0.565009,,1.043861,0.042926,0.0


In [34]:
# Grouping data.

# Rebuild the example frame with 200 daily rows of random values.
fechas = pd.date_range(start="20130101", periods=200, freq="D")
df = pd.DataFrame(
    data=np.random.randn(200, 4),
    index=fechas,
    columns=["A", "B", "C", "D"],
)

print(df)

                   A         B         C         D
2013-01-01  0.692775  0.864991 -0.174458 -2.124173
2013-01-02 -0.956523 -0.860580  0.386738 -0.371287
2013-01-03 -0.312029 -1.483915 -1.445393  0.173630
2013-01-04  0.571486  0.737125 -1.308373  0.879276
2013-01-05  0.133828 -1.494838  0.858746 -0.455372
...              ...       ...       ...       ...
2013-07-15  0.573632  1.095817  0.243636  0.885749
2013-07-16 -0.482022 -0.814978 -1.963382  0.566107
2013-07-17 -0.262109  0.363217 -0.720502 -0.625185
2013-07-18 -0.914503 -0.200421  1.083039 -0.646416
2013-07-19  1.042444 -2.119689 -0.006965 -0.535855

[200 rows x 4 columns]


In [35]:
# Group the rows by calendar month of the index and sum each group.
df.groupby(by=df.index.month).sum()

Unnamed: 0,A,B,C,D
1,11.348114,9.886959,-2.658161,3.025521
2,4.323873,4.011944,-6.432318,2.489803
3,-1.884798,8.181839,-6.068654,2.597035
4,-9.007724,-12.620506,0.623759,0.414459
5,-2.625622,-5.56993,-5.686952,-0.506349
6,-3.505536,-2.022315,5.044583,7.370641
7,3.235357,-6.743474,1.131728,4.931266


In [36]:
# Group the rows by day of the month of the index and average each group.
df.groupby(by=df.index.day).mean()

Unnamed: 0,A,B,C,D
1,0.030964,0.368589,-0.93104,-0.407003
2,-0.821352,-0.094202,0.159229,0.062816
3,-0.48862,-0.376746,-0.157382,0.006404
4,0.560289,0.031361,-0.330725,0.555317
5,-0.424152,-0.572082,-0.10526,0.367265
6,0.079052,0.43235,0.297869,0.177384
7,0.613838,-0.257498,0.233614,0.253418
8,0.02369,-0.386617,0.445436,0.397491
9,0.070875,0.585341,-0.352931,0.684521
10,-0.238,0.320594,-0.00792,0.111347


In [37]:
# Dates are handled very flexibly for time series.
# Build 200 timestamps spaced one second apart and pair each with a random
# integer drawn from [0, 500).
# The lowercase alias "s" is used for seconds: the uppercase "S" spelling is
# deprecated in recent pandas releases, while "s" is accepted by old and new
# versions alike.

rango = pd.date_range("1/1/2012", periods = 200, freq = "s")
ts = pd.Series(np.random.randint( 0, 500, len(rango) ), index = rango)

print(ts)

2012-01-01 00:00:00    321
2012-01-01 00:00:01    422
2012-01-01 00:00:02    400
2012-01-01 00:00:03    117
2012-01-01 00:00:04    480
                      ... 
2012-01-01 00:03:15    110
2012-01-01 00:03:16    341
2012-01-01 00:03:17    143
2012-01-01 00:03:18    458
2012-01-01 00:03:19    158
Freq: S, Length: 200, dtype: int64


In [38]:
# Resample: regroup the series into one-minute bins and sum each bin.

print(ts.resample(rule="1Min").sum())

2012-01-01 00:00:00    16054
2012-01-01 00:01:00    13360
2012-01-01 00:02:00    15558
2012-01-01 00:03:00     4299
Freq: T, dtype: int64


In [39]:
# Reading and writing files is straightforward with the CSV format,
# which is compatible with Excel.

# Write: name the index and the values, then save the series to disk.
ts.index.name = "Timestamp"
ts.name = "Datos"
ts.to_csv(path_or_buf="time_series.csv")


In [40]:
# Read the file back, using its "Timestamp" column as the index.

ts_2 = pd.read_csv(filepath_or_buffer="time_series.csv", index_col="Timestamp")

print(ts_2)

                     Datos
Timestamp                 
2012-01-01 00:00:00    321
2012-01-01 00:00:01    422
2012-01-01 00:00:02    400
2012-01-01 00:00:03    117
2012-01-01 00:00:04    480
...                    ...
2012-01-01 00:03:15    110
2012-01-01 00:03:16    341
2012-01-01 00:03:17    143
2012-01-01 00:03:18    458
2012-01-01 00:03:19    158

[200 rows x 1 columns]


In [41]:
# Inspect the index of the frame that was read back from the CSV file.
# The recorded output reports dtype='object': the timestamps came back as
# plain strings rather than as datetimes.
print( ts_2.index )

Index(['2012-01-01 00:00:00', '2012-01-01 00:00:01', '2012-01-01 00:00:02',
       '2012-01-01 00:00:03', '2012-01-01 00:00:04', '2012-01-01 00:00:05',
       '2012-01-01 00:00:06', '2012-01-01 00:00:07', '2012-01-01 00:00:08',
       '2012-01-01 00:00:09',
       ...
       '2012-01-01 00:03:10', '2012-01-01 00:03:11', '2012-01-01 00:03:12',
       '2012-01-01 00:03:13', '2012-01-01 00:03:14', '2012-01-01 00:03:15',
       '2012-01-01 00:03:16', '2012-01-01 00:03:17', '2012-01-01 00:03:18',
       '2012-01-01 00:03:19'],
      dtype='object', name='Timestamp', length=200)


In [42]:
# Convert the index into real timestamps.
ts_2.index = pd.to_datetime(arg=ts_2.index)

print(ts_2.index)

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               ...
               '2012-01-01 00:03:10', '2012-01-01 00:03:11',
               '2012-01-01 00:03:12', '2012-01-01 00:03:13',
               '2012-01-01 00:03:14', '2012-01-01 00:03:15',
               '2012-01-01 00:03:16', '2012-01-01 00:03:17',
               '2012-01-01 00:03:18', '2012-01-01 00:03:19'],
              dtype='datetime64[ns]', name='Timestamp', length=200, freq=None)


In [43]:
# Correlation.

# Random DataFrame with 1000 rows and two columns.
df = pd.DataFrame(data=np.random.randn(1000, 2), columns=["Col_1", "Col_2"])

print(df.head(), end="\n\n")

# Correlation matrix.
print(df.corr(), end="\n\n")

# Covariance matrix.
print(df.cov(), end="\n\n")

# Autocorrelation of the first column at lag 1.
print(df["Col_1"].autocorr(lag=1))

      Col_1     Col_2
0  0.011712 -1.012935
1 -1.157727 -0.351751
2 -0.205312  0.784919
3 -0.114543  0.110804
4 -1.248067  1.581528

          Col_1     Col_2
Col_1  1.000000  0.029431
Col_2  0.029431  1.000000

          Col_1     Col_2
Col_1  0.990424  0.029596
Col_2  0.029596  1.021027

0.037162751548199445


In [44]:
# Concatenate DataFrames row-wise.

# Two random DataFrames that share the same column names.
df_1 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])
df_2 = pd.DataFrame(np.random.randn(100,2), columns = ["Col_1", "Col_2"])

print(df_1.head())
print()
print(df_2.head())
print()

# Stack the rows of both frames. ignore_index=True already gives the result
# a fresh 0..n-1 index, so no separate reset_index() call is needed.
df = pd.concat([df_1, df_2], axis = 0, ignore_index = True)

print(df)

      Col_1     Col_2
0 -0.056030  0.023527
1 -0.489505  0.824042
2 -0.276109 -0.521740
3 -2.693434 -0.566829
4  1.290869 -2.115969

      Col_1     Col_2
0 -1.105007 -0.124848
1  2.944958  0.039948
2 -0.723169  0.210178
3  0.198669  0.026255
4 -0.305018  0.206550

        Col_1     Col_2
0   -0.056030  0.023527
1   -0.489505  0.824042
2   -0.276109 -0.521740
3   -2.693434 -0.566829
4    1.290869 -2.115969
..        ...       ...
195  1.713550  0.573105
196  0.582938 -1.030717
197  1.395751 -0.126859
198  1.122381 -1.428322
199 -1.448325  0.461974

[200 rows x 2 columns]


In [45]:
# Concatenate DataFrames column-wise.

# Two random DataFrames with different column names.
df_1 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_1", "Col_2"])
df_2 = pd.DataFrame(data=np.random.randn(100, 2), columns=["Col_3", "Col_4"])

print(df_1.head(), df_2.head(), sep="\n\n", end="\n\n")

# Place the frames side by side, aligned on their shared index.
df = pd.concat(objs=[df_1, df_2], axis="columns")

print(df)

      Col_1     Col_2
0  0.828310  0.550275
1  0.682187 -0.619901
2  0.241013  0.068001
3 -0.161554 -0.930467
4  1.072136 -0.772513

      Col_3     Col_4
0 -0.012879  0.649223
1  1.307390 -0.017059
2  0.748104  0.697904
3  1.179369 -0.024348
4 -0.903988 -0.484414

       Col_1     Col_2     Col_3     Col_4
0   0.828310  0.550275 -0.012879  0.649223
1   0.682187 -0.619901  1.307390 -0.017059
2   0.241013  0.068001  0.748104  0.697904
3  -0.161554 -0.930467  1.179369 -0.024348
4   1.072136 -0.772513 -0.903988 -0.484414
..       ...       ...       ...       ...
95 -1.470397 -1.088074  1.653223 -0.285305
96 -0.932873  0.155548  0.894440 -1.177244
97 -0.496353  1.480891 -0.827917  0.228381
98 -0.096301  0.756939 -0.476784 -1.687545
99  0.565201 -0.198155  2.065439  1.474111

[100 rows x 4 columns]


In [46]:
# A histogram (frequency count) can be obtained.
# Start from ten random integers drawn from [0, 7).

s = pd.Series(data=np.random.randint(low=0, high=7, size=10))
print(s)


0    0
1    6
2    1
3    4
4    4
5    6
6    4
7    5
8    3
9    3
dtype: int64


In [47]:
# Count how many times each distinct value occurs, most frequent first.
print(s.value_counts(sort=True, ascending=False))

4    3
6    2
3    2
0    1
1    1
5    1
dtype: int64


In [48]:
# Cargar el archivo Electrico_residencial_2010_2017
# Selecciona las entradas correspondientes a Baja California.
# Ordena los datos de menor a mayor consumo en 2010.
# Selecciona los municipios con una cantidad de usuarios en 2012 menor a 100.
# Encuentra el municipio con mayor consumo en tarifa DAC.
# Selecciona solo la tarifa DAC.
# Encuentra la correlación entre el consumo de 2012 y 2013