# Series

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
# Build a Series whose index labels come from `etiquetas` and whose values
# come from `datos`; the Series is the cell's displayed result.
etiquetas, datos = ['a', 'b', 'c'], [1, 2, 3]
pd.Series(datos, etiquetas)

a    1
b    2
c    3
dtype: int64

In [4]:
# Label the three consecutive integers 5, 6, 7 with sale names.
etiqueta = ['venta1', 'venta2', 'venta3']
array = np.arange(5, 8)
serie1 = pd.Series(data=array, index=etiqueta)

In [5]:
serie1

venta1    5
venta2    6
venta3    7
dtype: int32

In [6]:
serie1['venta1']

5

In [7]:
# Positional access to the third element. `serie1` has string labels, so a bare
# integer key (serie1[2]) relies on pandas' deprecated integer-as-position
# fallback; .iloc states the positional intent explicitly.
serie1.iloc[2]

7

In [8]:
datos2 = ['informatica',300,'impresora',400]
serie2 = pd.Series(datos2)

In [9]:
serie2

0    informatica
1            300
2      impresora
3            400
dtype: object

In [10]:
# Two Series with overlapping labels: 'venta4' exists only in serie3.
serie3 = pd.Series(
    data=[1, 2, 3, 4],
    index=['venta1', 'venta2', 'venta3', 'venta4'],
)
serie4 = pd.Series(
    data=[4, 5, 6],
    index=['venta1', 'venta2', 'venta3'],
)

In [11]:
# Element-wise sum aligned on index labels. A label present in only one
# operand ('venta4') yields NaN, which is why the result is float64.
serie_suma = serie3.add(serie4)

In [12]:
serie_suma

venta1    5.0
venta2    7.0
venta3    9.0
venta4    NaN
dtype: float64

## DataFrames

In [13]:
# Row labels, column labels and a 3x3 grid of values (one inner list per row)
# used to build the first DataFrame.
fila = ['ventanas1', 'ventanas2', 'ventanas3']
columnas = ['zonaA', 'zonaB', 'zonaC']
datos = [
    [123, 421, 256],
    [234, 541, 257],
    [120, 451, 258],
]

In [14]:
# Spell out which argument is the data, the row index and the column labels.
dataframe = pd.DataFrame(data=datos, index=fila, columns=columnas)

In [15]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257
ventanas3,120,451,258


In [16]:
dataframe.loc[['ventanas1','ventanas2']]

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257


In [17]:
dataframe['zonaA']

ventanas1    123
ventanas2    234
ventanas3    120
Name: zonaA, dtype: int64

In [18]:
# Single-cell lookup by (row label, column label) in one .loc call instead of
# chaining two [] lookups through an intermediate Series.
dataframe.loc['ventanas1', 'zonaA']

123

In [19]:
# New column holding the per-row total across the three zone columns.
dataframe['TodaslasZonas'] = dataframe[['zonaA', 'zonaB', 'zonaC']].sum(axis=1)

In [20]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC,TodaslasZonas
ventanas1,123,421,256,800
ventanas2,234,541,257,1032
ventanas3,120,451,258,829


In [21]:
dataframe.drop('TodaslasZonas',axis=1)

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257
ventanas3,120,451,258


In [22]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC,TodaslasZonas
ventanas1,123,421,256,800
ventanas2,234,541,257,1032
ventanas3,120,451,258,829


In [23]:
# Same column drop as In [21], but inplace=True mutates `dataframe` itself
# (the call produces no output), so the column is gone in the next display.
dataframe.drop(columns='TodaslasZonas', inplace=True)

In [24]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257
ventanas3,120,451,258


In [25]:
dataframe.drop('ventanas3')

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257


In [26]:
# Remove the row labelled 'ventanas3' from `dataframe` itself; the shape shown
# afterwards is (2, 3).
dataframe.drop(index='ventanas3', inplace=True)

In [27]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC
ventanas1,123,421,256
ventanas2,234,541,257


In [28]:
dataframe.shape

(2, 3)

In [29]:
# Selecting data with a condition

In [30]:
filas = 'ventas1 ventas2 ventas3'.split()
filas

['ventas1', 'ventas2', 'ventas3']

In [31]:
columnas = 'zonaA zonaB zonaC'.split()

In [32]:
datos = [[120,340,250],[210,450,250],[310,210,140]]

In [33]:
datos

[[120, 340, 250], [210, 450, 250], [310, 210, 140]]

In [34]:
dataframe = pd.DataFrame(datos,filas,columnas)

In [35]:
dataframe

Unnamed: 0,zonaA,zonaB,zonaC
ventas1,120,340,250
ventas2,210,450,250
ventas3,310,210,140


In [36]:
condicion = dataframe > 200

In [37]:
dataframe[condicion]

Unnamed: 0,zonaA,zonaB,zonaC
ventas1,,340,250.0
ventas2,210.0,450,250.0
ventas3,310.0,210,


In [38]:
condicion = dataframe['zonaA'] > 200
dataframe[condicion]

Unnamed: 0,zonaA,zonaB,zonaC
ventas2,210,450,250
ventas3,310,210,140


In [39]:
condicion = (dataframe['zonaA'] > 200) & (dataframe['zonaB']>300)

In [40]:
dataframe[condicion]

Unnamed: 0,zonaA,zonaB,zonaC
ventas2,210,450,250


In [41]:
# Filter rows with the boolean mask and pick two columns in a single .loc call,
# rather than chaining dataframe[condicion][...] through an intermediate frame.
dataframe.loc[condicion, ['zonaA', 'zonaC']]

Unnamed: 0,zonaA,zonaC
ventas2,210,250


In [42]:
nuevas_filas = 'dia1 dia2 dia3'.split()

In [43]:
dataframe['dias'] = nuevas_filas
dataframe

Unnamed: 0,zonaA,zonaB,zonaC,dias
ventas1,120,340,250,dia1
ventas2,210,450,250,dia2
ventas3,310,210,140,dia3


In [44]:
dataframe.set_index('dias')

Unnamed: 0_level_0,zonaA,zonaB,zonaC
dias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dia1,120,340,250
dia2,210,450,250
dia3,310,210,140


In [45]:
dataframe = dataframe.set_index('dias')

In [46]:
dataframe

Unnamed: 0_level_0,zonaA,zonaB,zonaC
dias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dia1,120,340,250
dia2,210,450,250
dia3,310,210,140


## Tratamiento de datos nulos

In [47]:
diccionario = {'A':[4,5,np.nan], 'B':[6,1,5], 'C':[np.nan,4,np.nan]}

In [48]:
dataDiccionario = pd.DataFrame(diccionario)

In [49]:
dataDiccionario

Unnamed: 0,A,B,C
0,4.0,6,
1,5.0,1,4.0
2,,5,


In [50]:
dataDiccionario.dropna()

Unnamed: 0,A,B,C
1,5.0,1,4.0


In [51]:
dataDiccionario.dropna(axis = 1)

Unnamed: 0,B
0,6
1,1
2,5


In [52]:
dataDiccionario.fillna(value=100)

Unnamed: 0,A,B,C
0,4.0,6,100.0
1,5.0,1,4.0
2,100.0,5,100.0


In [53]:
dataDiccionario

Unnamed: 0,A,B,C
0,4.0,6,
1,5.0,1,4.0
2,,5,


In [54]:
valor_medio = dataDiccionario.mean()
valor_medio

A    4.5
B    4.0
C    4.0
dtype: float64

In [56]:
# `valor_medio` is indexed by column name, so each column's NaN entries are
# filled with that column's own mean. Returns a new frame; the original keeps
# its NaNs.
dataDiccionario.fillna(valor_medio)

Unnamed: 0,A,B,C
0,4.0,6,4.0
1,5.0,1,4.0
2,4.5,5,4.0


## Datos agrupados

In [57]:
ventas_3_dias = {'dias':['dia1','dia1','dia2','dia2','dia3'], 'vendedores':['Antonio','Maria','Jose','Marta','Juan'], 'ventas':[100,400,200,500,300]}

In [58]:
lasventas= pd.DataFrame(ventas_3_dias)

In [59]:
lasventas

Unnamed: 0,dias,vendedores,ventas
0,dia1,Antonio,100
1,dia1,Maria,400
2,dia2,Jose,200
3,dia2,Marta,500
4,dia3,Juan,300


In [62]:
# Mean of the numeric columns per day. numeric_only=True is required on
# pandas >= 2.0, where the default no longer silently skips the string column
# 'vendedores' and the call raises TypeError instead.
lasventas.groupby('dias').mean(numeric_only=True)

Unnamed: 0_level_0,ventas
dias,Unnamed: 1_level_1
dia1,250
dia2,350
dia3,300


In [63]:
lasventas.groupby('dias').describe()

Unnamed: 0_level_0,ventas,ventas,ventas,ventas,ventas,ventas,ventas,ventas
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
dias,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
dia1,2.0,250.0,212.132034,100.0,175.0,250.0,325.0,400.0
dia2,2.0,350.0,212.132034,200.0,275.0,350.0,425.0,500.0
dia3,1.0,300.0,,300.0,300.0,300.0,300.0,300.0


## Combinar DataFrames

In [64]:
primero = {'A':[1,2,3], 'B':[4,5,6], 'C':[7,8,9]}

In [72]:
# Build the first frame from `primero` (defined in the cell above). The cell
# previously passed `diccionario`, the NaN-containing dict from the
# null-handling section, which does not match the displayed output and breaks
# the example on a fresh Restart & Run All.
uno = pd.DataFrame(primero)
uno

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


In [69]:
segundo = {'A':[11,12,13], 'B':[14,15,16], 'C':[17,18,19]}

In [71]:
dos = pd.DataFrame(segundo)
dos

Unnamed: 0,A,B,C
0,11,14,17
1,12,15,18
2,13,16,19


In [73]:
pd.concat([uno,dos])

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9
0,11,14,17
1,12,15,18
2,13,16,19


In [74]:
pd.concat([uno,dos],axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,1,4,7,11,14,17
1,2,5,8,12,15,18
2,3,6,9,13,16,19


## Merge

In [77]:
valores = {'A':[1,2,3],'B':[4,5,6],'clave':['c1','c2','c3']}
lista = pd.DataFrame(valores)
lista

Unnamed: 0,A,B,clave
0,1,4,c1
1,2,5,c2
2,3,6,c3


In [79]:
valores2 = {'C':[1,2,3],'D':[4,5,6],'clave':['c1','c2','c3']}
lista2 = pd.DataFrame(valores2)
lista2

Unnamed: 0,C,D,clave
0,1,4,c1
1,2,5,c2
2,3,6,c3


In [80]:
# Join the two frames on their shared 'clave' column.
lista.merge(lista2, on='clave')

Unnamed: 0,A,B,clave,C,D
0,1,4,c1,1,4
1,2,5,c2,2,5
2,3,6,c3,3,6


## Join

In [84]:
bd = {'A':[1,2,3],'B':[4,5,6]}
df1 = pd.DataFrame(bd, index=['i1','i2','i3'])
df1

Unnamed: 0,A,B
i1,1,4
i2,2,5
i3,3,6


In [85]:
bc = {'C':[11,12,13],'D':[14,15,16]}
df2 = pd.DataFrame(bc, index=['i1','i2','i3'])
df2

Unnamed: 0,C,D
i1,11,14
i2,12,15
i3,13,16


In [86]:
df1.join(df2)

Unnamed: 0,A,B,C,D
i1,1,4,11,14
i2,2,5,12,15
i3,3,6,13,16


## Operadores

In [88]:
valor = {'A':[11,12,10,12],'B':[14,18,16,17]}
operador1 = pd.DataFrame(valor, index = ['i1', 'i2', 'i3', 'i4'])
operador1

Unnamed: 0,A,B
i1,11,14
i2,12,18
i3,10,16
i4,12,17


In [89]:
operador1['A'].unique()

array([11, 12, 10], dtype=int64)

In [90]:
operador1['A'].nunique()

3

In [92]:
operador1['A'].value_counts()

12    2
11    1
10    1
Name: A, dtype: int64

In [94]:
def multiplicar(x):
    """Return ``x`` multiplied by two.

    Used in the following cells as the callable passed to ``Series.apply``
    and ``DataFrame.apply``.
    """
    doble = x * 2
    return doble

In [95]:
operador1['A'].apply(multiplicar)

i1    22
i2    24
i3    20
i4    24
Name: A, dtype: int64

In [97]:
operador1.apply(multiplicar)

Unnamed: 0,A,B
i1,22,28
i2,24,36
i3,20,32
i4,24,34


In [98]:
operador1['A'].apply(lambda x: x*2)

i1    22
i2    24
i3    20
i4    24
Name: A, dtype: int64

In [100]:
operador1.drop('B',axis=1)

Unnamed: 0,A
i1,11
i2,12
i3,10
i4,12


In [101]:
operador1.drop('i1')

Unnamed: 0,A,B
i2,12,18
i3,10,16
i4,12,17


In [103]:
operador1.columns

Index(['A', 'B'], dtype='object')

In [104]:
operador1.index

Index(['i1', 'i2', 'i3', 'i4'], dtype='object')

In [106]:
operador1.sort_values('B')

Unnamed: 0,A,B
i1,11,14
i3,10,16
i4,12,17
i2,12,18
