# Introducción a Pandas

## Importando dependencias (librerías)

In [1]:
import numpy as np
import pandas as pd
# Show the installed pandas version so the outputs below can be tied to it.
pd.__version__

'0.24.2'

## Series

In [2]:
# Build a Series from a plain list; pandas supplies the default 0..n-1 index.
srA = pd.Series(data=[12, -4, 7, 5])
srA

0    12
1    -4
2     7
3     5
dtype: int64

In [3]:
# Same values, this time with explicit string labels as the index.
srA = pd.Series(data=[12, -4, 7, 5], index=['a', 'b', 'c', 'd'])
srA

a    12
b    -4
c     7
d     5
dtype: int64

In [5]:
# Index labels of the Series (the strings passed as `index` when it was built).
srA.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [6]:
# Underlying data as a NumPy array. `to_numpy()` is the accessor pandas
# recommends over the older `.values` attribute.
srA.to_numpy()

array([12, -4,  7,  5], dtype=int64)

In [8]:
# Compare positional (.iloc) and label-based (.loc) access on a labelled Series.
# A bare srA[2] on a string index relies on a deprecated positional fallback,
# so every lookup here states its intent explicitly.
# Note: a positional slice excludes its stop, a label slice includes it.
print(
    srA,
    srA.iloc[2],        # third element, by position
    srA.loc['b'],       # element labelled 'b'
    srA.iloc[0:2],      # positions 0 and 1
    srA.loc['a':'b'],   # labels 'a' through 'b', inclusive
    srA.loc['a':'c'],   # labels 'a' through 'c', inclusive
    sep="\n\n",
)

a    12
b    -4
c     7
d     5
dtype: int64

7

-4

a    12
b    -4
dtype: int64

a    12
b    -4
dtype: int64

a    12
b    -4
c     7
dtype: int64


In [10]:
# Series backed by a NumPy range holding the integers 1 through 4.
srB = pd.Series(data=np.arange(1, 5))
srB

0    1
1    2
2    3
3    4
dtype: int32

In [11]:
# Keep only the elements greater than 1, using a boolean mask.
srB.loc[srB > 1]

1    2
2    3
3    4
dtype: int32

In [13]:
# Overwrite the element whose index label is 2.
srB.loc[2] = -100
srB

0      1
1      2
2   -100
3      4
dtype: int32

In [14]:
# Element-wise absolute value; the result is again a Series.
srB.abs()

0      1
1      2
2    100
3      4
dtype: int32

In [15]:
# Natural log of the absolute values; NumPy ufuncs apply element-wise to a Series.
np.log(srB.abs())

0    0.000000
1    0.693147
2    4.605170
3    1.386294
dtype: float64

In [16]:
# A Series whose index contains repeated labels ("blanco" and "verde").
srC = pd.Series(
    data=[1, 0, 2, 1, 2, 3],
    index=["blanco", "blanco", "azul", "verde", "verde", "amarillo"],
)
srC

blanco      1
blanco      0
azul        2
verde       1
verde       2
amarillo    3
dtype: int64

In [17]:
# Distinct values, in order of first appearance.
srC.unique()

array([1, 0, 2, 3], dtype=int64)

In [18]:
# Distinct index labels; the repeated 'blanco' and 'verde' appear only once.
srC.index.unique()

Index(['blanco', 'azul', 'verde', 'amarillo'], dtype='object')

In [21]:
# Number of occurrences of each value, most frequent first.
srC.value_counts()

2    2
1    2
3    1
0    1
dtype: int64

In [22]:
# Boolean mask: True where the element's value is 0 or 3.
srC.isin([0,3])

blanco      False
blanco       True
azul        False
verde       False
verde       False
amarillo     True
dtype: bool

In [23]:
# Use the membership mask to keep only the elements equal to 0 or 3.
srC.loc[srC.isin([0, 3])]

blanco      0
amarillo    3
dtype: int64

## Valores NaN (Not a Number)

In [2]:
# NOTE(review): these imports repeat the first cell and the execution counter
# restarts here — presumably this section was run in a fresh kernel; confirm.
import numpy as np
import pandas as pd
# np.nan is the missing-value marker used in the cells below.
np.nan

nan

In [3]:
# NaN is a float; a Series that contains it is shown below with dtype float64.
type(np.nan)

float

In [4]:
# A Series with one missing entry; the integers are stored as floats next to NaN.
srA = pd.Series(data=[-5, -3, np.nan, 14])
srA

0    -5.0
1    -3.0
2     NaN
3    14.0
dtype: float64

In [5]:
# Boolean mask marking the missing entries (isna is the same check as isnull).
srA.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [7]:
# Select only the missing entries.
srA.loc[srA.isna()]

2   NaN
dtype: float64

In [8]:
# Select only the entries that are present (not NaN).
srA.loc[srA.notna()]

0    -5.0
1    -3.0
3    14.0
dtype: float64

## Series a partir de diccionarios

In [9]:
# Building a Series from a dict: keys become the index, values become the data.
dictA = {
    'rojo': 200,
    'azul': 1000,
    'verde': 500,
}
srA = pd.Series(data=dictA)
srA

rojo      200
azul     1000
verde     500
dtype: int64

## DataFrame

In [10]:
# Building a DataFrame from a dict of equal-length lists: one column per key.
dictData = {
    'color': ['azul', 'verde', 'amarillo'],
    'objeto': ['papel', 'taza', 'lapicero'],
    'precio': [0.15, 2.5, 1.2],
}
dfData = pd.DataFrame(data=dictData)
dfData

Unnamed: 0,color,objeto,precio
0,azul,papel,0.15
1,verde,taza,2.5
2,amarillo,lapicero,1.2


In [11]:
# Column labels of the DataFrame (the keys of the source dict).
dfData.columns

Index(['color', 'objeto', 'precio'], dtype='object')

In [12]:
# Row labels; no index was supplied, so this is the default 0..2 range.
dfData.index

RangeIndex(start=0, stop=3, step=1)

In [13]:
# Underlying data as a 2-D NumPy array. The columns mix strings and floats,
# so the array comes back with dtype object. `to_numpy()` is the accessor
# pandas recommends over the older `.values` attribute.
dfData.to_numpy()

array([['azul', 'papel', 0.15],
       ['verde', 'taza', 2.5],
       ['amarillo', 'lapicero', 1.2]], dtype=object)

In [14]:
# Select a single column by name; the result is a Series named after the column.
dfData['color']

0        azul
1       verde
2    amarillo
Name: color, dtype: object

In [15]:
# Select the row labelled 2; it comes back as a Series indexed by column name.
dfData.loc[2]

color     amarillo
objeto    lapicero
precio         1.2
Name: 2, dtype: object

In [16]:
# First two rows, selected by position (the stop of the slice is excluded).
dfData.iloc[0:2]

Unnamed: 0,color,objeto,precio
0,azul,papel,0.15
1,verde,taza,2.5


In [17]:
# Add a column in place; the scalar 12 is broadcast to every row.
dfData['nuevo'] = 12
dfData

Unnamed: 0,color,objeto,precio,nuevo
0,azul,papel,0.15,12
1,verde,taza,2.5,12
2,amarillo,lapicero,1.2,12
