In [None]:
import numpy as np
import pandas as pd

# Show the installed pandas version so readers know which release produced the outputs below.
pd.__version__

'1.5.3'

In [None]:
# Build a Series from a plain list. No index is given, so the output shows
# default integer labels 0..4 alongside the float64 values.
s1 = pd.Series([0.25, 0.5, 0.5, 1.0, 1.25])
s1

0    0.25
1    0.50
2    0.50
3    1.00
4    1.25
dtype: float64

In [None]:
# Build a Series with explicit string labels instead of the default integer index.
s2 = pd.Series([12, -4, 7, 9], index=['a', 'b', 'c', 'd'])
s2

a    12
b    -4
c     7
d     9
dtype: int64

In [None]:
# The underlying data as a NumPy array (the labels are not part of it).
s2.values

array([12, -4,  7,  9])

In [None]:
# The labels of the Series, returned as an Index object.
s2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [None]:
# Dictionary-style accessor: keys() shows the same Index as the .index attribute.
s2.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [None]:
# Membership is tested against the index labels, as with dictionary keys.
'b' in s2

True

In [None]:
# An integer key on this string-labelled Series is resolved by position (third element).
# NOTE(review): this positional fallback is ambiguous and, as far as I know, deprecated in
# newer pandas releases; the explicit form is s2.iloc[2], shown in the next cell.
s2[2]

7

In [None]:
# Explicit positional access: the element at position 2.
s2.iloc[2]

7

In [None]:
# Label-based access, as with a dictionary key.
s2['a']

12

In [None]:
# An integer slice selects by position; the stop position is excluded (only 'a' and 'b' are returned).
s2[0:2]

a    12
b    -4
dtype: int64

In [None]:
# A list of labels selects several elements, in the order requested.
s2[['b', 'a']]

b    -4
a    12
dtype: int64

In [None]:
# Overwrite the second element by position, then display the modified Series.
# .iloc states the positional intent explicitly: a bare integer key on a
# string-labelled Series relies on a positional fallback that is ambiguous
# and deprecated in newer pandas releases.
s2.iloc[1] = 0
s2

a    12
b     0
c     7
d     9
dtype: int64

In [None]:
# Overwrite the element labelled 'b', then display the modified Series.
s2['b'] = 2
s2

a    12
b     2
c     7
d     9
dtype: int64

In [None]:
# Build a Series from a NumPy array; the output shows the default integer index.
arr = np.array([10, 12, 13, 14])
s3 = pd.Series(arr)
s3

0    10
1    12
2    13
3    14
dtype: int64

In [None]:
# Build a Series from an existing Series; labels and values carry over.
# NOTE(review): no copy=True is passed, so s4 may share its data with s2 — confirm
# before relying on the two being independent.
s4 = pd.Series(s2)
s4

a    12
b     2
c     7
d     9
dtype: int64

In [None]:
# Comparison is element-wise and yields a boolean Series (a mask) that can be combined with boolean logic.
s2 > 8

a     True
b    False
c    False
d     True
dtype: bool

In [None]:
# Boolean-mask filtering: keep only the elements that satisfy the condition.
s2[s2>8]

a    12
d     9
dtype: int64

In [None]:
# Arithmetic is applied element-wise; the output shows the division producing float64 values.
s2 / 2

a    6.0
b    1.0
c    3.5
d    4.5
dtype: float64

In [None]:
# A Series whose index contains repeated labels ('white' and 'green' each appear twice).
s5 = pd.Series(
    [1, 0, 2, 1, 2, 3],
    index=['white', 'white', 'blue', 'green', 'green', 'yellow'],
)
s5

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [None]:
# Distinct values of the Series; the output lists them in order of first appearance.
s5.unique()

array([1, 0, 2, 3])

In [None]:
# Number of occurrences of each distinct value; the output is ordered with the most frequent first.
s5.value_counts()

1    2
2    2
0    1
3    1
dtype: int64

In [None]:
# Membership test: True where the element equals one of the listed values (0 or 1).
# This checks against the given values, not against a numeric range.
s5.isin([0, 1])

white      True
white      True
blue      False
green      True
green     False
yellow    False
dtype: bool

In [None]:
# Use the isin() result as a mask to keep only the matching elements.
s5[s5.isin([0,1])]

white    1
white    0
green    1
dtype: int64

In [None]:
# A Series containing a missing value. np.nan is the canonical spelling:
# the np.NaN alias no longer exists in NumPy 2.0 and raises AttributeError there.
# As the output shows, the presence of NaN makes the dtype float64.
s6 = pd.Series([15, -3, np.nan, 24])
s6

0    15.0
1    -3.0
2     NaN
3    24.0
dtype: float64

In [None]:
# Boolean mask that is True where the value is missing (NaN).
s6.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [None]:
# Select only the missing entries.
s6[s6.isnull()]

2   NaN
dtype: float64

In [None]:
# Inverse mask: True where a value is present.
s6.notnull()

0     True
1     True
2    False
3     True
dtype: bool

In [None]:
# Keep only the non-missing entries.
s6[s6.notnull()]

0    15.0
1    -3.0
3    24.0
dtype: float64

## Series as Dictionaries

In [None]:
# A dict converts directly to a Series: keys become the index, values the data.
d1 = {
    'red': 2000,
    'blue': 1000,
    'yellow': 500,
    'orange': 1000,
}
s7 = pd.Series(d1)
s7

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

### Re-indexing with an explicit `index`

In [None]:
# Re-indexing: passing index= selects and orders the entries by the given labels
# rather than by the dictionary's own key order. A label that is not a key of d1
# ('green') gets NaN, and the output dtype becomes float64.
colors = ['red', 'yellow', 'orange', 'blue', 'green']
s8 = pd.Series(d1, index=colors)
s8

red       2000.0
yellow     500.0
orange    1000.0
blue      1000.0
green        NaN
dtype: float64

### Operations between Series

In [None]:
# Operations between Series align on labels: as the output shows, only labels
# present in both operands ('red', 'yellow') get a numeric result; the rest are NaN.
d2 = {
    'red': 400,
    'yellow': 1000,
    'black': 700,
}
s9 = pd.Series(d2)
s8 + s9

black        NaN
blue         NaN
green        NaN
orange       NaN
red       2400.0
yellow    1500.0
dtype: float64

### Accessing a Series as a one-dimensional array

In [None]:
# Current contents of s2, after the assignments made in earlier cells.
s2

a    12
b     2
c     7
d     9
dtype: int64

In [None]:
# Slicing by label includes both endpoints: 'c' is part of the result.
s2['a':'c']

a    12
b     2
c     7
dtype: int64

In [None]:
# Masking (filtering): combine two conditions with & to keep values strictly between 1 and 8.
s2[(s2 > 1) & (s2 < 8)]

b    2
c    7
dtype: int64

In [None]:
# Fancy indexing: pass a list of labels to pick arbitrary elements.
s2[['a', 'd']]

a    12
d     9
dtype: int64

## Series Indexing and Selection Methods: loc, iloc

In [None]:
# The loc / iloc indexers, demonstrated in the following cells on the Series s2.
s2

a    12
b     2
c     7
d     9
dtype: int64

In [None]:
# loc selects by label.
s2.loc['a']

12

In [None]:
# iloc selects by 0-based integer position: position 1 is the element labelled 'b'.
s2.iloc[1]

2

In [None]:
# Positional slice with iloc: positions 1 and 2 are returned; the stop position 3 is excluded.
s2.iloc[1:3]

b    2
c    7
dtype: int64

# Data Frame

In [None]:
# A DataFrame is a two-dimensional, labelled array.
# pandas (pd) and numpy (np) are already imported in the notebook's first cell,
# so they are not re-imported here.

# Build a DataFrame from a list of records: each dict is one row, its keys are the columns.
data = [{'a': i, 'b': 2*i} for i in range(4)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4
3,3,6


In [None]:
# Population figures keyed by state name, converted to a Series indexed by state.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135
}
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [None]:
# Wrap the Series in a single-column DataFrame; the state labels remain the row index.
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


## Defining a DataFrame from a dictionary of lists

In [None]:
# Column-oriented input: each key is a column name and each list holds that
# column's values, one per row.
data1 = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2003, 2004, 2005],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
df1 = pd.DataFrame(data1)
df1

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2003,2.4
4,Nevada,2004,2.9
5,Nevada,2005,3.2


In [None]:
# Another dictionary of equal-length lists; each key becomes a column of the DataFrame.
data2 = {
    'color': ['blue', 'green', 'yellow', 'red', 'white'],
    'object': ['ball', 'pen', 'pencil', 'paper', 'mug'],
    'price': [1.2, 1.0, 0.6, 0.9, 1.7]
}
material = pd.DataFrame(data2)
material

Unnamed: 0,color,object,price
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


## Defining a DataFrame from Series

In [None]:
# Population per state as a Series (the same mapping defined earlier in the
# notebook, repeated here so this section can be read on its own).
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [None]:
# Area figures keyed by the same state names as the population Series, converted to a Series.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995
}
area = pd.Series(area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [None]:
# Combine the two Series into one DataFrame: each dict key names a column,
# and the state labels form the row index.
states = pd.DataFrame({'population': population, 'area': area})
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


## Defining a DataFrame from a two-dimensional NumPy array



In [None]:
# Build a DataFrame from a 4x4 integer array (values 0..15), supplying the
# row labels and column names explicitly.
material3 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
material3

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15
