# Data Series

## Properties

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
%matplotlib inline

### 1

In [3]:
cities = ['London', 'Berlin', 'Warsaw', 'Paris']
print(type(cities))
cities

<class 'list'>


['London', 'Berlin', 'Warsaw', 'Paris']

In [4]:
pd.Series(cities)

0    London
1    Berlin
2    Warsaw
3     Paris
dtype: object

In [5]:
prime_numbers = (2,3,5,7,11,13,17,19)
print(type(prime_numbers))
prime_numbers

<class 'tuple'>


(2, 3, 5, 7, 11, 13, 17, 19)

In [6]:
pd.Series(prime_numbers)

0     2
1     3
2     5
3     7
4    11
5    13
6    17
7    19
dtype: int64

In [7]:
logical_values = [True, False, True]
pd.Series(logical_values)

0     True
1    False
2     True
dtype: bool

In [8]:
SpielbergFilmography = {
    'Jaws': 1975,
    '1941': 1979,
    'Indiana Jones and the raiders of the Lost Ark': 1981,
    'E.T. the Extra-Terrestrial': 1982
}
SpielbergFilmography

{'Jaws': 1975,
 '1941': 1979,
 'Indiana Jones and the raiders of the Lost Ark': 1981,
 'E.T. the Extra-Terrestrial': 1982}

In [9]:
pd.Series(SpielbergFilmography)

Jaws                                             1975
1941                                             1979
Indiana Jones and the raiders of the Lost Ark    1981
E.T. the Extra-Terrestrial                       1982
dtype: int64

### 2

In [10]:
citiesSeries = pd.Series(cities)
citiesSeries

0    London
1    Berlin
2    Warsaw
3     Paris
dtype: object

In [11]:
citiesSeries.size  # size of my Series

4

In [12]:
citiesSeries.nbytes  # how much memory cost my object

32

In [13]:
citiesSeries.is_unique

True

In [14]:
citiesSeries.is_monotonic  # sorted?

False

In [15]:
citiesSeries.index  # what is index of the Series?

RangeIndex(start=0, stop=4, step=1)

In [16]:
citiesSeries.values

array(['London', 'Berlin', 'Warsaw', 'Paris'], dtype=object)

In [17]:
citiesSeries.dtype  # type of object e.g. 'O' like Object

dtype('O')

In [18]:
citiesSeries.shape  # how many dimensions [wymiarów]

(4,)

In [19]:
citiesSeries.axes

[RangeIndex(start=0, stop=4, step=1)]

In [20]:
monotonicSeries = pd.Series([1,2,3,67,99])
monotonicSeries

0     1
1     2
2     3
3    67
4    99
dtype: int64

In [21]:
monotonicSeries.is_monotonic

True

In [22]:
monotonicSeries.is_monotonic_increasing

True

In [23]:
monotonicSeries.is_monotonic_decreasing

False

In [24]:
drinksSeries = pd.Series(['Cola', 'Redbull', 'Sprite'])
drinksSeries

0       Cola
1    Redbull
2     Sprite
dtype: object

In [25]:
drinksSeries.is_monotonic_increasing

True

## Methods

In [27]:
monotonicSeries.sum()

172

In [28]:
monotonicSeries.min()

1

In [29]:
monotonicSeries.max()

99

In [31]:
monotonicSeries.mean()  # average

34.4

In [34]:
# in this case count() and size return the same value
monotonicSeries.count()

5

In [33]:
monotonicSeries.size

5

In [35]:
monotonicSeries.product() # returns value from multipling all values in Series

39798

In [36]:
print(monotonicSeries.index)
print(monotonicSeries.keys())

RangeIndex(start=0, stop=5, step=1)
RangeIndex(start=0, stop=5, step=1)


In [38]:
print(monotonicSeries.values)

[ 1  2  3 67 99]


In [57]:
print(monotonicSeries)
print(monotonicSeries.add(10))  # add to all elements 10
print(monotonicSeries)

0     1
1     2
2     3
3    67
4    99
dtype: int64
0     11
1     12
2     13
3     77
4    109
dtype: int64
0     1
1     2
2     3
3    67
4    99
dtype: int64


In [60]:
countries = ['USA', 'Spain', 'Poland', 'Portugal', 'Italy']
currencies = ['USD', 'EUR', 'PLN', 'EUR', 'EUR']

In [61]:
                    # data,      index
curSeries = pd.Series(countries, currencies)
curSeries

USD         USA
EUR       Spain
PLN      Poland
EUR    Portugal
EUR       Italy
dtype: object

In [62]:
curSeries = pd.Series(data=countries, index=currencies)
curSeries

USD         USA
EUR       Spain
PLN      Poland
EUR    Portugal
EUR       Italy
dtype: object

## Filtering

In [2]:
numbers = [1,2,3,11,12,13]
numbers

[1, 2, 3, 11, 12, 13]

### filtering on values

In [6]:
# numbers > 10  # error
numSeries = pd.Series(numbers)
print(numSeries)
print(numSeries > 10)

0     1
1     2
2     3
3    11
4    12
5    13
dtype: int64
0    False
1    False
2    False
3     True
4     True
5     True
dtype: bool


In [7]:
numSeries.where(numSeries > 10)
# NaN - Not a number

0     NaN
1     NaN
2     NaN
3    11.0
4    12.0
5    13.0
dtype: float64

In [8]:
numSeries.where(numSeries > 10, other=-1)

0    -1
1    -1
2    -1
3    11
4    12
5    13
dtype: int64

In [9]:
numSeries.where(numSeries > 10).dropna()

3    11.0
4    12.0
5    13.0
dtype: float64

In [10]:
print(numSeries)
numSeries.where(numSeries > 10, inplace=True)
print(numSeries)

0     1
1     2
2     3
3    11
4    12
5    13
dtype: int64
0     NaN
1     NaN
2     NaN
3    11.0
4    12.0
5    13.0
dtype: float64


In [11]:
print(numSeries)
print(numSeries.dropna())
print(numSeries)
numSeries.dropna(inplace=True)
print('after dropna(inplace=True)')
print(numSeries)

0     NaN
1     NaN
2     NaN
3    11.0
4    12.0
5    13.0
dtype: float64
3    11.0
4    12.0
5    13.0
dtype: float64
0     NaN
1     NaN
2     NaN
3    11.0
4    12.0
5    13.0
dtype: float64
after dropna(inplace=True)
3    11.0
4    12.0
5    13.0
dtype: float64


In [12]:
numSeries = pd.Series(numbers)
# we work on copy and it doesn't work
numSeries.where(numSeries > 10).dropna(inplace=True)
numSeries

0     1
1     2
2     3
3    11
4    12
5    13
dtype: int64

### filtering on index/keys

In [13]:
numSeries = pd.Series(numbers)
numSeries.filter(items=[0,2,4])  # get only 1st, 3rd and 5th element

0     1
2     3
4    12
dtype: int64

## Advanced filtering

In [14]:
numSeries = pd.Series(numbers)
print(numSeries)
numSeries % 2 == 1  # only odd [nieparzyste] values

0     1
1     2
2     3
3    11
4    12
5    13
dtype: int64


0     True
1    False
2     True
3     True
4    False
5     True
dtype: bool

In [15]:
print(numSeries.where(numSeries % 2 == 1))
numSeries.where(numSeries % 2 == 1).dropna()

0     1.0
1     NaN
2     3.0
3    11.0
4     NaN
5    13.0
dtype: float64


0     1.0
2     3.0
3    11.0
5    13.0
dtype: float64

### more than 1 condition we should declare before we use multiple conditions

In [41]:
# numSeries.where(numSeries > 10 and numSeries % 2 == 1)  # error
# numSeries.where(numSeries > 10 & numSeries % 2 == 1)  # error
numGrater10 = numSeries > 10
numOdd = numSeries % 2 == 1
print(numSeries.where(numGrater10 & numOdd))
numSeries.where(numGrater10 & numOdd).dropna()

0     NaN
1     NaN
2     NaN
3    11.0
4     NaN
5    13.0
dtype: float64


3    11.0
5    13.0
dtype: float64

In [16]:
numSeries.between(3,12)  # like in sql

0    False
1    False
2     True
3     True
4     True
5    False
dtype: bool

In [17]:
numSeries.where(numSeries.between(3,12))

0     NaN
1     NaN
2     3.0
3    11.0
4    12.0
5     NaN
dtype: float64

In [18]:
# list of bools works same bool from series
numSeries.where([True, True, False, False, False, True])

0     1.0
1     2.0
2     NaN
3     NaN
4     NaN
5    13.0
dtype: float64

## Import data

### csv - comma separator values

In [48]:
pd.read_csv('./course-files/course-sources/pokemon.csv')

obj = pd.read_csv('./course-files/course-sources/pokemon.csv')
type(obj)

pandas.core.frame.DataFrame

### usecols[ Collection ] - get some cols by name

In [49]:
obj = pd.read_csv('./course-files/course-sources/pokemon.csv', usecols=['Name'])
print(type(obj))
obj

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Name
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,VenusaurMega Venusaur
4,Charmander
...,...
795,Diancie
796,DiancieMega Diancie
797,HoopaHoopa Confined
798,HoopaHoopa Unbound


### squeeze - [ściśnij] if values are only 1 col, it will be converted to Series

In [20]:
obj = pd.read_csv('./course-files/course-sources/pokemon.csv',
                  usecols=['Name'], squeeze=True)
print(type(obj))
obj

<class 'pandas.core.series.Series'>


0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander
               ...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object

In [21]:
speed = pd.read_csv('./course-files/course-sources/pokemon.csv',
                    usecols=['Speed'], squeeze=True)
speed

0       45
1       60
2       80
3       80
4       65
      ... 
795     50
796    110
797     70
798     80
799     70
Name: Speed, Length: 800, dtype: int64

In [22]:
dataFromClipboard = pd.read_clipboard(sep=',')
print(type(dataFromClipboard))
dataFromClipboard

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,s1[2:10:3]


In [23]:
oneSeries = dataFromClipboard['Name']
print(type(oneSeries))
oneSeries

KeyError: 'Name'

### head() / tail() - in default 5 first / last rows

In [24]:
oneSeries.head()

NameError: name 'oneSeries' is not defined

In [57]:
oneSeries.tail()

795                Diancie
796    DiancieMega Diancie
797    HoopaHoopa Confined
798     HoopaHoopa Unbound
799              Volcanion
Name: Name, dtype: object

In [58]:
oneSeries.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Name, dtype: object

In [59]:
oneSeries.tail(10)

790                 Noibat
791                Noivern
792                Xerneas
793                Yveltal
794       Zygarde50% Forme
795                Diancie
796    DiancieMega Diancie
797    HoopaHoopa Confined
798     HoopaHoopa Unbound
799              Volcanion
Name: Name, dtype: object

## Standard Python method

In [62]:
print(oneSeries.head())
print(sorted(oneSeries.head()))

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object
['Bulbasaur', 'Charmander', 'Ivysaur', 'Venusaur', 'VenusaurMega Venusaur']


In [64]:
print(oneSeries.min(), oneSeries.max())

Abomasnow Zygarde50% Forme


In [71]:
print(oneSeries.head())
oneSeries.name = 'Name of Pokemon'
print(oneSeries.head())

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name of Pokemon, dtype: object
0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name of Pokemon, dtype: object



## Sorting

In [25]:
pok = obj.copy()
pok.sort_values().head()

510                  Abomasnow
511    AbomasnowMega Abomasnow
68                        Abra
392                      Absol
393            AbsolMega Absol
Name: Name, dtype: object

In [26]:
pok.sort_values(ascending=False).head()

794    Zygarde50% Forme
695            Zweilous
46                Zubat
631               Zorua
632             Zoroark
Name: Name, dtype: object

In [27]:
pok.sort_values().head()

510                  Abomasnow
511    AbomasnowMega Abomasnow
68                        Abra
392                      Absol
393            AbsolMega Absol
Name: Name, dtype: object

In [28]:
pok.sort_values(inplace=True)
pok.head()

510                  Abomasnow
511    AbomasnowMega Abomasnow
68                        Abra
392                      Absol
393            AbsolMega Absol
Name: Name, dtype: object

In [29]:
pok.sort_index().head()

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object

## Existing in Series

In [30]:
countries = ['EN', 'FR', 'PL', 'IT']
print('PL' in countries)
print('ES' in countries)

True
False


In [31]:
pok = pd.read_csv('./course-files/course-sources/pokemon.csv',
                  usecols=['Name'], squeeze=True)
pok.head()

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object

In [32]:
'Venusaur' in pok

False

In [33]:
3 in pok

True

### Operator in works on list or on index in Series

In [34]:
print(pok.size)
print(pok.index)
print(900 in pok)
print(900 in pok.index)
print(799 in pok.index)

800
RangeIndex(start=0, stop=800, step=1)
False
False
True


In [35]:
pok.values[:5]

array(['Bulbasaur', 'Ivysaur', 'Venusaur', 'VenusaurMega Venusaur',
       'Charmander'], dtype=object)

In [36]:
print('Venusaur' in pok.values)
print('Venusaur asdddaa' in pok.values)

True
False


## Get value by index

In [37]:
pok[73]

'Machoke'

In [38]:
# pok[64, 74]  # it doesn't work
pok[[64,74]]

64    Arcanine
74     Machamp
Name: Name, dtype: object

In [39]:
pok[2:7]  # without 7

2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
5               Charmeleon
6                Charizard
Name: Name, dtype: object

In [40]:
pok[795:]  # from: to end

795                Diancie
796    DiancieMega Diancie
797    HoopaHoopa Confined
798     HoopaHoopa Unbound
799              Volcanion
Name: Name, dtype: object

In [41]:
pok[:4]  # from start: to 4 without

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
Name: Name, dtype: object

In [42]:
pok[-2:]  # from last 2nd: to end

798    HoopaHoopa Unbound
799             Volcanion
Name: Name, dtype: object

## Download values from Series by methods

In [23]:
idx = ['a', 'b', 'c', 'd', 'e', 'e']
vals = ['Austria', 'Belgium', 'Canada', 'Denmark', 'England', 'Estonia']
s = pd.Series(vals, idx)
s

a    Austria
b    Belgium
c     Canada
d    Denmark
e    England
e    Estonia
dtype: object

In [24]:
s[1]

'Belgium'

In [25]:
s['b']

'Belgium'

In [26]:
s['e']

e    England
e    Estonia
dtype: object

In [1]:
# s['f']  # error if not exists

### get() - universal

In [28]:
s.get(1)

'Belgium'

In [29]:
s.get('b')

'Belgium'

In [68]:
s.get([1,2])

b    Belgium
c     Canada
dtype: object

In [31]:
s.get('f')  # not raised an error !!!

co jeśli index 1 nie jest pierwszy/drugi i chcę pobrać po 1?

### at[ ] - get value by index value

In [33]:
# s.at[1]  # error

In [35]:
s.at['b']

'Belgium'

In [37]:
print(type(s.at['e']))
s.at['e']

<class 'pandas.core.series.Series'>


e    England
e    Estonia
dtype: object

In [2]:
# s.at['f']  # error if not exists

### iat[ ] - get value by index position

co jeśli to będzie posortowana lista?

In [39]:
s.iat[1]

'Belgium'

In [41]:
# s.iat['b']  # error

In [44]:
# s.iat[[0,1]]  # error

In [46]:
# s.iat[99]  # error if not exists

### loc[ ] - get value by index value

In [48]:
# s.loc[1]  # error

In [49]:
s.loc['b']

'Belgium'

In [52]:
print(type(s.loc['e']))
s.loc['e']

<class 'pandas.core.series.Series'>


e    England
e    Estonia
dtype: object

In [55]:
# s.loc['f']  # error if not exists

które z tych poleceń działa na slice [x:y:z]

### iloc[ ] - get value by index position with multiple rows (list, slice)

In [56]:
s.iloc[1]

'Belgium'

In [58]:
# s.iloc['b']  # error

In [59]:
s.iloc[[0,1]]

a    Austria
b    Belgium
dtype: object

In [74]:
s.iloc[1:3]

b    Belgium
c     Canada
dtype: object

In [61]:
# s.iloc[99]  # error if not exists

### ix[ ] - method _get()_ works same now, download by idx or position

In [63]:
# s.ix[1]  # deprecated and deleted

### Conclusion

We could use [ ] and _get()_ like universal solution but it is not precise.


If we want to get values by index value(s) we should use _loc[ ]_ and _at[ ]_.
If we want to get value(s) by index position we should use _iloc[ ]_ and _iat[ ]_.


_loc[ ]_ & _iloc[ ]_ are better than _at[ ]_ & _iat[ ]_ because _loc[ ]_ & _iloc[ ]_ can take not only 1 index, but list or slice of indexes too.

## Reindex and intersection

In [75]:
idx = ['a', 'b', 'c', 'd', 'e1', 'e2']
vals = ['Austria', 'Belgium', 'Canada', 'Denmark', 'England', 'Estonia']
s = pd.Series(vals, idx)
s

a     Austria
b     Belgium
c      Canada
d     Denmark
e1    England
e2    Estonia
dtype: object

In [76]:
searchList = ['a', 'b']
print(s[searchList])
print(s.loc[searchList])

a    Austria
b    Belgium
dtype: object
a    Austria
b    Belgium
dtype: object


In [79]:
searchListNotFound = ['a', 'b', 'f']
# print(s[searchListNotFound])  # error
# print(s.loc[searchListNotFound])  # error

In [84]:
s.reindex(searchListNotFound)

a    Austria
b    Belgium
f        NaN
dtype: object

### intersection - elements which are in both collections

In [86]:
print(s.index)
print(s.index.intersection(searchListNotFound))

Index(['a', 'b', 'c', 'd', 'e1', 'e2'], dtype='object')
Index(['a', 'b'], dtype='object')


In [87]:
s.loc[s.index.intersection(searchListNotFound)]

a    Austria
b    Belgium
dtype: object

### what if index for search is not unique??

In [88]:
idx = ['a', 'b', 'c', 'd', 'e', 'e']
vals = ['Austria', 'Belgium', 'Canada', 'Denmark', 'England', 'Estonia']
s = pd.Series(vals, idx)
s

a    Austria
b    Belgium
c     Canada
d    Denmark
e    England
e    Estonia
dtype: object

In [90]:
# s.reindex(searchListNotFound)  # error, duplicate index

In [92]:
s.loc[s.index.intersection(searchListNotFound)]  # it works...

a    Austria
b    Belgium
dtype: object

In [93]:
print(searchListNotFound)
searchListNotFound += 'e'
print(searchListNotFound)

['a', 'b', 'f', 'e']

In [95]:
s.loc[s.index.intersection(searchListNotFound)]  # ... even duplicated values

a    Austria
b    Belgium
e    England
e    Estonia
dtype: object

In [96]:
pd.read_csv('./course-files/course-sources/pokemon.csv')

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [108]:
ser1 = pd.read_csv('./course-files/course-sources/pokemon.csv',
           usecols=['Attack','#'],
           squeeze=True,
           index_col='#')

In [100]:
ser2 = pd.read_csv('./course-files/course-sources/pokemon.csv',
           usecols=['Attack','Name'],
           squeeze=True,
           index_col='Name')
ser2['Bulbasaur']

49

In [120]:
print(ser2['Ivysaur':'Charmander'])
print(ser2.loc['Ivysaur':'Charmander'])

Name
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
Name: Attack, dtype: int64
Name
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
Name: Attack, dtype: int64


In [102]:
ser2[['Ivysaur', 'Charmander', 'Bulbasaur']]

Name
Ivysaur       62
Charmander    52
Bulbasaur     49
Name: Attack, dtype: int64

In [114]:
# ser1[['Ivysaur', 'Charmander', 'Bulbasaur']]  # error

In [111]:
# ser2.loc[2]  # error

In [112]:
ser2.iloc[2]

82

## More methods in Series (value_counts, std, mean, median)

### value_counts() - group values and sort from most

In [137]:
pok = pd.read_csv('./course-files/course-sources/pokemon.csv',
                   usecols=['Name', 'Attack'],
                   squeeze=True,
                   index_col='Name')
pokType2 = pd.read_csv('./course-files/course-sources/pokemon.csv',
                   usecols=['Name', 'Type 2'],
                   squeeze=True,
                   index_col='Name')
pokType2.value_counts()

Flying      97
Ground      35
Poison      34
Psychic     33
Fighting    26
Grass       25
Fairy       23
Steel       22
Dark        20
Dragon      18
Rock        14
Water       14
Ice         14
Ghost       14
Fire        12
Electric     6
Normal       4
Bug          3
Name: Type 2, dtype: int64

In [129]:
pokType2.value_counts().head()

Flying      97
Ground      35
Poison      34
Psychic     33
Fighting    26
Name: Type 2, dtype: int64

In [131]:
pokType2.value_counts(dropna=False).head()

NaN        386
Flying      97
Ground      35
Poison      34
Psychic     33
Name: Type 2, dtype: int64

In [135]:
pokType2.value_counts(normalize=True, dropna=False)  # how many percent?

NaN         0.48250
Flying      0.12125
Ground      0.04375
Poison      0.04250
Psychic     0.04125
Fighting    0.03250
Grass       0.03125
Fairy       0.02875
Steel       0.02750
Dark        0.02500
Dragon      0.02250
Rock        0.01750
Ice         0.01750
Water       0.01750
Ghost       0.01750
Fire        0.01500
Electric    0.00750
Normal      0.00500
Bug         0.00375
Name: Type 2, dtype: float64

In [147]:
print(pok)
print(pok.min())
print(pok.max())

Name
Bulbasaur                 49
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
                        ... 
Diancie                  100
DiancieMega Diancie      160
HoopaHoopa Confined      110
HoopaHoopa Unbound       160
Volcanion                110
Name: Attack, Length: 800, dtype: int64
5
190


In [139]:
pok.value_counts()

100    40
65     39
80     37
50     37
85     33
       ..
106     1
88      1
102     1
91      1
190     1
Name: Attack, Length: 111, dtype: int64

### idxmin() / idxmax() - which idx has min / max value

In [140]:
pok.idxmin()

'Chansey'

In [149]:
print(pok.loc['Chansey'])
pok.loc[pok.idxmin()]

5


5

In [151]:
print(pok.mean())
pok.sum() / pok.size

79.00125


79.00125

### median() - central value after sort

In [152]:
pok.median()

75.0

### std() - how much values are diffrent on average to mean()

In [153]:
# in general: std() == mean() +/- mean difference
pok.std()  # in this case: 79.00125 +/- 32.45

32.45736586949843

## Data modification

In [154]:
pok

Name
Bulbasaur                 49
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
                        ... 
Diancie                  100
DiancieMega Diancie      160
HoopaHoopa Confined      110
HoopaHoopa Unbound       160
Volcanion                110
Name: Attack, Length: 800, dtype: int64

In [155]:
pok * 100

Name
Bulbasaur                 4900
Ivysaur                   6200
Venusaur                  8200
VenusaurMega Venusaur    10000
Charmander                5200
                         ...  
Diancie                  10000
DiancieMega Diancie      16000
HoopaHoopa Confined      11000
HoopaHoopa Unbound       16000
Volcanion                11000
Name: Attack, Length: 800, dtype: int64

In [156]:
pok100 = pok * 100

In [172]:
pokType1 = pd.read_csv('./course-files/course-sources/pokemon.csv',
                   usecols=['Name', 'Type 1'],
                   squeeze=True,
                   index_col='Name')

In [173]:
# pokType1.upper()  # error
pokType1.str.upper()

Name
Bulbasaur                  GRASS
Ivysaur                    GRASS
Venusaur                   GRASS
VenusaurMega Venusaur      GRASS
Charmander                  FIRE
                          ...   
Diancie                     ROCK
DiancieMega Diancie         ROCK
HoopaHoopa Confined      PSYCHIC
HoopaHoopa Unbound       PSYCHIC
Volcanion                   FIRE
Name: Type 1, Length: 800, dtype: object

In [174]:
pokWithType = 'TYPE: ' + pokType1.str.upper()
pokWithType

Name
Bulbasaur                  TYPE: GRASS
Ivysaur                    TYPE: GRASS
Venusaur                   TYPE: GRASS
VenusaurMega Venusaur      TYPE: GRASS
Charmander                  TYPE: FIRE
                             ...      
Diancie                     TYPE: ROCK
DiancieMega Diancie         TYPE: ROCK
HoopaHoopa Confined      TYPE: PSYCHIC
HoopaHoopa Unbound       TYPE: PSYCHIC
Volcanion                   TYPE: FIRE
Name: Type 1, Length: 800, dtype: object

In [175]:
pokType1.value_counts()

Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Rock         44
Electric     44
Dragon       32
Ghost        32
Ground       32
Dark         31
Poison       28
Steel        27
Fighting     27
Ice          24
Fairy        17
Flying        4
Name: Type 1, dtype: int64

In [165]:
def replace_type(old_type):
    if old_type in ['Grass', 'Ground']:
        return 'Nature'
    else:
        return old_type

In [167]:
replace_type('Fire')

'Fire'

In [168]:
replace_type('Grass')

'Nature'

### apply() - to inject custom function to all values

In [177]:
print(pokType1)
pokType1.apply(replace_type)

Name
Bulbasaur                  Grass
Ivysaur                    Grass
Venusaur                   Grass
VenusaurMega Venusaur      Grass
Charmander                  Fire
                          ...   
Diancie                     Rock
DiancieMega Diancie         Rock
HoopaHoopa Confined      Psychic
HoopaHoopa Unbound       Psychic
Volcanion                   Fire
Name: Type 1, Length: 800, dtype: object


Name
Bulbasaur                 Nature
Ivysaur                   Nature
Venusaur                  Nature
VenusaurMega Venusaur     Nature
Charmander                  Fire
                          ...   
Diancie                     Rock
DiancieMega Diancie         Rock
HoopaHoopa Confined      Psychic
HoopaHoopa Unbound       Psychic
Volcanion                   Fire
Name: Type 1, Length: 800, dtype: object

In [178]:
pokType1.apply(lambda x_name: x_name.upper())

Name
Bulbasaur                  GRASS
Ivysaur                    GRASS
Venusaur                   GRASS
VenusaurMega Venusaur      GRASS
Charmander                  FIRE
                          ...   
Diancie                     ROCK
DiancieMega Diancie         ROCK
HoopaHoopa Confined      PSYCHIC
HoopaHoopa Unbound       PSYCHIC
Volcanion                   FIRE
Name: Type 1, Length: 800, dtype: object

In [181]:
pokType1.apply(lambda x_name:
               'Nature' if x_name in ['Grass', 'Ground']
               else x_name)

Name
Bulbasaur                 Nature
Ivysaur                   Nature
Venusaur                  Nature
VenusaurMega Venusaur     Nature
Charmander                  Fire
                          ...   
Diancie                     Rock
DiancieMega Diancie         Rock
HoopaHoopa Confined      Psychic
HoopaHoopa Unbound       Psychic
Volcanion                   Fire
Name: Type 1, Length: 800, dtype: object

## Map

near to join

In [182]:
team = pd.Series(data=[5,3,2,4,3,4,4,5],
                index=['Andy', 'Bob', 'Chris', 'Dirk', 'Francis', 'George', 'Henry', 'Ivan'])
team

Andy       5
Bob        3
Chris      2
Dirk       4
Francis    3
George     4
Henry      4
Ivan       5
dtype: int64

In [183]:
notes = pd.Series(data=['C', 'B', 'A', 'A+', 'A++'],
                 index=[1,2,3,4,5])
notes

1      C
2      B
3      A
4     A+
5    A++
dtype: object

### values from A should be indexes from B

A(idx_A, vals_A) => B(idx_b, vals_B)

&

vals_b **in** idx_b == True

In [184]:
team.map(notes)

Andy       A++
Bob          A
Chris        B
Dirk        A+
Francis      A
George      A+
Henry       A+
Ivan       A++
dtype: object

In [188]:
# it doesn't need a Series, a dict is sufficient [wystarczający]
notes_dict = {1: 'C', 2: 'B', 3:'A', 4: 'A+', 5: 'A++'}
notes_dict

{1: 'C', 2: 'B', 3: 'A', 4: 'A+', 5: 'A++'}

In [189]:
team.map(notes_dict)

Andy       A++
Bob          A
Chris        B
Dirk        A+
Francis      A
George      A+
Henry       A+
Ivan       A++
dtype: object

In [190]:
# only dicts ?
team_dict = {5: 'Andy', 3: 'Bob', 2: 'Chris', 4: 'Dirk', 3: 'Francis', 4: 'George', 4: 'Henry', 5: 'Ivan'}
team_dict

{5: 'Ivan', 3: 'Francis', 2: 'Chris', 4: 'Henry'}

In [192]:
# team_dict.map(notes_dict)  # error