# Pandas.Series Methods

In [102]:
import pandas as pd

## Examples to be used

In [103]:
songs_66 = pd.Series([3, None , 11, 9],
    index=['George', 'Ringo', 'John', 'Paul'],
    name='Counts')
songs_66

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [104]:
songs_69 = pd.Series([18, 22, 7, 5],
    index=[ 'John', 'Paul', 'George', 'Ringo'],
    name='Counts')

songs_69

John      18
Paul      22
George     7
Ringo      5
Name: Counts, dtype: int64

## Iteration over a pandas series

### Option 1

In [105]:
for value in songs_66:
    print(value)

3.0
nan
11.0
9.0


### Option 2

In [106]:
# Series.items() yields (index label, value) pairs. It replaces
# Series.iteritems(), which was deprecated in pandas 1.5 and removed in 2.0.
for idx, value in songs_66.items():
    print(idx, value)

George 3.0
Ringo nan
John 11.0
Paul 9.0


### Option 3

In [107]:
for key in songs_66.keys():
    print(key)

George
Ringo
John
Paul


# Overloaded Operations

## Operation, Result (All return Series)
* +, Adds scalar
* -, Subtracts scalar
* /, Divides scalar
* //, Floor divides scalar
* *, Multiply scalar
* %, Modulus scalar
* == / !=, Equality scalar
* < / >, Less / greater than scalar
* <= / >=, Less / greater than or equal scalar
* ^, Binary XOR
* |, Binary OR
* &, Binary AND

#### Arithmetic Operations

In [108]:
songs_66 + 2

George     5.0
Ringo      NaN
John      13.0
Paul      11.0
Name: Counts, dtype: float64

In [109]:
[1, 2] * 3

[1, 2, 1, 2, 1, 2]

In [110]:
print(songs_66 + songs_69)
print(songs_66)
print(songs_69)

George    10.0
John      29.0
Paul      31.0
Ringo      NaN
Name: Counts, dtype: float64
George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64
John      18
Paul      22
George     7
Ringo      5
Name: Counts, dtype: int64


In [111]:
songs_66.fillna(0) + songs_69

George    10.0
John      29.0
Paul      31.0
Ringo      5.0
Name: Counts, dtype: float64

#### Comparison Operations

In [112]:
print(songs_66.fillna(0).sort_index())
print(songs_69.fillna(0).sort_index())
songs_66.fillna(0).sort_index() <= songs_69.fillna(0).sort_index()

George     3.0
John      11.0
Paul       9.0
Ringo      0.0
Name: Counts, dtype: float64
George     7
John      18
Paul      22
Ringo      5
Name: Counts, dtype: int64


George    True
John      True
Paul      True
Ringo     True
Name: Counts, dtype: bool

## Getting and Setting Values

### Getting Values

In [113]:
songs_66['John']

11.0

In [114]:
songs_66.get('Fred', 'missing')

'missing'

In [115]:
songs_66.John
# dotted attribute access supported only for valid attribute names and no conflicts with pre-existing series attributes

11.0

### Setting Values

In [116]:
songs_66['John'] = 81
songs_66

George     3.0
Ringo      NaN
John      81.0
Paul       9.0
Name: Counts, dtype: float64

In [117]:
songs_66.John = 81
songs_66['John']

81.0

## Resetting Index

In [118]:
print(songs_66)
songs_66.reset_index()

George     3.0
Ringo      NaN
John      81.0
Paul       9.0
Name: Counts, dtype: float64


Unnamed: 0,index,Counts
0,George,3.0
1,Ringo,
2,John,81.0
3,Paul,9.0


In [119]:
songs_66.reset_index(drop=True)

0     3.0
1     NaN
2    81.0
3     9.0
Name: Counts, dtype: float64

In [121]:
songs_66.reindex(['Billy', 'Eric', 'George', 'Yoko'], fill_value=-1)

Billy    -1.0
Eric     -1.0
George    3.0
Yoko     -1.0
Name: Counts, dtype: float64

In [122]:
songs_66.rename({'Ringo':'Richard'})

George      3.0
Richard     NaN
John       81.0
Paul        9.0
Name: Counts, dtype: float64

In [123]:
songs_66.rename(lambda x: x.lower())

george     3.0
ringo      NaN
john      81.0
paul       9.0
Name: Counts, dtype: float64

## Counting

In [124]:
songs_66 = pd.Series([3, None , 11, 9],
    index=['George', 'Ringo', 'John', 'Paul'],
    name='Counts')
songs_66

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [125]:
scores_2 = pd.Series([67.3, 100, 96.7, None, 100],
    index=['Ringo', 'Paul', 'George', 'Peter', 'Billy'],
    name='test2')
scores_2

Ringo      67.3
Paul      100.0
George     96.7
Peter       NaN
Billy     100.0
Name: test2, dtype: float64

In [126]:
scores_2.count()

4

In [127]:
scores_2.value_counts()

100.0    2
96.7     1
67.3     1
Name: test2, dtype: int64

In [128]:
scores_2.unique()

array([ 67.3, 100. ,  96.7,   nan])

In [129]:
scores_2.nunique()

3

In [130]:
scores_2.drop_duplicates()

Ringo      67.3
Paul      100.0
George     96.7
Peter       NaN
Name: test2, dtype: float64

In [131]:
scores_2.duplicated()

Ringo     False
Paul      False
George    False
Peter     False
Billy      True
Name: test2, dtype: bool

In [132]:
scores_3 = pd.Series([67.3, 100, 96.7, None, 100, 79],
    index=['Ringo', 'Paul', 'George', 'Peter', 'Billy', 'Paul'])
scores_3

Ringo      67.3
Paul      100.0
George     96.7
Peter       NaN
Billy     100.0
Paul       79.0
dtype: float64

In [133]:
scores_3.groupby(scores_3.index).first()

Billy     100.0
George     96.7
Paul      100.0
Peter       NaN
Ringo      67.3
dtype: float64

In [134]:
scores_3.groupby(scores_3.index).last()

Billy     100.0
George     96.7
Paul       79.0
Peter       NaN
Ringo      67.3
dtype: float64

## Statistics

In [137]:
print(songs_66)
songs_66.sum()

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64


23.0

In [138]:
songs_66.sum(skipna=False)

nan

In [139]:
songs_66.mean()

7.666666666666667

In [140]:
songs_66.median()

9.0

In [141]:
songs_66.quantile() # q defaults to 0.5, i.e. the 50th percentile

9.0

In [142]:
songs_66.quantile(.1) # 10th percentile

4.2

In [143]:
songs_66.quantile(.9) # 90th percentile

10.6

In [144]:
songs_66.describe()

count     3.000000
mean      7.666667
std       4.163332
min       3.000000
25%       6.000000
50%       9.000000
75%      10.000000
max      11.000000
Name: Counts, dtype: float64

In [145]:
songs_66.describe(percentiles=[.05, .1, .2])

count     3.000000
mean      7.666667
std       4.163332
min       3.000000
5%        3.600000
10%       4.200000
20%       5.400000
50%       9.000000
max      11.000000
Name: Counts, dtype: float64

In [146]:
songs_66.min()

3.0

In [147]:
songs_66.max()

11.0

In [148]:
songs_66.idxmin()

'George'

In [149]:
songs_66.idxmax()

'John'

In [150]:
songs_66.var()

17.333333333333336

In [151]:
songs_66.std()

4.163331998932266

In [152]:
# Mean absolute deviation, spelled out explicitly because Series.mad() was
# deprecated in pandas 1.5 and removed in 2.0. mean() skips NaN by default,
# so the missing entry is ignored just as mad() ignored it.
(songs_66 - songs_66.mean()).abs().mean()

3.1111111111111107

## Converting Types

In [153]:
songs_66.round()

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [154]:
songs_66.astype(str)

George     3.0
Ringo      nan
John      11.0
Paul       9.0
Name: Counts, dtype: object

In [155]:
songs_66.astype(str).max()

'nan'

In [156]:
pd.to_numeric(songs_66.apply(str))

ValueError: Unable to parse string "nan" at position 1

## Dealing with Null Values

In [157]:
songs_66.fillna(-1)

George     3.0
Ringo     -1.0
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [158]:
songs_66.dropna()

George     3.0
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [159]:
val_mask = songs_66.notnull()
val_mask

George     True
Ringo     False
John       True
Paul       True
Name: Counts, dtype: bool

In [160]:
songs_66[val_mask]

George     3.0
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [161]:
nan_mask = songs_66.isnull()
nan_mask

George    False
Ringo      True
John      False
Paul      False
Name: Counts, dtype: bool

In [162]:
songs_66[nan_mask]

Ringo   NaN
Name: Counts, dtype: float64

In [163]:
val_mask = ~(nan_mask)
songs_66[val_mask]

George     3.0
John      11.0
Paul       9.0
Name: Counts, dtype: float64

## Matrix Operations

In [164]:
print(songs_66)
print(songs_69)

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64
John      18
Paul      22
George     7
Ringo      5
Name: Counts, dtype: int64


In [165]:
songs_66.dot(songs_69)

nan

In [167]:
songs_66.dropna().dot(songs_66.dropna())

211.0

In [168]:
songs_66.T

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64

In [169]:
songs_66.transpose()

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
Name: Counts, dtype: float64

## Append, combining, and joining two series

In [170]:
# pd.concat stacks the two Series end to end, keeping duplicate labels.
# It replaces Series.append, which was removed in pandas 2.0.
pd.concat([songs_66, songs_69])

George     3.0
Ringo      NaN
John      11.0
Paul       9.0
John      18.0
Paul      22.0
George     7.0
Ringo      5.0
Name: Counts, dtype: float64

In [171]:
# verify_integrity=True makes concat raise ValueError when the combined index
# contains duplicate labels (Series.append was removed in pandas 2.0).
pd.concat([songs_66, songs_69], verify_integrity=True)

ValueError: Indexes have overlapping values: Index(['John', 'Paul', 'George', 'Ringo'], dtype='object')

In [172]:
def avg(v1, v2):
    """Return the arithmetic mean of the two values ``v1`` and ``v2``."""
    total = v1 + v2
    return total / 2.0
songs_66.combine(songs_69, avg)

George     5.0
John      14.5
Paul      15.5
Ringo      NaN
Name: Counts, dtype: float64

In [173]:
# update() modifies songs_66 in place, overwriting its values with those of
# songs_69 at matching index labels; the cells below use the updated values.
songs_66.update(songs_69)
songs_66

George     7.0
Ringo      5.0
John      18.0
Paul      22.0
Name: Counts, dtype: float64

In [175]:
songs_69.repeat(3)

John      18
John      18
John      18
Paul      22
Paul      22
Paul      22
George     7
George     7
George     7
Ringo      5
Ringo      5
Ringo      5
Name: Counts, dtype: int64

## Sorting

In [176]:
songs_66.sort_values()

Ringo      5.0
George     7.0
John      18.0
Paul      22.0
Name: Counts, dtype: float64

In [177]:
songs_66.sort_values(ascending=False)

Paul      22.0
John      18.0
George     7.0
Ringo      5.0
Name: Counts, dtype: float64

In [178]:
songs_66.sort_index()

George     7.0
John      18.0
Paul      22.0
Ringo      5.0
Name: Counts, dtype: float64

In [179]:
songs_66.sort_index(ascending=False)

Ringo      5.0
Paul      22.0
John      18.0
George     7.0
Name: Counts, dtype: float64

In [180]:
songs_66.rank()

George    2.0
Ringo     1.0
John      3.0
Paul      4.0
Name: Counts, dtype: float64

## Applying a function

In [181]:
def format(x):
    """Render a song count as text, e.g. ``'1 song'`` or ``'7.0 songs'``.

    The singular form is used only when ``x == 1``; every other value gets
    the plural form. Note that this name shadows the built-in ``format``.
    """
    template = '{} song' if x == 1 else '{} songs'
    return template.format(x)
songs_66.map(format) # function as a parameter

George     7.0 songs
Ringo      5.0 songs
John      18.0 songs
Paul      22.0 songs
Name: Counts, dtype: object

In [186]:
songs_66.apply(lambda x: x + 1)

George     8.0
Ringo      6.0
John      19.0
Paul      23.0
Name: Counts, dtype: float64

In [182]:
songs_66.map({5: None,
              18: 21,
              22: 23}) # dictionary as a parameter

George     NaN
Ringo      NaN
John      21.0
Paul      23.0
Name: Counts, dtype: float64

In [183]:
mapping = pd.Series({22.: 33})
songs_66.map(mapping) # series as a parameter

George     NaN
Ringo      NaN
John       NaN
Paul      33.0
Name: Counts, dtype: float64

## String Operations

In [187]:
names = pd.Series(['George', 'John', 'Paul'])
names.str.lower()

0    george
1      john
2      paul
dtype: object

In [188]:
names.str.findall('o')

0    [o]
1    [o]
2     []
dtype: object

# Vectorized String Methods (similar to Python's str methods) available on a pandas Series via `.str`

## Method, Result
* contains, pattern matching boolean result
* count, pattern occurrence count result
* decode, decode a codec encoding
* encode, encode a codec encoding
* startswith, pattern matching for string start with boolean result
* endswith, pattern matching for string end with boolean result
* findall, find pattern in string
* get, extract the element at a given position from each item
* join, join items with a separator into a string
* len, length of each item
* lower, convert items to lowercase
* upper, convert items to uppercase
* lstrip, removes whitespace to the left of items
* rstrip, removes whitespace to the right of items
* strip, removes whitespace on both sides of items
* repeat, repeat the string a certain number of times
* replace, change/replace a particular part of string
* slice, pull slices of string
* split, split items by pattern
* title, titlecase the items

In [189]:
names

0    George
1      John
2      Paul
dtype: object

In [190]:
names.str.len()

0    6
1    4
2    4
dtype: int64

In [191]:
names.str.upper()

0    GEORGE
1      JOHN
2      PAUL
dtype: object