In [None]:
from pandas import Series
import pandas as pd

# A Series built from a plain list gets a default integer index
a = Series(data=[1, 2, 3, 4])

# the stored values
a.values
# the index labels
a.index

# explicit labels can be supplied instead of the default integers
b = Series(data=[10, 20, 30, 50],
           index=['Mayur', 'Naruto', 'Uzumaki', 'Sasuke'])

# label lookup then works much like a dictionary lookup
b['Mayur']

10

## Operations on Series


In [7]:
# Boolean-mask selection: keep only the entries whose value exceeds 20
b[b > 20]

# Membership tests look at the index labels, not the values
'Mayur' in b  # evaluates to True

# A Series converts to a plain dict of label -> value
b_dict = b.to_dict()
b_dict  # {'Mayur': 10, 'Naruto': 20, 'Uzumaki': 30, 'Sasuke': 50}

# ...and a dict converts back into a Series (keys become the index)
series_from_dict = Series(b_dict)
series_from_dict

# Passing an explicit index picks those labels out of the data;
# a requested label with no matching entry is filled with NaN.
# e.g. data {'a': 10, 'b': 20} with index ['a', 'b', 'c']
# gives {'a': 10, 'b': 20, 'c': NaN}
a = Series(series_from_dict, index=['Mayur', 'Sakura'])
a
# pd.isnull() flags the NaN entries
pd.isnull(a)  # True for 'Sakura'

# pd.notnull() is the exact opposite of pd.isnull()
pd.notnull(a)

# Adding two Series aligns them on their index labels first:
# values under matching labels are summed, and a label that is
# present in only one of the two ends up as NaN.
a = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
b = Series([5, 6, 7, 8], index=['a', 'b', 'g', 'h'])
a + b

# Both the Series and its index can carry a name
a.name = "Random Series"
a.index.name = "This is an index name"
a

This is an index name
a    1
b    2
c    3
d    4
Name: Random Series, dtype: int64

## Indices can't be edited since they're immutable

In [7]:
a = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
a
# Index objects are immutable, so item assignment raises TypeError.
# The error is the point of this cell: catch it and print it so the
# message is still shown without aborting a "Restart & Run All".
try:
    a.index[0] = 'Z'
except TypeError as err:
    print("TypeError: {}".format(err))

TypeError: Index does not support mutable operations

## Reindexing

In [None]:
a = Series(data=[1, 2, 3, 4], index=list('ABCD'))
a
# reindex returns a new Series laid out according to the given labels
new_index = ['C', 'D', 'E', 'F']
b = a.reindex(index=new_index)
b
# 'E' and 'F' do not exist in the original index, so they come back
# as NaN. fill_value supplies a default for such labels instead.
c = a.reindex(index=['X', 'Y', 'Z', 'L'], fill_value=-1)
c  # every label is new here, so every entry is -1 and there are no NaNs

X   -1
Y   -1
Z   -1
L   -1
dtype: int64

In [None]:
# reindex can also fill the gaps it creates, via the `method` argument.
# ffill = forward fill: a new label takes the value of the closest
# existing label before it.
# e.g. {0: 'USA', 5: 'India', 10: 'Mexico'} reindexed to range(15):
#   labels 0-4   -> 'USA'
#   labels 5-9   -> 'India'
#   labels 10-14 -> 'Mexico'
a = Series(['USA', 'India', 'Mexico'], index=[0, 5, 10])
a.reindex(index=range(15), method='ffill')

# method='bfill' (backward fill) works in the opposite direction:
# a new label takes the value of the next existing label, so the
# labels after the last existing one (11-14) remain NaN
a.reindex(index=range(15), method='bfill')

0        USA
1      India
2      India
3      India
4      India
5      India
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Mexico
11       NaN
12       NaN
13       NaN
14       NaN
dtype: object

## Dropping entries

In [None]:
a = Series(data=[1, 2, 3, 4], index=list('ABCD'))
a
# drop returns a new Series without the given label;
# `a` itself is left unchanged
a.drop(labels='A')

B    2
C    3
D    4
dtype: int64

## Selecting Entries 

In [None]:
ser = Series([1, 2, 3], index=['A', 'B', 'C'])
ser = ser * 2
ser

# Label-based lookup: series[label]
ser['A']

# Position-based lookup: series.iloc[position]
# (a bare ser[0] on a string-labelled index relies on the positional
# fallback for integer keys, which is deprecated in pandas 2.x)
ser.iloc[0]

# Positional slice: series.iloc[from_incl:to_excl]
ser.iloc[0:3]

# Several labels at once: series[list_of_labels]
ser[['A', 'B', 'C']]

# Boolean mask: series[boolean logic]
ser[ser > 3]

# The comparison on its own returns a boolean Series
# that is True wherever the condition is satisfied
ser > 3

A    False
B     True
C     True
dtype: bool

## Difference in loc, iloc, and ix

In [None]:
# loc: loc works on labels in the index.
# iloc: iloc works on the positions in the index (so it only takes integers).
# ix: (deprecated in pandas 0.20.0 and removed in 1.0.0; use loc or iloc in new code) ix usually tried to behave
# like loc but fell back to behaving like iloc if the label was not in the index.
# It's important to note some subtleties that made ix slightly tricky to use:

# 1. if the index is of integer type, ix will only use label-based indexing and not fall back to position-based 
# indexing. If the label is not in the index, an error is raised. 


# 2. if the index does not contain only integers, then given an integer, ix will immediately use position-based indexing
# rather than label-based indexing. If however ix is given another type (e.g. a string), it can use label-based 
# indexing.

## Data Alignment

In [None]:
ser1 = Series(data=[0, 1, 2], index=list('ABC'))
ser1
ser2 = Series(data=[3, 4, 5, 6], index=list('ABCD'))
ser2

# Addition aligns the two Series on their index labels: values under
# matching labels are summed, and a label found in only one of them
# ('D' here) becomes NaN
ser1 + ser2

A    False
B    False
C    False
D     True
dtype: bool

## Getting Series from DataFrame

In [None]:
from pandas import DataFrame
import numpy as np

df = DataFrame(np.arange(9).reshape((3, 3)), index=[1, 2, 3], columns=['1', '2', '3'])
# Selecting one row by its index label returns a Series whose index is
# the DataFrame's columns. `.ix` was removed in pandas 1.0; `.loc` is the
# label-based replacement (with this integer index `.ix` was doing a
# label lookup as well, so the selected row is the same).
ser = df.loc[1]

ser

# The Series is aligned with the DataFrame's column labels and
# subtracted from every row
df - ser

Unnamed: 0,1,2,3
1,0,0,0
2,3,3,3
3,6,6,6


## Sorting and Ranking

In [None]:
ser = Series(data=np.arange(3), index=list('CAB'))

# Order the entries by their index labels
ser.sort_index()

# Order the entries by their values (replaces the old series.order())
ser.sort_values()

# Ten random floats, labelled 0..9
ser = Series(data=np.random.rand(10), index=np.arange(10))
ser
ser.sort_values()

# rank keeps the original index and gives, for each entry, the
# position its value would take once the values are sorted
ser.rank()

0    0.231772
1    0.450153
2    0.264536
3    0.761744
4    0.476350
5    0.977210
6    0.784463
7    0.704043
8    0.870795
9    0.476925
dtype: float64
0    0.231772
2    0.264536
1    0.450153
4    0.476350
9    0.476925
7    0.704043
3    0.761744
6    0.784463
8    0.870795
5    0.977210
dtype: float64


0     1.0
1     3.0
2     2.0
3     7.0
4     4.0
5    10.0
6     8.0
7     6.0
8     9.0
9     5.0
dtype: float64

## Unique Values and Value Counts

In [None]:
ser = Series(data=list('11234'))
ser
# the distinct values, as an array
ser.unique()

# how many times each distinct value occurs
ser.value_counts()

0    1
1    1
2    2
3    3
4    4
dtype: object
['1' '2' '3' '4']
1    2
4    1
2    1
3    1
dtype: int64


## Fill missing data

In [None]:
ser = Series(data=[1, 2, 3, np.nan, 5])
ser

# Boolean Series: True wherever the entry is NaN
ser.isnull()

# A copy with the NaN entries removed
ser.dropna()

# A copy with every NaN replaced by the given value
ser.fillna(value=1)

0    1.0
1    2.0
2    3.0
3    1.0
4    5.0
dtype: float64

## Index hierarchy

In [None]:
# Passing two parallel lists as the index builds a two-level MultiIndex.
# randn is called through numpy because a bare `randn` is never imported
# in this notebook as shown.
ser = Series(np.random.randn(6), index=[[1, 1, 1, 2, 2, 2],
                                        ['a', 'b', 'c', 'a', 'e', 'f']])
ser.index
# a MultiIndex with two levels:
#   level 0 (outer): 1, 2
#   level 1 (inner): 'a', 'b', 'c', 'e', 'f'

# select by an outer (level 0) label
ser.loc[2]  # returns 3 values, for inner labels 'a', 'e', 'f'

# the command above selects from level 0;
# the one below selects from level 1 instead,
# i.e. across all of level 0, the entries whose level 1 label is 'a'
ser.loc[:, 'a']

# unstack turns the level 0 labels into the
# DataFrame's index and the level 1 labels into
# the DataFrame's columns; combinations that do
# not exist in the Series become NaN
df = ser.unstack()
df

Unnamed: 0,a,b,c,e,f
1,0.839926,-0.160235,-1.629168,,
2,-0.928232,,,0.970002,-1.410178
