In [1]:
import pandas as pd
import numpy as np

### series with default index

In [2]:
s = pd.Series([1,2,3,4])
print(s)

0    1
1    2
2    3
3    4
dtype: int64


In [3]:
# Inspect the two components of a Series: its index (labels) and its values.
print(s.index)
print(s.values)

print(type(s)) # the container itself is a pandas Series
print(type(s.index)) # with no explicit index, pandas supplies a RangeIndex
print(type(s.values)) # the values are exposed as a NumPy array

RangeIndex(start=0, stop=4, step=1)
[1 2 3 4]
<class 'pandas.core.series.Series'>
<class 'pandas.core.indexes.range.RangeIndex'>
<class 'numpy.ndarray'>


#### NumPy-like operations based on the numeric index

In [4]:
print(s[2]) # Gives the element with key 2 (just like a dictionary)

3


In [5]:
print(s[1:4:2])

1    2
3    4
dtype: int64


In [6]:
s[1] = -2  # Series individual values can be mutated using key

In [7]:
print(s)

0    1
1   -2
2    3
3    4
dtype: int64


In [8]:
s[1:3] = [4,5] # Series range of values can be mutated

In [9]:
print(s)

0    1
1    4
2    5
3    4
dtype: int64


In [10]:
print(s[[0,3]]) # Series with specific keys

0    1
3    4
dtype: int64


#### subseries : A view on the original series

In [11]:
# Slicing returns another Series (just as slicing a NumPy array returns an array).
# In this run the slice is a view that shares data with `s`; the following cells show it.
# NOTE(review): with pandas Copy-on-Write enabled (the default from pandas 3.0) a slice
# behaves like an independent copy instead - confirm against the installed version.
subseries = s[1:4]
print(subseries) 
print(type(subseries))


1    4
2    5
3    4
dtype: int64
<class 'pandas.core.series.Series'>


In [12]:
try:
    subseries[0]
except KeyError:
    print("key 0 is not present in the view")
    


key 0 is not present in the view


In [13]:
# Writing through the subseries also changes the original series in this run, because
# label 1 exists in both the original and the subseries.
# NOTE(review): this write-through depends on the slice being a view; under pandas
# Copy-on-Write `s` would stay unchanged - confirm against the installed version.
subseries[1] = -1
print(s)

0    1
1   -1
2    5
3    4
dtype: int64


In [14]:
print(subseries)

1   -1
2    5
3    4
dtype: int64


In [15]:
s[3] = -4
print(subseries) # the subseries sees the change too (label 3 is shared between the original and the subseries)

1   -1
2    5
3   -4
dtype: int64


In [16]:
subseries[0] = -2 # label 0 is not in the subseries, so a new entry is added to the subseries only
s[4] = 10  # label 4 is new to the original series, so it is added there only
print(subseries) # label 0 was added, label 4 was not
print(s) # the value at label 0 is unchanged: that label was outside the slice when the subseries was created,
# so label 0 in the subseries is a separate entry from label 0 in the original series

1   -1
2    5
3   -4
0   -2
dtype: int64
0     1
1    -1
2     5
3    -4
4    10
dtype: int64


### series with custom index

In [17]:
cis = pd.Series([1,2,3,4,5,6,7],index=['Sun','Mon','Tue','Wed','Thu','Fri','Sat'])

In [18]:
print(type(cis.index))
print(type(cis.values))

<class 'pandas.core.indexes.base.Index'>
<class 'numpy.ndarray'>


In [19]:
# Accessing like a dictionary with the custom index
print(cis['Wed']) # a single label returns the scalar value
print(cis[['Mon','Fri']]) # a list of labels returns a new Series holding just those entries


# Contrast with a plain dict, which cannot be indexed with a list of keys:
#d = dict(a=3,b=9,c=27)
#print(d['b'])
#print(d[['a','b']]) # This doesn't work with dictionary

4
Mon    2
Fri    6
dtype: int64


In [20]:
# Positional access is always available, whatever the labels are, but it has to be
# requested explicitly through `.iloc`: plain `cis[1]` on a string index relies on a
# positional fallback of `[]` that pandas has deprecated.
print(cis.iloc[1])

2


In [21]:
# A list of integer positions also goes through `.iloc`; plain `cis[[1,3,5]]` on a
# string index relies on the same deprecated positional fallback of `[]`.
print(cis.iloc[[1,3,5]])

Mon    2
Wed    4
Fri    6
dtype: int64


In [22]:
# Positional slice: entries at positions 3 up to (not including) 7.
print(cis.iloc[3:7])

Wed    4
Thu    5
Fri    6
Sat    7
dtype: int64


In [23]:
# Series with a custom int index
ciis = pd.Series([1,2,3,4,5],index=[5,2,3,6,1])

In [24]:
# Inspect the integer-labelled series `ciis` created in the previous cell.
# NOTE(review): pandas >= 2.0 reports the generic Index class for integer labels; older
# releases print Int64Index here.
print(type(ciis.index))
print(type(ciis.values))

<class 'pandas.core.indexes.base.Index'>
<class 'numpy.ndarray'>


In [25]:
print(ciis)

5    1
2    2
3    3
6    4
1    5
dtype: int64


In [26]:
print(ciis[[3,6]])

3    3
6    4
dtype: int64


In [27]:
try:
    print(ciis[0])
except KeyError:
    # An integer key is looked up as a label on an integer index, and 0 is not one of
    # the labels. Only KeyError is caught so that unrelated errors still surface.
    print("Doesn't work on default index but works on custom index")

Doesn't work on default index but works on custom index


In [28]:
print(ciis[1:6]) # integer slices are positional (ordinal position, not label): position 1 up to the end
print(ciis[11:41]) # still positional, not label-based: there are no positions 11..40, so the result is empty

2    2
3    3
6    4
1    5
dtype: int64
Series([], dtype: int64)


In [29]:
# Series with a custom float index.
# The labels are built from an integer range and scaled afterwards: np.arange(0, 1.5, 0.3)
# accumulates rounding error (3 * 0.3 == 0.8999999999999999), so the label displayed as
# 0.9 would not match the literal 0.9 in a lookup.
cfis = pd.Series([1,2,3,4,5],index=np.arange(0,15,3)/10)

In [30]:
# Inspect the float-labelled series `cfis` created in the previous cell.
# NOTE(review): pandas >= 2.0 reports the generic Index class for float labels; older
# releases print Float64Index here.
print(type(cfis.index))
print(type(cfis.values))

<class 'pandas.core.indexes.base.Index'>
<class 'numpy.ndarray'>


In [31]:
print(cfis)

0.0    1
0.3    2
0.6    3
0.9    4
1.2    5
dtype: int64


In [32]:
print(cfis[0.6])

3


In [33]:
try:
    print(cfis[2])
except KeyError:
    # On a numeric index the integer 2 is looked up as a label, and no such label exists.
    # Only KeyError is caught so that unrelated errors still surface.
    print("As custom numeric index is defined, default index won't be available now")

As custom numeric index is defined, default index won't be available now


In [34]:
# Label-based slicing on the float index. `.loc` states that intent explicitly; an integer
# slice through plain `[]` on a float index is deprecated and slated to become positional.
print(cfis.loc[0:1])  # labels from 0 to 1, both ends inclusive
print(cfis.loc[0.0:0.6]) # labels from 0.0 to 0.6, both ends inclusive

0.0    1
0.3    2
0.6    3
0.9    4
dtype: int64
0.0    1
0.3    2
0.6    3
dtype: int64


#### Series from dictionary

In [35]:
# Building a Series from a dict: the keys become the index labels.
dict_index = pd.Series(dict(strawberry=4,apple=1,mange=2,orange=3))
print(dict_index.index) # in this run the index came out sorted by key
print(dict_index) # NOTE(review): newer pandas versions keep the dict's insertion order instead - confirm against the installed version

Index(['apple', 'mange', 'orange', 'strawberry'], dtype='object')
apple         1
mange         2
orange        3
strawberry    4
dtype: int64


In [36]:
print(dict_index['apple'])
# Positional access: the third entry in the series' current order. `.iloc` makes that
# explicit instead of relying on the deprecated integer fallback of `[]` on a string index.
print(dict_index.iloc[2])

1
3


### Operations on series

In [37]:
# Arithmetic between two Series matches the operands by label.
s1 = pd.Series(dict(strawberry=7,banana=3,orange=4,mango=6))
s2 = pd.Series(dict(apple=1,banana=30,orange=42,mango=6))

print(s1+s2) # NaN for labels that are missing from either of the two series


apple          NaN
banana        33.0
mango         12.0
orange        46.0
strawberry     NaN
dtype: float64


In [38]:
print(s1-s2) # NaN for labels that are missing from either of the two series

apple          NaN
banana       -27.0
mango          0.0
orange       -38.0
strawberry     NaN
dtype: float64


In [39]:
print(s1+5)

banana         8
mango         11
orange         9
strawberry    12
dtype: int64


In [40]:
print(np.log2(s1))

banana        1.584963
mango         2.584963
orange        2.000000
strawberry    2.807355
dtype: float64


In [41]:
print(s1.apply(np.log2)) # Same as above, different format, any function can be used

banana        1.584963
mango         2.584963
orange        2.000000
strawberry    2.807355
dtype: float64


In [42]:
print(s1)
print(s1.apply(lambda x : x-1 if x>4 else x+1)) # lambda can be used in apply

banana        3
mango         6
orange        4
strawberry    7
dtype: int64
banana        4
mango         5
orange        5
strawberry    6
dtype: int64


In [43]:
def fun(a,b):
    """Return twice ``a`` plus ``b``."""
    doubled = 2*a
    return doubled+b

print(s1.apply(fun,b=3))

banana         9
mango         15
orange        11
strawberry    17
dtype: int64


#### Filtering with boolean array

In [44]:
print(s1>4) # element-wise comparison: returns a boolean Series, True where the predicate holds

banana        False
mango          True
orange        False
strawberry     True
dtype: bool


In [45]:
# The key can be a boolean Series; it is matched to s1 by label, so its order need not match s1's.
print(s1[pd.Series(dict(orange=True,banana=True,mango=True,strawberry=False))])

banana    3
mango     6
orange    4
dtype: int64


In [46]:
print(s1[s1>4])

mango         6
strawberry    7
dtype: int64


In [47]:
# The `in` operator tests membership among the index labels (like dict keys), not among the values
print('pomogranate' in s1)
print('mango' in s1)


False
True


### Missing data

In [48]:
fruits = dict(strawberry=7,banana=3,orange=4,mango=6)  # apple not in dictionary
mds = pd.Series(fruits,index = ['apple','mango','banana'])  # strawberry, orange not in the index
print(mds)  # the missing 'apple' becomes NaN, and the values that are present are shown as float64

apple     NaN
mango     6.0
banana    3.0
dtype: float64


In [49]:
print(mds.isnull())  # boolean Series marking the missing (NaN) entries

apple      True
mango     False
banana    False
dtype: bool


In [50]:
print(mds[mds.notnull()]) # filter with the not-null mask: keeps only the entries that have a value

mango     6.0
banana    3.0
dtype: float64


In [51]:
print(mds.dropna()) # simplified representation of above

mango     6.0
banana    3.0
dtype: float64


In [52]:
print(mds.fillna(0)) # returns filling missing values with given value

apple     0.0
mango     6.0
banana    3.0
dtype: float64


In [53]:
print(mds.fillna(0).astype(int)) # Restore the data-type to desired : int

apple     0
mango     6
banana    3
dtype: int64


In [54]:
print(mds.fillna(dict(apple=9,banana=8))) # a dict supplies per-label fill values; only NaN entries are patched (banana keeps its 3.0)

apple     9.0
mango     6.0
banana    3.0
dtype: float64


### Concatenating series

In [55]:
# Two series with disjoint labels.
conser1 = pd.Series({'apple': 9, 'banana': 8})
conser2 = pd.Series({'orange': 9, 'pineapple': 3})

# concat stacks the series end to end, keeping each one's labels.
print(pd.concat([conser1,conser2]))


apple        9
banana       8
orange       9
pineapple    3
dtype: int64


### Mutating series values

In [70]:
# Adding new values: assigning to a label that does not exist yet adds a new entry
simple_series = pd.Series(data=[1,2,3],index=['A','B','C'])
simple_series['X'] = 0
simple_series

A    1
B    2
C    3
X    0
dtype: int64

In [69]:
# Updating existing values: assigning to a label that is already present overwrites it
simple_series = pd.Series(data=[1,2,3],index=['A','B','C'])
simple_series['A'] = 0
simple_series

A    0
B    2
C    3
dtype: int64

In [71]:
# Deleting values: `del` removes the entry for the given label in place
simple_series = pd.Series(data=[1,2,3],index=['A','B','C'])
del simple_series['A']
simple_series

B    2
C    3
dtype: int64

In [68]:
simple_series = pd.Series([1,2,3],['A','B','C'])
# Conditional update through a boolean mask: the right-hand Series is matched by label,
# and only the entries selected by the mask (A and B, both < 3) are overwritten.
# C fails the mask, so it keeps its value even though the right-hand side has a C entry.
is_small = simple_series<3
simple_series[is_small] = pd.Series(dict(A=5,B=6,C=9))
simple_series

A    5
B    6
C    3
dtype: int64