In [22]:
import pandas as pd
import numpy as np

def p(o):
  print(o)
  print()

In [23]:
a = pd.Series([1,2,3,4])
b = np.array([2,4,6,8])
c = pd.Series(b, index=['a','b','c','d'])

p(b)
p(c)

[2 4 6 8]

a    2
b    4
c    6
d    8
dtype: int64



In [24]:
d = pd.Series([1,0,2,1,2,3], index=['white','white','blue','green','green','yellow'])

p(d.unique()) # prints in order found
p(d.value_counts()) # prints descending order of occurence
p(d.isin([0, 1]))
p(d['white'])
p(pd.Series([0,1,np.nan, 2]))

[1 0 2 3]

1    2
2    2
0    1
3    1
Name: count, dtype: int64

white      True
white      True
blue      False
green      True
green     False
yellow    False
dtype: bool

white    1
white    0
dtype: int64

0    0.0
1    1.0
2    NaN
3    2.0
dtype: float64



In [25]:
# Series from dict and arr

e = {'a': 1, 'b': 2, 'c': 3}
f = pd.Series(e)
p(f['a'])

colors = ['red','yellow','orange','blue','green']
g = pd.Series(colors, colors[::-1])
p(g)

1

green        red
blue      yellow
orange    orange
yellow      blue
red        green
dtype: object



In [26]:
# Operations on Series
h = pd.Series([100, 200, 300], index=colors[0:3])
i = pd.Series([400, 500, 600], index=colors[1:4])

p(h)
p(i)
p(h+i)
p(h-i)
p(h*i)
p(h/i)

red       100
yellow    200
orange    300
dtype: int64

yellow    400
orange    500
blue      600
dtype: int64

blue        NaN
orange    800.0
red         NaN
yellow    600.0
dtype: float64

blue        NaN
orange   -200.0
red         NaN
yellow   -200.0
dtype: float64

blue           NaN
orange    150000.0
red            NaN
yellow     80000.0
dtype: float64

blue      NaN
orange    0.6
red       NaN
yellow    0.5
dtype: float64



In [27]:
p(h)
p(h.index)
p(h.values)

red       100
yellow    200
orange    300
dtype: int64

Index(['red', 'yellow', 'orange'], dtype='object')

[100 200 300]



In [28]:
# DF from dict[str, list[any]]
data = {
  'color' : ['blue','green','yellow','red','white'],
  'object' : ['ball','pen','pencil','paper','mug'],
  'price' : [1.2,1.0,0.6,0.9,1.7]
}

j = pd.DataFrame(data)
j.index.name = 'id'
j.columns.name = 'item'
p(j)
p(j.index)
p(j.columns)
p(j.values)

item   color  object  price
id                         
0       blue    ball    1.2
1      green     pen    1.0
2     yellow  pencil    0.6
3        red   paper    0.9
4      white     mug    1.7

RangeIndex(start=0, stop=5, step=1, name='id')

Index(['color', 'object', 'price'], dtype='object', name='item')

[['blue' 'ball' 1.2]
 ['green' 'pen' 1.0]
 ['yellow' 'pencil' 0.6]
 ['red' 'paper' 0.9]
 ['white' 'mug' 1.7]]



In [29]:
p(j.color)
p(j.loc[1])
p(j[0:2])

id
0      blue
1     green
2    yellow
3       red
4     white
Name: color, dtype: object

item
color     green
object      pen
price       1.0
Name: 1, dtype: object

item  color object  price
id                       
0      blue   ball    1.2
1     green    pen    1.0



In [44]:
j['new'] = 12
p(j)
j['new'] = pd.Series([12, 13, 14, 15, 16])
j.loc[2, 'price'] = 9
p(j)

item   color  object  price  new
id                              
0       blue    ball    1.2   12
1      green     pen    1.0   12
2     yellow  pencil    9.0   12
3        red   paper    0.9   12
4      white     mug    1.7   12

item   color  object  price  new
id                              
0       blue    ball    1.2   12
1      green     pen    1.0   13
2     yellow  pencil    9.0   14
3        red   paper    0.9   15
4      white     mug    1.7   16



In [31]:
# DF from nested dict
k = pd.DataFrame({'red': {'a': 10, 'b': [1,2]}, 'blue': {'a': 20, 'c': 30}})
k.columns.name = 'aryan'
k.index.name = 'nayra'
p(k)
p(k.T)

aryan     red  blue
nayra              
a          10  20.0
b      [1, 2]   NaN
c         NaN  30.0

nayra     a       b     c
aryan                    
red      10  [1, 2]   NaN
blue   20.0     NaN  30.0



In [43]:
l = pd.Series([1,2,3,4][::-1], index=[0, 1, 0, 1])
p(l)
p(l.idxmin())
p(l.idxmax())
p(l.is_unique)
p(l.index.is_unique)
p(l.index.unique())
p(l.index.value_counts())

0    4
1    3
0    2
1    1
dtype: int64

1

0

True

False

Index([0, 1], dtype='int64')

0    2
1    2
Name: count, dtype: int64



In [None]:
# Dropping, Alignment
p(k)
p(k.drop(['a'], axis=0))
p(k.drop(['red'], axis=1))

m = pd.DataFrame(
  np.arange(16).reshape((4,4)), 
  index=['red','blue','yellow','white'], 
  columns=['ball','pen','pencil','paper']
)

n = pd.DataFrame(
  np.arange(12).reshape((4,3)),
  index=['blue','green','white','yellow'],
  columns=['mug','pen','ball']
)

p(m)
p(n)
m+n

aryan     red  blue
nayra              
a          10  20.0
b      [1, 2]   NaN
c         NaN  30.0

aryan     red  blue
nayra              
b      [1, 2]   NaN
c         NaN  30.0

aryan  blue
nayra      
a      20.0
b       NaN
c      30.0

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15

        mug  pen  ball
blue      0    1     2
green     3    4     5
white     6    7     8
yellow    9   10    11



Unnamed: 0,ball,mug,paper,pen,pencil
blue,6.0,,,6.0,
green,,,,,
red,,,,,
white,20.0,,,20.0,
yellow,19.0,,,19.0,


In [42]:
ser = pd.Series(np.arange(4), index=['ball','pen','pencil','paper'])
p(ser)
p(m)
m - ser

ball      0
pen       1
pencil    2
paper     3
dtype: int64

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15



Unnamed: 0,ball,pen,pencil,paper
red,0,0,0,0
blue,4,4,4,4
yellow,8,8,8,8
white,12,12,12,12


In [35]:
f = lambda x: x.max() - x.min()
p(n)
p(n.apply(f))
p(n.apply(f, axis=1))

def f(x):
  return pd.Series([x.min(), x.max()], index=['min', 'max'])

p(n.apply(f, axis=0))
p(n.apply(f, axis=1))

        mug  pen  ball
blue      0    1     2
green     3    4     5
white     6    7     8
yellow    9   10    11

mug     9
pen     9
ball    9
dtype: int64

blue      2
green     2
white     2
yellow    2
dtype: int64

     mug  pen  ball
min    0    1     2
max    9   10    11

        min  max
blue      0    2
green     3    5
white     6    8
yellow    9   11



In [36]:
p(m)
p(m.describe())
p(m.sort_index())
p(m.sort_values(by='ball'))
p(ser.sort_index())
p(ser.rank())
p(m.sort_index(by=['ball']))

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15

            ball        pen     pencil      paper
count   4.000000   4.000000   4.000000   4.000000
mean    6.000000   7.000000   8.000000   9.000000
std     5.163978   5.163978   5.163978   5.163978
min     0.000000   1.000000   2.000000   3.000000
25%     3.000000   4.000000   5.000000   6.000000
50%     6.000000   7.000000   8.000000   9.000000
75%     9.000000  10.000000  11.000000  12.000000
max    12.000000  13.000000  14.000000  15.000000

        ball  pen  pencil  paper
blue       4    5       6      7
red        0    1       2      3
white     12   13      14     15
yellow     8    9      10     11

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15

ball      0
paper     3
pen       1
pencil    2
dty

TypeError: DataFrame.sort_index() got an unexpected keyword argument 'by'

In [37]:
seq2 = pd.Series([3,4,3,4,5,4,3,2],['2006','2007','2008','2009','2010','2011','2012','2013'])
seq = pd.Series([1,2,3,4,4,3,2,1],['2006','2007','2008','2009','2010','2011','2012','2013'])
p(m)
p(m.corr())
p(seq2.corr(seq))
p(seq2.cov(seq))

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15

        ball  pen  pencil  paper
ball     1.0  1.0     1.0    1.0
pen      1.0  1.0     1.0    1.0
pencil   1.0  1.0     1.0    1.0
paper    1.0  1.0     1.0    1.0

0.7745966692414834

0.8571428571428571



In [38]:
ser = pd.Series([1,2,np.nan,4])
p(ser)
p(ser.dropna())
p(ser[ser.notna()])
p(ser[ser.notnull()])

0    1.0
1    2.0
2    NaN
3    4.0
dtype: float64

0    1.0
1    2.0
3    4.0
dtype: float64

0    1.0
1    2.0
3    4.0
dtype: float64

0    1.0
1    2.0
3    4.0
dtype: float64



In [39]:
mser = pd.Series((1,2,3,4), index=[['a','a','b','c'],[5,1,2,2]])
p(mser)
p(mser['a'])
p(mser[1])
p(mser[:2])
p(mser[:'a'])

a  5    1
   1    2
b  2    3
c  2    4
dtype: int64

5    1
1    2
dtype: int64

2

a  5    1
   1    2
dtype: int64

a  5    1
   1    2
dtype: int64



  p(mser[1])


In [40]:
mser2 = pd.Series(np.random.rand(8), index=[['white','white','white','blue','blue','red','red','red'], ['up','down','right','up','down','up','down','left']])
mser2

white  up       0.391424
       down     0.949336
       right    0.579576
blue   up       0.678146
       down     0.786210
red    up       0.634139
       down     0.057947
       left     0.894720
dtype: float64

In [47]:
p(mser)
p(mser.unstack())
p(m)
p(m.stack())

a  5    1
   1    2
b  2    3
c  2    4
dtype: int64

     1    2    5
a  2.0  NaN  1.0
b  NaN  3.0  NaN
c  NaN  4.0  NaN

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15

red     ball       0
        pen        1
        pencil     2
        paper      3
blue    ball       4
        pen        5
        pencil     6
        paper      7
yellow  ball       8
        pen        9
        pencil    10
        paper     11
white   ball      12
        pen       13
        pencil    14
        paper     15
dtype: int64

