In [1]:
import pandas as pd
import numpy as np

In [3]:
# 2. How to create a series from a list, numpy array and dict?
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist,myarr))

In [4]:
pd.Series(mylist)

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [5]:
pd.Series(myarr)

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
dtype: int64

In [6]:
pd.Series(mydict)

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64

In [17]:
# 3. How to convert the index of a series into a column of a dataframe?
ser = pd.Series(mydict)

# One way
df = pd.DataFrame(ser)
df[1]=ser.index
df

# Better way
df = ser.to_frame().reset_index()
df.head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


In [26]:
# 4. How to combine many series to form a dataframe?
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

#One way
df = pd.concat([ser1,ser2], axis=1)
df

#Another way
df = pd.DataFrame({'col1': ser1, 'col2':ser2})
df

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


In [27]:
# 5. How to assign name to the series’ index?
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

#One way
ser.rename('alphabets')

#Another way
ser.name = 'alphabets'

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: alphabets, dtype: object

In [31]:
# 6. How to get the items of series A not present in series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

ser3 = ser1[~ser1.isin(ser2)]
ser3

0    1
1    2
2    3
dtype: int64

In [36]:
# 7. How to get the items not common to both series A and series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# One way
ser3 = ser1[~ser1.isin(ser2)].append(ser2[~ser2.isin(ser1)])
ser3

# Better way (easier to deal with more than 2 series)
union = pd.Series(np.union1d(ser1,ser2))
intersection = pd.Series(np.intersect1d(ser1,ser2))
union[~union.isin(intersection)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [43]:
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
ser = pd.Series(np.random.normal(10, 5, 25))

print('min: ', ser.min())
print('25th percentile: ', np.percentile(ser,25))
print('median: ',ser.median())
print('75th percentile: ', np.percentile(ser,75))
print('max: ',ser.max())

min:  -6.263339472810124
25th percentile:  5.807291508241531
median:  7.8204301413852555
75th percentile:  11.16893299136411
max:  18.447805017609177


In [45]:
# Another way
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([-6.26333947,  5.80729151,  7.82043014, 11.16893299, 18.44780502])

In [59]:
# 9. How to get frequency counts of unique items of a series?
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
ser.value_counts()

a    8
d    7
h    6
e    3
c    2
g    2
f    1
b    1
dtype: int64

In [62]:
# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
ser
#ser[ser.value_counts()[2:]] = 'Other'
#ser

0     3
1     2
2     1
3     3
4     4
5     1
6     4
7     2
8     4
9     3
10    4
11    2
dtype: int64

In [66]:
ser[ser.isin(ser.value_counts()[2:])] = 'Other'
ser

0     Other
1     Other
2         1
3     Other
4         4
5         1
6         4
7     Other
8         4
9     Other
10        4
11    Other
dtype: object