### 1. How to import pandas and check the version?

In [2]:
import pandas as pd

# Print the installed pandas version string.
# pd.show_versions(as_json=True)
print(pd.__version__)

1.2.4


### 2. How to create a series from a list, numpy array and dict?

In [3]:
import numpy as np

# Inputs: a list of 26 letters, the integers 0..25, and a letter -> integer mapping.
mylist = [letter for letter in 'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}

# One Series per input container; the dict keys become the index of ser3.
ser1 = pd.Series(data=mylist)
ser2 = pd.Series(data=myarr)
ser3 = pd.Series(data=mydict)  # cf. pd.DataFrame([mydict])

### 3. How to convert the index of a series into a column of a dataframe?

In [34]:
# Build a Series whose index holds the letters and whose values are 0..25.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# reset_index() on a Series moves the index into a regular column and
# returns a DataFrame with the columns 'index' and 0.
df = ser.reset_index()
df.head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


### 4. How to combine many series to form a dataframe?

In [39]:
import numpy as np

# Two equally long Series: the letters and the integers 0..25.
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(np.arange(26))

# Place the Series side by side; unnamed inputs get the column labels 0 and 1.
df = pd.concat(objs=[ser1, ser2], axis='columns')
# df = pd.DataFrame({'col1': ser1, 'col2': ser2})


### 5. How to assign name to the series’ index?

In [43]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

# Series.rename returns a new object and leaves the original unnamed, so the
# result is bound back to `ser` to make the name stick.
ser = ser.rename('Coco')
# Equivalent attribute form: ser.name = 'Coco'
ser

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: Coco, dtype: object

### 6. How to get the items of series A not present in series B?

In [52]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Membership masks first, then keep the entries that are absent from the other Series.
found_in_ser2 = ser1.isin(ser2)
found_in_ser1 = ser2.isin(ser1)
ser3 = ser1[~found_in_ser2]
ser4 = ser2[~found_in_ser1]

### 7. How to get the items not common to both series A and series B?

In [58]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# solution 1: items unique to each side, stacked (original index labels are kept)
ser3 = ser1.loc[~ser1.isin(ser2)]
ser4 = ser2.loc[~ser2.isin(ser1)]
ser5 = pd.concat((ser3, ser4))

# solution 2: union minus intersection
# ser_u = pd.Series(np.union1d(ser1, ser2))  # union
# ser_i = pd.Series(np.intersect1d(ser1, ser2))  # intersect
# ser_u[~ser_u.isin(ser_i)]

### 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [67]:
ser = pd.Series(np.random.normal(10, 5, 25))

# solution 1
# Collect the five statistics in one Series so none of them is silently
# discarded. The middle value is the median (50th percentile), not the mean.
five_num = pd.Series({
    'min': ser.min(),
    '25%': ser.quantile(.25),
    'median': ser.median(),
    '75%': ser.quantile(.75),
    'max': ser.max(),
})

# solution 2
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.72505345,  8.65067857, 10.50471481, 13.02773198, 19.31216681])

### 9. How to get frequency counts of unique items of a series?

In [71]:
# Draw 30 random letters from 'a'..'h', then count how often each one occurs.
letters = np.asarray(list('abcdefgh'))
picks = np.random.randint(8, size=30)
ser = pd.Series(letters[picks])
ser.value_counts()

g    7
b    4
h    4
c    4
d    4
e    3
a    3
f    1
dtype: int64

### 10. How to keep only the top 2 most frequent values as they are and replace everything else with ‘Other’?

In [77]:
# Keep the seeded generator and draw from it. A bare np.random.RandomState(100)
# expression creates a generator that is dropped immediately and does not seed
# the global np.random functions, so the data would differ on every run.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

In [89]:
# solution 1
# Look up the two most frequent values, then relabel every other entry of the
# data itself. The result goes to ser2, so `ser` is still the raw input when
# solution 2 runs.
top2 = ser.value_counts().nlargest(2).index
ser2 = ser.where(ser.isin(top2), 'Other')

# solution 2
# value_counts() sorts by frequency in descending order, so its first two
# index labels are the top 2. where() builds a new Series instead of writing
# a string into the integer Series in place.
ser = ser.where(ser.isin(ser.value_counts().index[:2]), 'Other')

### 11. How to bin a numeric series to 10 groups of equal size?

In [12]:
# 20 uniform random draws from [0, 1).
ser = pd.Series(np.random.random(size=20))

In [13]:
# Solution
# Cut at every tenth quantile and attach one ordinal label per resulting bin.
decile_edges = [0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1]
decile_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
pd.qcut(ser, q=decile_edges, labels=decile_labels).head()

0    1st
1    3rd
2    5th
3    8th
4    7th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

### 12. How to convert a numpy array to a dataframe of given shape? (L1)

In [21]:
# 35 random integers in 1..9 (the upper bound 10 is exclusive).
ser = pd.Series(np.random.randint(low=1, high=10, size=35))

In [22]:
# Lay the values out row by row as a 7 x 5 table.
grid = ser.values.reshape((7, 5))
df = pd.DataFrame(grid)
df

Unnamed: 0,0,1,2,3,4
0,6,2,4,8,6
1,6,8,3,2,9
2,3,8,8,8,8
3,2,6,2,5,8
4,6,9,2,6,2
5,1,5,6,9,3
6,5,2,8,1,6


### 13. How to find the positions of numbers that are multiples of 3 from a series?

In [23]:
# Seven random integers in 1..9 (the upper bound 10 is exclusive).
ser = pd.Series(np.random.randint(low=1, high=10, size=7))
ser

0    7
1    6
2    8
3    2
4    3
5    5
6    4
dtype: int32

In [24]:
# Boolean-mask the Series; the index labels of the rows that remain show
# where the multiples of 3 sit.
is_multiple_of_3 = ser.mod(3).eq(0)
ser[is_multiple_of_3]
# np.argwhere(ser % 3==0)

1    6
4    3
dtype: int32

### 14. How to extract items at given positions from a series?

In [27]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

# take() selects by integer position. ser[pos] would look the integers up as
# index labels, which only coincides with positions while the index is the
# default 0..n-1.
picked = ser.take(pos)
picked

0     a
4     e
8     i
14    o
20    u
dtype: object

### 15. How to stack two series vertically and horizontally?

In [43]:
# A numeric and a string Series of the same length.
ser1 = pd.Series(data=range(5))
ser2 = pd.Series(data=[*'abcde'])

In [44]:
# vertical
# pd.concat is used instead of Series.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; the stacked result is the same.
pd.concat([ser1, ser2])

# horizontal
# pd.DataFrame({'col1': ser1, 'col2': ser2})
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


### 16. How to get the positions of items of series A in another series B?

In [52]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# Solution 1: for each item of ser2, the first position where ser1 equals it
[int(np.where(ser1 == item)[0][0]) for item in ser2]

# Solution 2: the same lookup through an Index built from ser1
[pd.Index(ser1).get_loc(item) for item in ser2]

# Solution 3: index labels of the ser1 entries that occur in ser2.
# Note: these come back in ser1 order ([0, 4, 5, 8]), whereas solutions 1 and 2
# follow the order of ser2 ([5, 4, 0, 8]).
ser1.loc[ser1.isin(ser2)].index

Int64Index([0, 4, 5, 8], dtype='int64')

### 17. How to compute the mean squared error on a truth and predicted series?

In [54]:
# Ground truth 0..9 and a prediction that adds uniform noise from [0, 1) to each value.
truth = pd.Series(range(10))
noise = np.random.random(10)
pred = pd.Series(range(10)) + noise

0    0.533739
1    1.489645
2    2.543869
3    3.089315
4    4.516370
5    5.830505
6    6.632881
7    7.569571
8    8.077730
9    9.371945
dtype: float64

In [60]:
# solution 1: sum the squared errors and divide by the number of observations
squared_error = (truth - pred) ** 2
squared_error.sum() / len(truth)

# solution 2: let numpy take the mean of the squared errors
np.mean(squared_error)

0.2654110501618301

### 18. How to convert the first character of each element in a series to uppercase?

In [None]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# solution 0: collect the capitalized words, then wrap them in a Series
arr = [word.capitalize() for word in ser]
ser2 = pd.Series(arr)

# solution 1
ser.map(str.title)

# solution 2
ser.map(lambda word: word[0].upper() + word[1:])

# solution 3
pd.Series([word.title() for word in ser])

### 19. How to calculate the number of characters in each word in a series?

In [76]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# solution 1: pass the built-in directly instead of wrapping it in a lambda
ser.map(len)

# solution 2: measure each word in plain Python, then build the Series
pd.Series([len(word) for word in ser])

0    3
1    2
2    4
3    4
dtype: int64

### 20. How to compute the difference of differences between consecutive numbers of a series?

In [87]:
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])

# First differences, then the differences of those.
first_diff = ser.diff()
first_diff.tolist()
first_diff.diff().tolist()

[nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]

### 21. How to convert a series of date-strings to a timeseries?

In [90]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Every string uses a different date format. From pandas 2.0 on, a single
# pd.to_datetime(ser) call infers one format from the first element and raises
# on the others, so each element is parsed on its own; this also works on
# older pandas versions.
ser_dt = ser.map(pd.to_datetime)
ser_dt

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]