In [3]:
import pandas as pd
import numpy as np

In [18]:
# 1. How to import pandas and check the version?
# The installed release is exposed as the `__version__` string on the package.
installed_version = pd.__version__
print(installed_version)
# A fuller environment report is available through pd.show_versions().

1.3.5


In [23]:
# 2. How to create a series from a list, numpy array and dict?
# Create a pandas series from each of the items below: a list, numpy and a dictionary

# All 26 letters are required: zip() silently stops at the shorter input, so a
# missing letter would leave the last integer of myarr without a key and shift
# the value paired with every later letter.
# (The 'e'/'d' order matches the input string used by the other exercises here.)
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
assert len(mylist) == len(myarr), "letters and integers must pair up one-to-one"
mydict = dict(zip(mylist, myarr))

alphabets_list = pd.Series(mylist)  # from a list: default integer index
arr_df = pd.Series(myarr)           # from a numpy array: default integer index
dict_df = pd.Series(mydict)         # from a dict: the keys become the index

In [32]:
# 3. How to convert the index of a series into a column of a dataframe?
# Difficulty Level: L1

# Convert the series ser into a dataframe with its index as another column on the dataframe.

# Input: a series whose index holds the letters and whose values are 0..25.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# Solution 1
# Name the index, then move it into a regular column next to the values.
# (Building the frame from ser.index alone would drop the values entirely.)
df = ser.rename_axis('alphabets').reset_index(name='integers')

# Solution 2
# to_frame() keeps the values as a column and reset_index() adds the old index
# as a column named 'index'; only that new column is previewed here.
ser.to_frame().reset_index().loc[:, ['index']][:6]

Unnamed: 0,index
0,a
1,b
2,c
3,e
4,d
5,f


In [47]:
# 4. How to combine many series to form a dataframe?
# Difficulty Level: L1

# Combine ser1 and ser2 to form a dataframe.

# Input: one series of letters and one of the integers 0..25.
letters = list('abcedfghijklmnopqrstuvwxyz')
ser1 = pd.Series(letters)
ser2 = pd.Series(np.arange(26))

# Solution 1: map each column name to its series and build the frame from that.
named_columns = {"alphabets": ser1, "integers": ser2}
df = pd.DataFrame(data=named_columns)

# Solution 2: place the series side by side; columns get the default labels 0 and 1.
side_by_side = pd.concat([ser1, ser2], axis=1)
side_by_side.head()

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


In [49]:
# 5. How to assign name to the series' index?
# Difficulty Level: L1

# Give a name to the series ser calling it 'alphabets'.
# Note: the name is set on the series itself (Series.name), not on its index.

# Input, named at construction time.
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'), name='alphabets')

# Preview the first five items; the name appears in the footer of the repr.
ser.head(5)

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

In [52]:
# 6. How to get the items of series A not present in series B?
# Difficulty Level: L2

# From ser1 remove items present in ser2.

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Boolean mask marking the ser1 items that also occur in ser2; keep the rest.
also_in_ser2 = ser1.isin(ser2)
ser1[~also_in_ser2]

0    1
1    2
2    3
dtype: int64

In [64]:
# 7. How to get the items not common to both series A and series B?
# Difficulty Level: L2
# Get all items of ser1 and ser2 not common to both.

# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution 1: take what is exclusive to each side, then stack the two pieces.
# Each piece keeps the index labels it had in its own source series.
unique_ser1 = ser1.loc[~ser1.isin(ser2)]
unique_ser2 = ser2.loc[~ser2.isin(ser1)]
exclusive_items = pd.concat([unique_ser1, unique_ser2])
print(exclusive_items)

# Solution 2: union minus intersection.
ser_u = pd.Series(np.union1d(ser1, ser2))  # every distinct value of either series
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # values present in both series
shared = ser_u.isin(ser_i)
ser_u[~shared]

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64


0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [83]:
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
# Difficulty Level: L2

# Compute the minimum, 25th percentile, median, 75th, and maximum of ser.

# Input: 25 draws from a normal distribution (mean 10, standard deviation 5),
# seeded so the numbers are the same on every run.
np.random.seed(123)
normal_draws = np.random.normal(loc=10, scale=5, size=25)
ser = pd.Series(normal_draws)

# Solution 1: report individual statistics one at a time
# (min/max rounded to 2 decimals, the quartiles at full precision).
print(f'min: {round(ser.min(), 2)}')
print(f'max: {round(ser.max(), 2)}')
print(f'25th_percentile: {np.percentile(ser, 25)}')
print(f'75th_percentile: {np.percentile(ser, 75)}')

# Solution 2: all five statistics (min, Q1, median, Q3, max) in a single call.
five_number_points = [0, 25, 50, 75, 100]
np.percentile(ser, q=five_number_points)

min: -2.13
max: 21.03
25th_percentile: 6.605569241889729
75th_percentile: 15.879145223910516


array([-2.13339622,  6.60556924,  9.52645516, 15.87914522, 21.02965041])

In [89]:
# 9. How to get frequency counts of unique items of a series?
# Difficulty Level: L1

# Calculate the frequency counts of each unique value of ser.

# Input: 30 letters picked at random (with repetition) from 'a'..'h'.
# No seed is set in this cell, so the draw depends on the current state of
# numpy's global random generator.
letter_positions = np.random.randint(8, size=30)
ser = pd.Series(np.take(list('abcdefgh'), letter_positions))

# One row per distinct letter, most frequent first.
ser.value_counts()

b    7
h    6
g    4
a    3
e    3
d    3
c    2
f    2
dtype: int64

In [127]:
# 10. How to keep only top 2 most frequent values as it is and replace everything else as 'Other'?
# Difficulty Level: L2

# From ser, keep the top 2 most frequent items as it is and replace everything else as 'Other'.
# (The replacement label actually written below is the lowercase string 'other'.)

# Input: 12 random integers in [1, 5), seeded for repeatable output.
np.random.seed(123)
ser = pd.Series(np.random.randint(1, 5, [12]))
print(ser)

# Count once and reuse: the two most frequent values and how often they occur.
top2_counts = ser.value_counts().nlargest(2)
print(top2_counts)

# where() keeps the values for which the condition holds and substitutes 'other'
# elsewhere, returning a new object-dtype series. This avoids assigning a string
# into an integer series in place, which relies on silent upcasting (deprecated
# in newer pandas releases) and makes the cell non-idempotent.
ser = ser.where(ser.isin(top2_counts.index), 'other')
ser

0     3
1     2
2     3
3     3
4     1
5     3
6     3
7     2
8     4
9     3
10    4
11    2
dtype: int32
3    6
2    3
dtype: int64


0         3
1         2
2         3
3         3
4     other
5         3
6         3
7         2
8     other
9         3
10    other
11        2
dtype: object

In [136]:
# 11. How to bin a numeric series to 10 groups of equal size?
# Difficulty Level: L2

# Bin the series ser into 10 equal deciles and replace the values with the bin name.

# Input: 20 uniform random numbers in [0, 1), seeded for repeatable output.
np.random.seed(123)
ser = pd.Series(np.random.random(20))

# Desired Output (illustrates the format only; the labels depend on the draw)

# # First 5 items
# 0    7th
# 1    9th
# 2    7th
# 3    3rd
# 4    8th
# dtype: category
# Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]

# One label per decile, lowest to highest.
decile_labels = [
    '1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th'
]

# An integer q asks qcut for that many equal-sized quantile bins. This avoids
# building the edges with a float-step np.arange, whose element count is
# sensitive to floating-point rounding.
deciles = pd.qcut(ser, q=len(decile_labels), labels=decile_labels)
print(deciles[:5])

0    8th
1    3rd
2    2nd
3    7th
4    9th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']


In [144]:
# 12. How to convert a numpy array to a dataframe of given shape? (L1)
# Difficulty Level: L1

# Reshape the series ser into a dataframe with 7 rows and 5 columns

# Input: 35 random integers in [1, 10). No seed is set in this cell, so the
# values depend on the current state of numpy's global random generator.
ser = pd.Series(np.random.randint(1, 10, 35))

# Solution 1: copy the series into an array, reshape it, then wrap it in a frame.
grid_7x5 = np.array(ser).reshape(7, 5)
print(pd.DataFrame(grid_7x5).head())
print('=====================')

# Solution 2: reshape the series' underlying values directly.
df = pd.DataFrame(ser.values.reshape((7, 5)))
print(df.head())

   0  1  2  3  4
0  1  3  7  6  2
1  6  4  7  5  8
2  2  7  1  4  2
3  4  1  9  6  2
4  5  4  3  3  1
   0  1  2  3  4
0  1  3  7  6  2
1  6  4  7  5  8
2  2  7  1  4  2
3  4  1  9  6  2
4  5  4  3  3  1


In [165]:
# 13. How to find the positions of numbers that are multiples of 3 from a series?
# Difficulty Level: L2

# Find the positions of numbers that are multiples of 3 from ser.

# Input: 7 random integers in [1, 10), seeded for repeatable output.
np.random.seed(123)
ser = pd.Series(np.random.randint(1, 10, 7))
print(ser)
print('===============')

# Solution 1: filter with a boolean mask and read off the surviving index
# labels (these coincide with positions because ser has the default index).
divisible_by_3 = ser % 3 == 0
print(ser[divisible_by_3].index)

# Solution 2: argwhere on the raw values; the result is a column of positions
# (one row per match).
raw_values = np.array(ser)
np.argwhere(raw_values % 3 == 0)

0    3
1    3
2    7
3    2
4    4
5    7
6    2
dtype: int32
Int64Index([0, 1], dtype='int64')


array([[0],
       [1]], dtype=int64)

In [169]:
# 14. How to extract items at given positions from a series
# Difficulty Level: L1

# From ser, extract the items at positions in list pos.

# Input
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

# Solution 1: .iloc is explicitly positional. Plain ser[pos] looks the integers
# up as index labels, which only matches positions while ser keeps the default
# 0..n-1 index.
picked_iloc = ser.iloc[pos]
print(picked_iloc)

# Solution 2: Series.take is positional as well.
picked_take = ser.take(pos)
print(picked_take)

0     a
4     e
8     i
14    o
20    u
dtype: object
0     a
4     e
8     i
14    o
20    u
dtype: object


In [171]:
# 15. How to stack two series vertically and horizontally ?
# Difficulty Level: L1

# Stack ser1 and ser2 vertically and horizontally (to form a dataframe).

# Input: five integers and five letters, both on the default 0..4 index.
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Vertical: ser2 is appended below ser1; each keeps its own index labels,
# so the labels 0..4 appear twice.
stacked_rows = pd.concat([ser1, ser2], axis=0)
print(stacked_rows)

# Horizontal: the series become columns 0 and 1 of a dataframe.
stacked_columns = pd.concat([ser1, ser2], axis=1)
print(stacked_columns)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object
   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e
