1. How to import pandas and check the version

In [1]:
import pandas as pd
print(pd.__version__)

1.4.2


2. How to create a series from a list, numpy array and dict

In [2]:
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

In [3]:
df_list = pd.DataFrame(mylist)

In [4]:
df_list.head(3)

Unnamed: 0,0
0,a
1,b
2,c


In [5]:
df_arr = pd.DataFrame(myarr)

In [6]:
df_arr.head(3)

Unnamed: 0,0
0,0
1,1
2,2


In [7]:
df_dict = pd.DataFrame.from_dict(mydict,orient='index')

In [8]:
df_dict.head(1)

Unnamed: 0,0
a,0


3. How to convert the index of a series into a column of a dataframe

In [9]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

In [10]:
df_ser = ser.to_frame().reset_index()

In [11]:
df_ser.head(1)

Unnamed: 0,index,0
0,a,0


4. How to combine many series to form a dataframe

In [12]:
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [13]:
df_ser1 = ser1.to_frame().reset_index()

In [14]:
df_ser1.rename(columns={0:'ser1'}, inplace=True)

In [15]:
df_ser1_new = df_ser1.assign(ser2 = ser2)

In [16]:
df_ser1_new.iloc[1]

index    1
ser1     b
ser2     1
Name: 1, dtype: object

5. How to assign name to the series’ index

In [17]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [18]:
ser.index.name = 'ivan_index'

In [19]:
ser.name = 'jhon_index'

In [20]:
ser

ivan_index
0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: jhon_index, dtype: object

6. How to get the items of series A not present in series B

In [21]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [22]:
df_ser_ins =pd.Series(list(set(ser1) & set(ser2)))

In [23]:
df_ser_ins

0    4
1    5
dtype: int64

7. How to get the items not common to both series A and series B

In [24]:
df_ser_out = pd.concat([ser1,ser2]).drop_duplicates()

In [25]:
df_set_diff = pd.Series(list(set(df_ser_out).difference(set(df_ser_ins))))

In [26]:
df_set_diff

0    1
1    2
2    3
3    6
4    7
5    8
dtype: int64

8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series

In [27]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [28]:
ser.head(5)

0    11.943235
1    10.245060
2    16.653610
3    12.146741
4     0.279948
dtype: float64

In [30]:
ser.quantile(q=0.25)

6.202322796668624

In [31]:
ser.quantile(q=0.75)

12.146740683225099

In [32]:
ser.max()

16.653610268025258

9. How to get frequency counts of unique items of a series

In [33]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))


In [38]:
ser.head(3)

0    d
1    g
2    h
dtype: object

In [39]:
uniq = ser.value_counts()

In [40]:
uniq.head(3)

d    7
g    6
b    4
dtype: int64

10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [45]:
ser_max_2 = uniq.nlargest(n=2, keep='first')

In [52]:
lis_to_replase  = ser_max_2.index.to_list()

In [58]:
lis_to_replase

['d', 'g']

In [63]:
uniq2 = uniq.where(uniq.index.isin(lis_to_replase), 'Other')

In [64]:
uniq2.head(3)

d        7
g        6
b    Other
dtype: object