#### 1. How to import pandas and check the version?

In [6]:
# NumPy and pandas are used throughout the examples that follow.
import numpy as np
import pandas as pd
# Bare last expression: the notebook echoes the installed pandas version string.
pd.__version__

'0.24.2'

In [3]:
# Dump the environment report (system info and dependency versions) as JSON.
pd.show_versions(as_json=True)

{'system': {'commit': None, 'python': '3.6.1.final.0', 'python-bits': 64, 'OS': 'Windows', 'OS-release': '10', 'machine': 'AMD64', 'processor': 'Intel64 Family 6 Model 142 Stepping 9, GenuineIntel', 'byteorder': 'little', 'LC_ALL': 'None', 'LANG': 'None', 'LOCALE': 'None.None'}, 'dependencies': {'pandas': '0.24.2', 'pytest': '4.0.0', 'pip': '20.2.2', 'setuptools': '54.2.0', 'Cython': '0.29.14', 'numpy': '1.19.2', 'scipy': '1.1.0', 'pyarrow': None, 'xarray': None, 'IPython': '6.5.0', 'sphinx': '1.8.2', 'patsy': '0.5.1', 'dateutil': '2.6.1', 'pytz': '2017.3', 'blosc': None, 'bottleneck': None, 'tables': None, 'numexpr': '2.6.8', 'feather': None, 'matplotlib': '2.2.3', 'openpyxl': '2.4.8', 'xlrd': '1.1.0', 'xlwt': '1.3.0', 'xlsxwriter': '1.0.2', 'lxml.etree': '4.3.2', 'bs4': '4.9.3', 'html5lib': None, 'sqlalchemy': '1.3.12', 'pymysql': None, 'psycopg2': '2.7.7 (dt dec pq3 ext lo64)', 'jinja2': '2.10.3', 's3fs': '0.1.0', 'fastparquet': None, 'pandas_gbq': None, 'pandas_datareader': None, '

#### 2. How to create a series from a list, numpy array and dict?

In [4]:
# Create a Series from a list.
# range() is materialised with list() so the input really is a list, as this
# cell advertises; the values stored in the Series are unchanged (1..99).
numbers = list(range(1, 100))
series = pd.Series(numbers)
# Show only the first five rows rather than the whole Series.
series.head(5)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [7]:
# Build a Series from a NumPy array holding the integers 1..99.
numbers = np.arange(start=1, stop=100)
series = pd.Series(data=numbers)
# Preview the first five entries.
series.head(5)

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [9]:
# Build a Series from a dict.
# Each dict key becomes one index label, so this one-key dict produces a
# single-row, object-dtype Series whose only value is the whole 99-item list.
numbers = dict(numbers=list(range(1, 100)))
series = pd.Series(data=numbers, name="numbers")
series.head(5)

numbers    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
Name: numbers, dtype: object

In [11]:
import numpy as np
# Pair every letter with its position; the dict keys become the Series index.
# NOTE(review): the literal has 'e' before 'd' — presumably a typo; kept as-is
# so the displayed output stays in sync.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}
series = pd.Series(data=mydict, name="alphabets")
series.head(5)

a    0
b    1
c    2
e    3
d    4
Name: alphabets, dtype: int64

#### 3. How to convert the index of a series into a column of a dataframe?

In [13]:
# Letters as keys, 0..25 as values; building the Series from the dict makes
# the letters its index.
mylist = [char for char in 'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {key: value for key, value in zip(mylist, myarr)}
ser = pd.Series(data=mydict)
ser.head(5)

a    0
b    1
c    2
e    3
d    4
dtype: int64

In [17]:
# Copy the Series index into its own column next to the values.
# Uses `ser`, the Series built for this exercise, rather than `series`, which
# is a leftover variable from the previous exercise.
pd.DataFrame({"alphabets": ser, "index": ser.index}).head()

Unnamed: 0,alphabets,index
a,0,a
b,1,b
c,2,c
e,3,e
d,4,d


In [20]:
# Series.reset_index() moves the index into a regular column and returns a
# DataFrame, so no separate to_frame() step is needed.
ser.reset_index().head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


#### 4. How to combine many series to form a dataframe?

In [21]:
# Two equally long Series: the letters and the integers 0..25.
ser1 = pd.Series(data=[*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(data=np.arange(26))
# Print both previews, one after the other.
print(ser1.head(), ser2.head(), sep="\n")

0    a
1    b
2    c
3    e
4    d
dtype: object
0    0
1    1
2    2
3    3
4    4
dtype: int32


In [23]:
# Place the two Series side by side as the columns of one DataFrame.
pd.concat(objs=[ser1, ser2], axis="columns").head()

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


#### 5. How to assign name to the series’ index?

In [24]:
# Series of single letters with the default integer index.
ser = pd.Series(data=[*'abcedfghijklmnopqrstuvwxyz'])
ser.head(5)

0    a
1    b
2    c
3    e
4    d
dtype: object

In [25]:
# Sets the Series' own name (shown as `Name:` in the repr). The index's name
# is a separate attribute, `ser.index.name`, which this cell does not touch.
ser.name = "alphabets"
ser.head(5)

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

#### 6. How to get the items of series A not present in series B?

In [26]:
# Two integer Series that share the values 4 and 5.
ser1, ser2 = map(pd.Series, ([1, 2, 3, 4, 5], [4, 5, 6, 7, 8]))

In [36]:
# Keep the rows of ser1 whose value does not occur anywhere in ser2.
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

#### 7. How to get the items not common to both series A and series B?

In [37]:
# Same pair of Series as in the previous exercise, rebuilt from scratch.
ser1, ser2 = (
    pd.Series(data=[1, 2, 3, 4, 5]),
    pd.Series(data=[4, 5, 6, 7, 8]),
)

In [39]:
# Stack the values unique to ser1 on top of the values unique to ser2.
# Each piece keeps its original index labels, so labels may repeat.
pd.concat([
    ser1.loc[~ser1.isin(ser2)],
    ser2.loc[~ser2.isin(ser1)],
])

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64

In [44]:
# Alternative: take the union of both Series and drop whatever is in their
# intersection.
s_u = pd.Series(data=np.union1d(ser1, ser2))
s_i = pd.Series(data=np.intersect1d(ser1, ser2))
s_u.loc[~s_u.isin(s_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

#### 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [47]:
# 25 draws from a normal distribution with mean 10 and standard deviation 5.
# NOTE(review): no seed is set in this cell, so the numbers presumably change
# on every run; the median is not printed here.
ser = pd.Series(data=np.random.normal(loc=10, scale=5, size=25))
print(f"Min:{ser.min()}, Max:{ser.max()}, 25th: {ser.quantile(0.25)}, 75th:{ser.quantile(0.75)}")

Min:3.3765317360201506, Max:17.66238668275329, 25th: 7.177469959524793, 75th:11.700053752294858


In [48]:
# Minimum, quartiles and maximum in one call (0th..100th percentiles).
np.percentile(ser, q=(0, 25, 50, 75, 100))

array([ 3.37653174,  7.17746996,  9.22431033, 11.70005375, 17.66238668])

#### 9. How to get frequency counts of unique items of a series?

In [49]:
# 30 letters picked at random (with repetition) from 'a'..'h' by indexing the
# letter array with random positions 0..7.
ser = pd.Series(np.array(list('abcdefgh'))[np.random.randint(8, size=30)])
ser.head(5)

0    g
1    h
2    h
3    f
4    f
dtype: object

In [51]:
# Occurrences of each distinct value, most frequent first.
ser.value_counts(sort=True, ascending=False)

h    6
e    6
g    6
d    4
c    3
f    2
b    2
a    1
dtype: int64

#### 10. How to keep only the top 2 most frequent values as they are and replace everything else with ‘Other’?

In [84]:
# 12 random integers drawn from 1..4 (the upper bound 5 is exclusive).
ser = pd.Series(data=np.random.randint(low=1, high=5, size=12))
# Frequency of each value, most frequent first.
ser.value_counts(sort=True)

4    3
3    3
2    3
1    3
dtype: int64

In [89]:
# Frequency table of `ser`, most frequent value first. `top_2` has to be
# assigned here: the following cell reads it, and without this line the
# notebook raises NameError when run top to bottom on a fresh kernel.
top_2 = ser.value_counts()
# The two most frequent values and their counts.
top_2.head(2)

4    3
3    3
dtype: int64

In [86]:
# Keep the values that are among the two most frequent; every other entry
# becomes the string 'Other'.
# where() builds a new object-dtype Series instead of writing strings into the
# integer Series in place, which recent pandas versions deprecate. Re-running
# the cell gives the same result because `top_2` is not modified.
ser = ser.where(ser.isin(top_2.index[:2]), 'Other')
ser

0         4
1         4
2     Other
3     Other
4         4
5     Other
6         3
7         3
8     Other
9         3
10    Other
11    Other
dtype: object