In [4]:
import pandas as pd
import numpy as np

In [6]:
# Build a Series whose index holds 26 letters and whose values are 0..25.
# NOTE: the literal lists 'e' before 'd', so the index order is a, b, c, e, d, ...
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(len(mylist))
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(mydict)


## How to convert the index of a series into a column of a dataframe?

In [16]:
# reset_index() on an unnamed Series returns a DataFrame: the old index
# becomes a column labelled 'index' and the values land in column 0.
df = ser.reset_index()
print(df.head())

  index  0
0     a  0
1     b  1
2     c  2
3     e  3
4     d  4


## How to combine many series to form a dataframe?

In [17]:
# Two equal-length series: 26 letters and the integers 0..25.
# NOTE: the letter literal lists 'e' before 'd'.
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(np.arange(len(ser1)))

In [21]:
# Place the two series side by side as columns; because neither series
# has a name, the resulting columns are labelled 0 and 1.
df = pd.concat((ser1, ser2), axis='columns')
print(df.head())

   0  1
0  a  0
1  b  1
2  c  2
3  e  3
4  d  4


## How to assign a name to a series?

In [24]:
# Give the Series itself a name via the constructor; the name appears
# in the repr as "Name: alphabets".
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'), name='alphabets')
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

## How to get the items of series A not present in series B?

In [27]:
ser1 = pd.Series([1, 2, 3, 4, 5], name='ser1')
ser2 = pd.Series([4, 5, 6, 7, 8])
# isin() checks whether each value of ser1 is present in ser2, and ~ inverts
# that mask, so only the items missing from ser2 are kept.
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
Name: ser1, dtype: int64

## How to get the items not common to both series A and series B?

In [28]:
# Two integer series that overlap on the values 4 and 5.
ser1 = pd.Series(range(1, 6))
ser2 = pd.Series(range(4, 9))

In [33]:
# Union: every distinct value that occurs in either series (no repeats).
ser_u = pd.Series(data=np.union1d(ser1, ser2))
# Intersection: the values common to both series.
ser_i = pd.Series(data=np.intersect1d(ser1, ser2))
print(ser_i)

0    4
1    5
dtype: int64


In [34]:
# Keep the members of the union that are absent from the intersection,
# i.e. the items that are not common to both series.
ser_u.loc[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [35]:
# Inverted membership mask: True where a value of ser_u is NOT in ser_i.
~ser_u.isin(ser_i)

0     True
1     True
2     True
3    False
4    False
5     True
6     True
7     True
dtype: bool

In [36]:
# Membership mask: True where a value of ser_u also appears in ser_i.
ser_u.isin(ser_i)

0    False
1    False
2    False
3     True
4     True
5    False
6    False
7    False
dtype: bool

## How to get the minimum, 25th percentile, median, 75th percentile, and maximum of a numeric series?

In [37]:
# Draw 25 samples from a normal distribution with mean 10 and standard
# deviation 5. A seeded generator is used so that Restart & Run All yields
# the same sample every time; outputs recorded before seeding will differ
# until the notebook is re-run.
ser = pd.Series(np.random.default_rng(42).normal(loc=10, scale=5, size=25))

In [38]:
# Print every element of the series.
print(ser)

0     10.892579
1     14.466109
2     17.073401
3      1.627593
4     10.941670
5      8.644525
6      2.836961
7     15.184189
8      3.691791
9     13.607751
10     8.348275
11     6.115550
12     8.152078
13    15.200553
14     6.308857
15     1.899106
16     4.409031
17    11.577716
18     6.273475
19     4.812205
20     2.642932
21    14.870014
22     9.624308
23    14.824186
24    10.640633
dtype: float64


In [39]:
# Five-number summary as a NumPy array. The quantile fractions
# 0, 0.25, 0.5, 0.75 and 1 correspond to the 0th, 25th, 50th, 75th and
# 100th percentiles (min, lower quartile, median, upper quartile, max).
np.quantile(ser, [0, 0.25, 0.5, 0.75, 1])

array([ 1.62759254,  4.81220502,  8.64452537, 13.6077513 , 17.07340127])

In [40]:
# describe() reports count, mean, std, min, the 25%/50%/75% quantiles and max.
ser.describe()

count    25.000000
mean      8.986620
std       4.798701
min       1.627593
25%       4.812205
50%       8.644525
75%      13.607751
max      17.073401
dtype: float64

## How to get frequency counts of unique items of a series?

In [43]:
# Build a 30-item series of letters drawn at random (with repetition) from
# 'a'..'h': random positions 0..7 are generated and np.take picks the
# matching letters. The generator is seeded so that Restart & Run All
# reproduces the same sample; outputs recorded before seeding will differ
# until the notebook is re-run.
ser = pd.Series(
    np.take(list('abcdefgh'), np.random.default_rng(42).integers(8, size=30))
)

In [44]:
# Preview the first five items of the series.
print(ser.head())

0    e
1    h
2    e
3    b
4    f
dtype: object
