1. How to create a series from a list, numpy array and dict?

In [1]:
# importing necessary python libraries
import numpy as np
import pandas as pd

In [4]:
# inputs
word_list = list(['Nishan Limbu', 'Saru Limbu', 'Priya Limbu'])
numpy_array = np.arange(1, 11)
alphabet_dictionary = {'A': 0, 'B': 1, 'C': 2, 'D':3, 'E':4, 'F':5}

In [5]:
# creating a series from the above inputs
series1 = pd.Series(word_list)
series2 = pd.Series(numpy_array)
series3 = pd.Series(alphabet_dictionary)


In [6]:
# printing all the series
print(series1)
print(series2)
print(series3)

0    Nishan Limbu
1      Saru Limbu
2     Priya Limbu
dtype: object
0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64
A    0
B    1
C    2
D    3
E    4
F    5
dtype: int64


2. How to combine many series to form a dataframe?

In [7]:
# input of two series
series1 = pd.Series(word_list)
series2 = pd.Series(numpy_array)

In [8]:
# using pandas DataFrame
df = pd.DataFrame(series1, series2).reset_index()
df

Unnamed: 0,index,0
0,1,Saru Limbu
1,2,Priya Limbu
2,3,
3,4,
4,5,
5,6,
6,7,
7,8,
8,9,
9,10,


In [9]:
# using pandas DataFrame with a dictionary, gives specific columns name
df = pd.DataFrame({'col-1':series1, 'col-2':series2})
df.head()

Unnamed: 0,col-1,col-2
0,Nishan Limbu,1
1,Saru Limbu,2
2,Priya Limbu,3
3,,4
4,,5


In [15]:
# uisng concat()
ser_df.head()

Unnamed: 0,0,1
0,Nishan Limbu,1
1,Saru Limbu,2
2,Priya Limbu,3
3,,4
4,,5


3. How to get the items of series A not present in series B?

In [16]:
# input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [18]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

4. How to get the items not common to both series A and series B?

In [19]:
# input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [22]:
# getting all the elements of ser1 that is not present on ser2
a_not_b = ser1[~ser1.isin(ser2)]
a_not_b

0    1
1    2
2    3
dtype: int64

In [23]:
# getting all the elements of ser2 that is not present on ser1
b_not_a = ser2[~ser2.isin(ser1)]
b_not_a

2    6
3    7
4    8
dtype: int64

In [29]:
# getting the result
result = pd.concat([a_not_b, b_not_a])
result

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64

In [34]:
# Another method:
# using numpy unions and intersection
u = pd.Series(np.union1d(ser1, ser2))  # union
i = pd.Series(np.intersect1d(ser1, ser2))  # intersection

# final result
result = u[~u.isin(i)]

result


0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

5. How to get useful infos

In [36]:
# Compute the minimum, 25th percentile, median, 75th, and maximum of serie.

In [42]:
# input
state = np.random.RandomState(100)  # creates a RandomState object in NumPy, which is used to generate random numbers
series1 = pd.Series(state.normal(10, 5, 25))  # state.normal(mean, std_dev, size) generates random numbers from a normal (Gaussian) distribution.
series1

0      1.251173
1     11.713402
2     15.765179
3      8.737820
4     14.906604
5     12.571094
6     11.105898
7      4.649783
8      9.052521
9     11.275007
10     7.709865
11    12.175817
12     7.082025
13    14.084235
14    13.363604
15     9.477944
16     7.343598
17    15.148663
18     7.809322
19     4.408409
20    18.094908
21    17.708026
22     8.740604
23     5.787821
24    10.922593
dtype: float64

In [43]:
# getting useful information (info)
series1.describe()

count                3
unique               3
top       Nishan Limbu
freq                 1
dtype: object

6. How to get frequency counts of unique items of a series?

In [44]:
# input
alphabet_list = list('abcdefgh')
alphabet_list

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

In [45]:
# input
numbers = np.random.randint(8, size=30)
numbers

array([0, 2, 1, 0, 0, 4, 7, 6, 0, 3, 7, 1, 6, 3, 2, 0, 6, 4, 6, 7, 0, 5,
       3, 1, 4, 3, 5, 5, 1, 6], dtype=int32)

In [47]:
 # selecting elements from the list('abcdefgh') using the indices provided by np.random.randint.
sel_elements = np.take(alphabet_list, numbers)
sel_elements

array(['a', 'c', 'b', 'a', 'a', 'e', 'h', 'g', 'a', 'd', 'h', 'b', 'g',
       'd', 'c', 'a', 'g', 'e', 'g', 'h', 'a', 'f', 'd', 'b', 'e', 'd',
       'f', 'f', 'b', 'g'], dtype='<U1')

In [49]:
# creating a seriesA
seriesA = pd.Series(sel_elements)
seriesA

0     a
1     c
2     b
3     a
4     a
5     e
6     h
7     g
8     a
9     d
10    h
11    b
12    g
13    d
14    c
15    a
16    g
17    e
18    g
19    h
20    a
21    f
22    d
23    b
24    e
25    d
26    f
27    f
28    b
29    g
dtype: object

In [52]:
# counting the values
seriesA.value_counts()

a    6
g    5
d    4
b    4
h    3
e    3
f    3
c    2
Name: count, dtype: int64