In [39]:
import pandas as pd

In [None]:
# Build a pandas Series from a dictionary: each key becomes an index label
# and each value becomes the corresponding data element.

veg = pd.Series(
    {
        'Potato': 45,
        'Tomato': 35,
        'Green Chilli': 20,
        'Beetroot': 25,
        'Ladies Finger': 35,
    }
)
veg

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [41]:
# .values returns the data of the Series (without the index labels) as a NumPy array.

veg.values

array([45, 35, 20, 25, 35])

In [42]:
# .index returns the index labels of the Series as a pandas Index object.
veg.index

Index(['Potato', 'Tomato', 'Green Chilli', 'Beetroot', 'Ladies Finger'], dtype='object')

In [43]:
# .shape returns a one-element tuple holding the number of elements in the Series.

veg.shape


(5,)

In [44]:
# Selecting a single element of the Series.

# Position-based access: .iloc[0] returns the first element (positions start at 0).
veg_position_index = veg.iloc[0]
print(veg_position_index)

45


In [45]:
# Label-based access: .loc['Beetroot'] returns the element whose index label is 'Beetroot'.

veg_label_index=veg.loc['Beetroot']
print(veg_label_index)

25


In [46]:
# Slicing by label: both ends are included, so 'Beetroot' is part of the result.

veg['Potato':'Beetroot']


Potato          45
Tomato          35
Green Chilli    20
Beetroot        25
dtype: int64

In [47]:
# Slicing by position: the end is excluded, so 0:3 returns positions 0, 1 and 2.

veg[0:3]

Potato          45
Tomato          35
Green Chilli    20
dtype: int64

In [48]:
# Select several elements of the Series at once by passing a list of labels.

veg[['Potato','Green Chilli','Ladies Finger']]

Potato           45
Green Chilli     20
Ladies Finger    35
dtype: int64

In [49]:
# Label slice with an open end: starts at 'Tomato' and runs to the last element.

veg['Tomato': ]

Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [50]:
# Label slice with an open start: runs from the first element up to and including 'Beetroot'.

veg[:'Beetroot']

Potato          45
Tomato          35
Green Chilli    20
Beetroot        25
dtype: int64

In [51]:
# Positional slicing.

# 0:5 selects positions 0 up to, but not including, 5 (i.e. positions 0-4).
veg[0:5]

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [52]:
# Step of 2: every 2nd element, starting from position 0 (positions 0, 2, 4).
veg[0::2]

Potato           45
Green Chilli     20
Ladies Finger    35
dtype: int64

In [53]:
# Step of 3: every 3rd element, starting from position 0 (positions 0 and 3).
veg[0::3]

Potato      45
Beetroot    25
dtype: int64

In [55]:
# loc vs. iloc
# loc  : label-based indexing
# iloc : position-based indexing

# Display the full Series for reference.
veg

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [58]:
# Label-based slicing with .loc: the end label 'Beetroot' is included.
veg.loc['Potato':'Beetroot']

Potato          45
Tomato          35
Green Chilli    20
Beetroot        25
dtype: int64

In [59]:
# Position-based slicing with .iloc: the end position 4 is excluded (positions 0-3).
veg.iloc[0:4]

Potato          45
Tomato          35
Green Chilli    20
Beetroot        25
dtype: int64

Comparison Operator: 

In [66]:
# Display the full Series for reference.
veg

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [65]:
# Element-wise comparison: the result is a boolean Series that is True where
# the value is > 25 and False where it is <= 25.
veg > 25


Potato            True
Tomato            True
Green Chilli     False
Beetroot         False
Ladies Finger     True
dtype: bool

In [68]:
# Boolean filtering: indexing veg with the mask (veg > 25) keeps only the
# elements for which the mask is True.
veg[veg>25]

Potato           45
Tomato           35
Ladies Finger    35
dtype: int64

In [69]:
# head() with no argument returns the first 5 elements (in index order, not the largest).

veg.head()

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [70]:
# head(2) returns the first 2 elements.
veg.head(2)

Potato    45
Tomato    35
dtype: int64

In [73]:
# tail() with no argument returns the last 5 elements; this Series has exactly 5, so all are shown.
veg.tail()

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [75]:
# tail(3) returns the last 3 elements.
veg.tail(3)

Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [88]:
# Element-wise membership test: isin() returns a boolean Series that is True
# where the element equals one of the listed values.

# 20 is present in the Series (Green Chilli), so exactly one entry is True.
veg.isin([20])

Potato           False
Tomato           False
Green Chilli      True
Beetroot         False
Ladies Finger    False
dtype: bool

In [89]:
# 40 does not occur in the Series, so every entry of the result is False.
veg.isin([40])

Potato           False
Tomato           False
Green Chilli     False
Beetroot         False
Ladies Finger    False
dtype: bool

In [87]:
# Use the isin() mask as a filter to keep only the elements equal to 20.
veg[veg.isin([20])]

Green Chilli    20
dtype: int64

Find unique values and their frequencies

In [92]:
# list() on a string splits it into a list of its individual characters.
list('abcd')

['a', 'b', 'c', 'd']

In [93]:
# Multiplying a string by 4 repeats it four times.
print('abcd'*4)

abcdabcdabcdabcd


In [91]:
# list('abcd') splits the string into ['a', 'b', 'c', 'd']; repeating that
# list 4 times gives 16 characters (a, b, c, d, a, b, c, d, ...), which are
# then wrapped in a one-dimensional Series with the default 0..15 index.
new_data = pd.Series(list('abcd') * 4)
new_data

0     a
1     b
2     c
3     d
4     a
5     b
6     c
7     d
8     a
9     b
10    c
11    d
12    a
13    b
14    c
15    d
dtype: object

In [94]:
# unique() - Array of the distinct values
# nunique() - Number of distinct values
# value_counts() - Each distinct value together with its number of occurrences.

In [96]:
# unique() returns an array containing each distinct value once.
new_data.unique()

array(['a', 'b', 'c', 'd'], dtype=object)

In [98]:
# nunique() returns the number of distinct values.
new_data.nunique()

4

In [99]:
# value_counts() returns each distinct value together with how many times it occurs.
new_data.value_counts()

a    4
b    4
c    4
d    4
Name: count, dtype: int64

In [100]:
# Flag duplicate values: the first occurrence of a value is marked False and
# every later repeat of it is marked True.

new_data.duplicated()


0     False
1     False
2     False
3     False
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
12     True
13     True
14     True
15     True
dtype: bool

In [101]:
# Use the duplicated() mask as a filter to show only the repeated entries.
new_data[new_data.duplicated()]

4     a
5     b
6     c
7     d
8     a
9     b
10    c
11    d
12    a
13    b
14    c
15    d
dtype: object

In [104]:
# drop_duplicates() returns a new Series with the repeats removed; the result
# is not assigned here, so new_data itself is left unchanged.
new_data.drop_duplicates()

0    a
1    b
2    c
3    d
dtype: object

In [103]:
# Display new_data: it still holds all 16 elements.
new_data

0     a
1     b
2     c
3     d
4     a
5     b
6     c
7     d
8     a
9     b
10    c
11    d
12    a
13    b
14    c
15    d
dtype: object

In [106]:
# Sorting the data
# sort_values() - order by the data values
# sort_index()  - order by the index labels

# Values are sorted in ascending order; each value keeps its label.
veg.sort_values()


Green Chilli     20
Beetroot         25
Tomato           35
Ladies Finger    35
Potato           45
dtype: int64

In [108]:
# String values are sorted in alphabetical order; each value keeps its original index.
new_data.sort_values()

0     a
4     a
8     a
12    a
1     b
5     b
9     b
13    b
2     c
6     c
10    c
14    c
3     d
7     d
11    d
15    d
dtype: object

In [110]:
# Display the full Series for reference.
veg

Potato           45
Tomato           35
Green Chilli     20
Beetroot         25
Ladies Finger    35
dtype: int64

In [112]:
# sort_index() orders the elements by index label; the string labels are sorted alphabetically.
veg.sort_index()

Beetroot         25
Green Chilli     20
Ladies Finger    35
Potato           45
Tomato           35
dtype: int64

In [114]:
# sort_index() on the integer index orders the elements by ascending index value.
new_data.sort_index()

0     a
1     b
2     c
3     d
4     a
5     b
6     c
7     d
8     a
9     b
10    c
11    d
12    a
13    b
14    c
15    d
dtype: object

Dealing with Missing Data

NaN is written here as `np.nan`, so NumPy is imported first.
`np.nan` is used to represent missing or undefined values.

In [123]:
import numpy as np
# Build a Series in which np.nan marks the missing entries; as the output
# shows, the resulting dtype is float64 and the integers appear as 2.0, 4.0, 8.0.
miss_data = pd.Series([2,4,np.nan,8,np.nan,np.nan])
miss_data

0    2.0
1    4.0
2    NaN
3    8.0
4    NaN
5    NaN
dtype: float64

In [124]:
# isnull() returns a boolean Series: True where the value is missing (NaN),
# False where a value is present.
miss_data.isnull()

0    False
1    False
2     True
3    False
4     True
5     True
dtype: bool

In [127]:
# Summing the boolean mask counts the missing values. The result is a NumPy
# integer, which is why the cell output is displayed as np.int64(3).
miss_data.isnull().sum()

np.int64(3)

In [128]:
# Count the number of missing records.
# print() shows just the plain value (3) instead of the np.int64(3) repr.
print(miss_data.isnull().sum())

3


In [130]:
# The type of the count is numpy.int64, not a built-in Python int.
type(miss_data.isnull().sum())

numpy.int64

In [132]:
# Display the Series with missing values for reference.
miss_data

0    2.0
1    4.0
2    NaN
3    8.0
4    NaN
5    NaN
dtype: float64

In [134]:
# notnull() is the inverse check: True where a value is present, False where it is missing (NaN).
miss_data.notnull()

0     True
1     True
2    False
3     True
4    False
5    False
dtype: bool

In [136]:
# isna() ("is NA"): True where the value is missing (NaN), False otherwise.
miss_data.isna()

0    False
1    False
2     True
3    False
4     True
5     True
dtype: bool