### Lesson 7 

--> None   
--> NaN [Not a Number]  
--> Exception: Aggregate functions that ignore NaN values  
--> NaN and None values in Pandas  
--> Detecting Null Values  
--> Dropping Null Values  
--> Filling Null Values  

### None 

In [4]:
import numpy as np

# The array holds an int, a str and None; NumPy stores it with dtype=object (see the output).
a = np.array([1,'2',None])  # The third element is a None value
a

array([1, '2', None], dtype=object)

In [5]:
# Arithmetic and aggregate operations fail on an object array with mixed contents.
# Note that the TypeError shown below names 'int' and 'str': it is the '2' element,
# not None, that triggers this particular message. None cannot be added to a number either.
a.sum()

TypeError: unsupported operand type(s) for +: 'int' and 'str'

### NaN [Not a Number]

In [7]:
# np.nan is a floating-point value, so the array is stored as float64 (see the output).
value = np.array([1,2,3,np.nan])   # The last element is NaN (Not a Number)
value

array([ 1.,  2.,  3., nan])

In [12]:
value.dtype    # Data type of the array: float64, because NaN is a floating-point value

dtype('float64')

In [None]:
# Any arithmetic operation performed on NaN gives NaN as the result

In [13]:
value[0] * np.nan   # 1.0 * NaN evaluates to NaN

nan

In [15]:
value[1] + value[0] + np.nan  # 2.0 + 1.0 + NaN evaluates to NaN

nan

In [None]:
# Any aggregate function applied to an array that contains NaN returns NaN

In [16]:
value.sum()  # The NaN element propagates, so the sum is NaN

nan

In [17]:
value.min()  # The NaN element propagates, so the minimum is NaN

nan

In [18]:
value.max()  # The NaN element propagates, so the maximum is NaN

nan

### Exception: Aggregate functions that ignore NaN values 

In [20]:
np.nansum(value)  # nansum skips the NaN element: 1 + 2 + 3 = 6.0

6.0

In [21]:
np.nanmin(value)  # nanmin skips the NaN element: the smallest remaining value is 1.0

1.0

In [22]:
np.nanmax(value)  # nanmax skips the NaN element: the largest remaining value is 3.0

3.0

### NaN and None values in Pandas 

In [24]:
import pandas as pd
import numpy as np

In [25]:
data = pd.Series([1,2,3,np.nan,8,10,None])   # The list contains both a NaN and a None value
data                      

# In the output, None has been converted to NaN (the floating-point "not a number" value
# defined in the IEEE floating-point standard), and the Series is stored as float64

0     1.0
1     2.0
2     3.0
3     NaN
4     8.0
5    10.0
6     NaN
dtype: float64

In [26]:
x = pd.Series([0,1,2,3])  # A Series of integers; the output shows dtype int64
x

0    0
1    1
2    2
3    3
dtype: int64

In [27]:
x[1] = None  # Assign None to the element at index label 1
x   # pandas stores it as NaN, and the Series changes from int64 to float64 (see the output)

0    0.0
1    NaN
2    2.0
3    3.0
dtype: float64

### Detecting Null Values

In [30]:
data = pd.Series([1,4,None,9,0,1,np.nan])
data.isnull() # isnull() returns a boolean Series: True where the value is null, False otherwise
              # Index 2 (None) and index 6 (np.nan) hold null values, so they are True

0    False
1    False
2     True
3    False
4    False
5    False
6     True
dtype: bool

In [31]:
data.notnull()  # notnull() is the opposite of isnull(): True where the value is not null, False where it is null
                # Every value is non-null except those at index 2 and index 6, so only those two are False

0     True
1     True
2    False
3     True
4     True
5     True
6    False
dtype: bool

### Dropping Null Values

In [90]:
# A DataFrame with 3 rows and 4 columns; the missing values are written as both np.nan and None
data = pd.DataFrame([ [1,2,3,4],
                     [5,6,np.nan,7],
                     [None,10,21,np.nan]
                     ])   # In the output every missing value appears as NaN (an empty cell)
data

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,4.0
1,5.0,6,,7.0
2,,10,21.0,


In [33]:
data.dropna()  # dropna() --> removes every row that contains at least one NaN value; only row 0 is left

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,4.0


In [34]:
data.dropna(axis ="columns")  # With axis="columns", dropna() removes every column that contains at least one NaN value; only column 1 is left

Unnamed: 0,1
0,2
1,6
2,10


In [35]:
data  # The dropna() calls returned new DataFrames; data itself is unchanged

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,4.0
1,5.0,6,,7.0
2,,10,21.0,


In [93]:
data[3] = np.nan  # Set every value in column 3 to NaN (this modifies data in place)    [You can use loc and iloc to change values of rows]
data

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,
1,5.0,6,,
2,,10,21.0,


In [47]:
data.dropna(axis='columns',how='all')  # axis='columns' --> columns (not rows) are the candidates for removal
                                    # how='all' --> a column is removed only if all of its values are NaN, so only column 3 is removed

Unnamed: 0,0,1,2
0,1.0,2,3.0
1,5.0,6,
2,,10,21.0


In [48]:
data.dropna(axis='columns',how='any') # axis='columns' --> columns (not rows) are the candidates for removal
                                    # how='any' --> a column is removed if any of its values is NaN, so only column 1 is left

Unnamed: 0,1
0,2
1,6
2,10


In [49]:
data  # Still the same as before: dropna() did not modify data

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,
1,5.0,6,,
2,,10,21.0,


In [50]:
data.dropna(how='any')  # No axis is given, so the rows are checked
                        # how='any' --> a row is removed if any of its values is NaN; every row has a NaN in column 3, so the result is empty

Unnamed: 0,0,1,2,3


In [51]:
data.dropna(how='all') # No axis is given, so the rows are checked
                        # how='all' --> a row is removed only if all of its values are NaN; no row qualifies, so nothing is removed

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,
1,5.0,6,,
2,,10,21.0,


In [52]:
data  # Displayed again for reference before the thresh example

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,
1,5.0,6,,
2,,10,21.0,


In [98]:
data.dropna(axis='index',thresh=2) # axis='index': rows are the candidates for dropping ('index' is the documented name of the row axis).
                                   # thresh=2: a row is kept only if it has at least 2 non-missing (non-NaN) values; rows with fewer are dropped.
                                   # Every row here has at least 2 non-NaN values, so no row is dropped.

Unnamed: 0,0,1,2,3
0,1.0,2,3.0,
1,5.0,6,,
2,,10,21.0,


### Filling Null Values

In [68]:
data = pd.Series([1,2,3,np.nan,5,None,8],index=list('abcdefg')) 
        # A Series with two missing values (np.nan and None), labelled with the index values 'a' to 'g'
data

a    1.0
b    2.0
c    3.0
d    NaN
e    5.0
f    NaN
g    8.0
dtype: float64

In [69]:
data.fillna(0)  # Returns a new Series in which every NaN value is replaced by 0

a    1.0
b    2.0
c    3.0
d    0.0
e    5.0
f    0.0
g    8.0
dtype: float64

In [70]:
data  # fillna() returned a new Series; data itself still contains the NaN values

a    1.0
b    2.0
c    3.0
d    NaN
e    5.0
f    NaN
g    8.0
dtype: float64

In [71]:
data.ffill()  # Forward fill: each NaN is replaced by the last valid value before it [filling proceeds top to bottom]
              # (ffill() replaces fillna(method='ffill'), whose method argument is deprecated in recent pandas)
              # For example:
              # The values at index d and index f are NaN
              # The value before index d is 3.0 [index c], so 3.0 is stored at index d
              # The value before index f is 5.0 [index e], so 5.0 is stored at index f

a    1.0
b    2.0
c    3.0
d    3.0
e    5.0
f    5.0
g    8.0
dtype: float64

In [72]:
data.bfill()  # Backward fill: each NaN is replaced by the next valid value after it [filling proceeds bottom to top]
              # (bfill() replaces fillna(method='bfill'), whose method argument is deprecated in recent pandas)
              # For example:
              # The values at index d and index f are NaN
              # The next value after index d is 5.0 [index e], so 5.0 is stored at index d
              # The next value after index f is 8.0 [index g], so 8.0 is stored at index f

a    1.0
b    2.0
c    3.0
d    5.0
e    5.0
f    8.0
g    8.0
dtype: float64

In [79]:
# A DataFrame with 3 rows and 4 columns; the values at positions [0,3], [1,1] and [2,1] are missing
data = pd.DataFrame([[1,2,3,np.nan],
                    [4,None,1,13],
                    [41,np.nan,90,100]])
data

Unnamed: 0,0,1,2,3
0,1,2.0,3,
1,4,,1,13.0
2,41,,90,100.0


In [80]:
data.fillna(0)  # Every NaN value in the DataFrame is replaced by 0

Unnamed: 0,0,1,2,3
0,1,2.0,3,0.0
1,4,0.0,1,13.0
2,41,0.0,90,100.0


In [87]:
data.ffill(axis='columns')  # axis='columns' --> fill along each row, moving across the columns; ffill --> forward fill [left to right]
                            # (ffill() replaces fillna(method='ffill'), whose method argument is deprecated in recent pandas)
                            # The values at positions [0,3], [1,1] and [2,1] are NaN
                            # Forward fill across the columns fills each NaN with the value to its left within the same row
                            # So [0,3] = 3.0, [1,1] = 4.0 and [2,1] = 41.0

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,4.0,4.0,1.0,13.0
2,41.0,41.0,90.0,100.0


In [82]:
data.bfill(axis='columns')  # axis='columns' --> fill along each row, moving across the columns; bfill --> backward fill [right to left]
                            # (bfill() replaces fillna(method='bfill'), whose method argument is deprecated in recent pandas)
                            # The values at positions [0,3], [1,1] and [2,1] are NaN
                            # Backward fill across the columns fills each NaN with the value to its right within the same row
                            # So [0,3] stays NaN [there is no column to its right], [1,1] = 1.0 and [2,1] = 90.0

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,1.0,1.0,13.0
2,41.0,90.0,90.0,100.0
