## Dropping NaN values
In pandas, NaN (Not a Number) represents missing or undefined data.
The `dropna()` method removes rows or columns that contain NaN values from a DataFrame or Series.

**Key parameters**
- `axis=0`: drop rows (`axis=1` drops columns)
- `how='any'`: drop if any NaN is present
- `how='all'`: drop only if all values are NaN
- `thresh=n`: keep only rows/columns that have at least `n` non-NaN values
- `subset`: restrict the NaN check to the listed columns
- `inplace=True`: modify the original DataFrame instead of returning a new one

In [100]:
import pandas as pd

In [101]:
# Load the Fortune_10 dataset.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will only run where this exact file location exists.
df = pd.read_csv(
    filepath_or_buffer="C:\\Users\\Dell\\Desktop\\_PANDAS_\\Data\\Fortune_10.csv"
)
# Preview the first two rows.
df.head(n=2)

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%


### Checking which columns contain null values (True/False)

In [103]:
# One boolean per column: True when the column holds at least one missing value.
df.isna().any()

ID           False
Name         False
Industry      True
Inception    False
Revenue      False
Expenses      True
Profit        True
Growth        True
dtype: bool

### Counting the null values in each column

In [105]:
# Per-column count of missing values (each True in the mask counts as 1).
df.isna().sum()

ID           0
Name         0
Industry     3
Inception    0
Revenue      0
Expenses     1
Profit       2
Growth       1
dtype: int64

### Total number of null values in the dataset

In [107]:
print("Total Null values In Dataset")
# First sum() counts missing values per column; the second adds those
# per-column counts into a single total for the whole DataFrame.
df.isna().sum().sum()

Total Null values In Dataset


7

### Counting the non-null values in each column

In [109]:
# Per-column count of values that are present (not missing).
df.notna().sum()

ID           10
Name         10
Industry      7
Inception    10
Revenue      10
Expenses      9
Profit        8
Growth        9
dtype: int64

### Total number of non-null values in the dataset

In [111]:
# Sum the per-column non-null counts into one total for the whole DataFrame.
df.notna().sum().sum()

73

### Counting null values in a Series

In [113]:
import numpy as np

In [114]:
# Small Series with two missing entries (positions 5 and 7) to demonstrate
# null handling on a Series rather than a DataFrame.
arr = pd.Series(
    [1, 2, 3, 4, 5, np.nan, 4, np.nan]
)
arr

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    NaN
6    4.0
7    NaN
dtype: float64

In [115]:
# Number of missing values in the Series.
arr.isna().sum()

2

In [116]:
# Load a second, independent copy of the same CSV so the dropna examples
# below do not touch `df`.
# NOTE(review): absolute, machine-specific Windows path.
df1 = pd.read_csv(
    filepath_or_buffer="C:\\Users\\Dell\\Desktop\\_PANDAS_\\Data\\Fortune_10.csv"
)
# Preview the first three rows.
df1.head(n=3)

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%


### Dropping rows that contain NaN values

In [118]:
# Default behaviour spelled out: work row-wise and return a new DataFrame
# without the rows that contain NaN; df1 itself is not modified.
df1.dropna(axis="index")

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%
3,4,Mattouch,IT Services,2013,"$14,026,934","7,429,377 Dollars",6597557.0,26%
4,5,Techdrill,Health,2009,"$10,573,990","7,435,363 Dollars",3138627.0,8%


### Dropping columns that contain NaN values

In [120]:
# Work column-wise: return a new DataFrame without the columns that contain NaN.
df1.dropna(axis="columns")

Unnamed: 0,ID,Name,Inception,Revenue
0,1,Lamtone,2009,"$11,757,018"
1,2,Stripfind,2010,"$12,329,371"
2,3,Canecorporation,2012,"$10,597,009"
3,4,Mattouch,2013,"$14,026,934"
4,5,Techdrill,2009,"$10,573,990"
5,6,Techline,2006,"$13,898,119"
6,7,Cityace,2010,"$9,254,614"
7,8,Kayelectronics,2009,"$9,451,943"
8,9,Ganzlax,2011,"$14,001,180"
9,10,Trantraxlax,2011,"$11,088,336"


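### Dropping a row only if all of its values are NaN

Note: no row in this dataset is entirely NaN (`ID` and `Name` have no missing values), so `how='all'` removes nothing from the full data. The saved output below shows only six rows, which suggests `df1` had already been filtered by the in-place `dropna(subset=...)` cell when this cell was last run.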

In [146]:
# Remove a row only when every one of its values is NaN; rows that are
# merely partially missing are kept.
df1.dropna(how="all")

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%
3,4,Mattouch,IT Services,2013,"$14,026,934","7,429,377 Dollars",6597557.0,26%
4,5,Techdrill,Health,2009,"$10,573,990","7,435,363 Dollars",3138627.0,8%
8,9,Ganzlax,IT Services,2011,"$14,001,180",,11901180.0,18%


### Dropping a row if at least one of its values is NaN

In [151]:
# Remove every row that has at least one NaN value.
df1.dropna(how="any")

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%
3,4,Mattouch,IT Services,2013,"$14,026,934","7,429,377 Dollars",6597557.0,26%
4,5,Techdrill,Health,2009,"$10,573,990","7,435,363 Dollars",3138627.0,8%


In [123]:
# thresh sets the minimum number of non-NaN values a row must have to be
# kept. With the 8 columns shown in this frame, thresh=7 keeps rows that
# have at most one missing value.
df1.dropna(thresh=7)

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%
3,4,Mattouch,IT Services,2013,"$14,026,934","7,429,377 Dollars",6597557.0,26%
4,5,Techdrill,Health,2009,"$10,573,990","7,435,363 Dollars",3138627.0,8%
7,8,Kayelectronics,,2009,"$9,451,943","3,878,113 Dollars",5573830.0,4%
8,9,Ganzlax,IT Services,2011,"$14,001,180",,11901180.0,18%
9,10,Trantraxlax,Government Services,2011,"$11,088,336","5,635,276 Dollars",5453060.0,


### Dropping rows with NaN in selected columns

In [154]:
# Only 'Industry' and 'Growth' are checked for NaN; rows missing either one
# are removed. inplace=True mutates df1 directly, so every later cell sees
# the filtered frame.
df1.dropna(subset=["Industry", "Growth"], inplace=True)
# Re-count missing values per column to confirm what is left.
df1.isna().sum()

ID           0
Name         0
Industry     0
Inception    0
Revenue      0
Expenses     1
Profit       0
Growth       0
dtype: int64

In [125]:
# Fill missing 'Expenses' entries with the string '123'. The dict maps
# column name -> fill value; a new DataFrame is returned and df1 itself
# is left unchanged.
df1.fillna(value={"Expenses": "123"})

Unnamed: 0,ID,Name,Industry,Inception,Revenue,Expenses,Profit,Growth
0,1,Lamtone,IT Services,2009,"$11,757,018","6,482,465 Dollars",5274553.0,30%
1,2,Stripfind,Financial Services,2010,"$12,329,371","916,455 Dollars",11412916.0,20%
2,3,Canecorporation,Health,2012,"$10,597,009","7,591,189 Dollars",3005820.0,7%
3,4,Mattouch,IT Services,2013,"$14,026,934","7,429,377 Dollars",6597557.0,26%
4,5,Techdrill,Health,2009,"$10,573,990","7,435,363 Dollars",3138627.0,8%
8,9,Ganzlax,IT Services,2011,"$14,001,180",123,11901180.0,18%
