## Type 1
### Handling Missing Value with a single Line of code

In [166]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [167]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [168]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              19 non-null     object
 1   Python            14 non-null     object
 2   Machine Learning  12 non-null     object
 3   Age               18 non-null     object
 4   AI                14 non-null     object
 5   NLP               13 non-null     object
 6   Cv                16 non-null     object
dtypes: object(7)
memory usage: 1.2+ KB


    As you can see the Dataset it has missing values and will try to 
    replace such unwanted values with Nan
       
    But before replacing it with "Nan" you should always know what are 
    the different unwanted values.

In [169]:
# As i already know that what are the different unwanted values,
# we have in the dataset so we can mention it while reading the files.
df=pd.read_csv('data2.csv',na_values=['missing','*','?','/','$'])

In [170]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,,,,,,
4,kasturi,,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,,,22.0,,,22.0


In [171]:
# if you notice something your unwanted values got replaced by Nan
# just try to get the information about our dataset 
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            6 non-null      float64
 2   Machine Learning  7 non-null      float64
 3   Age               14 non-null     float64
 4   AI                9 non-null      float64
 5   NLP               8 non-null      float64
 6   Cv                11 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB


In [172]:
# See, bydefault it also changed the datatype of those columns,
# which had unwanted values or you can say the missing values.

In [173]:
df.isnull().sum()

Name                 0
Python              13
Machine Learning    12
Age                  5
AI                  10
NLP                 11
Cv                   8
dtype: int64

In [174]:
# df = df.fillna(df.mean(),inplace=True)
# doesnt work

In [175]:
# lets try to fill the 'Nan'  with Imputation rule
df['Age'].fillna(df.Age.mean(),inplace=True)
df['Python'].fillna(round(df.Python.mean(),2),inplace=True)
df['Machine Learning'].fillna(round(df['Machine Learning'].mean(),2),inplace=True)
df['AI'].fillna(round(df.AI.mean(),2),inplace=True)
df['NLP'].fillna(round(df['NLP'].mean(),2),inplace=True)
df['Cv'].fillna(round(df['Cv'].mean(),2),inplace=True)

In [176]:
df.head()

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,68.86,23.0,90.0,70.0,23.0
1,rohit_madke,71.33,89.0,22.285714,73.11,89.0,54.55
2,sahil,78.0,78.0,22.285714,78.0,78.0,54.55
3,rutuja,71.33,68.86,22.285714,73.11,70.0,54.55
4,kasturi,71.33,68.86,21.0,73.11,70.0,21.0


In [177]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     float64
 4   AI                19 non-null     float64
 5   NLP               19 non-null     float64
 6   Cv                19 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB


In [178]:
# let change the datatype of the Age column from float to int
# because Age in float doesn't seems to be relevant ib the Dataset
# do change the Age datatype from float to int
df['Age'] = df['Age'].astype('int')

In [179]:
df['Age'].dtype

dtype('int32')

In [180]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     int32  
 4   AI                19 non-null     float64
 5   NLP               19 non-null     float64
 6   Cv                19 non-null     float64
dtypes: float64(5), int32(1), object(1)
memory usage: 1.1+ KB


In [181]:
# if you want to change the datatype of Multiple column 
# lets change the datatype of "nlp" and "cv" from float to int
df = df.astype({
    "NLP":"int",
    "Cv":'int'
})

In [182]:
print(df['NLP'].dtype)
print(df['Cv'].dtype)

int32
int32


In [183]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     int32  
 4   AI                19 non-null     float64
 5   NLP               19 non-null     int32  
 6   Cv                19 non-null     int32  
dtypes: float64(3), int32(3), object(1)
memory usage: 964.0+ bytes


## Type 2
### Handling Missing Value with Regex

In [184]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [185]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [187]:
# Import Regular expression 
import re

In [188]:
# Now lets replace unwanted value with "Nan" using regex
import re
df.replace({
    'Python':'\D',
    'Machine Learning':'\D',
    'Age':'\D',
    'AI':'\D',
    'NLP':'\D',
    'Cv':'\D'
    
},np.nan,regex=True,inplace=True)

In [189]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,,,,,,
4,kasturi,,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,,,22.0,,,22.0


In [190]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              19 non-null     object
 1   Python            6 non-null      object
 2   Machine Learning  7 non-null      object
 3   Age               14 non-null     object
 4   AI                9 non-null      object
 5   NLP               8 non-null      object
 6   Cv                11 non-null     object
dtypes: object(7)
memory usage: 1.2+ KB


In [191]:
df.columns

Index(['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv'], dtype='object')

In [132]:
list1 = list(df.columns)

In [133]:
list1

['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [134]:
list1 = list1[1:]

In [135]:
list1

['Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [136]:
# let change the Datatype of the above mention list

In [192]:
for i in list1:
    df[i] = df[i].astype("float")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            6 non-null      float64
 2   Machine Learning  7 non-null      float64
 3   Age               14 non-null     float64
 4   AI                9 non-null      float64
 5   NLP               8 non-null      float64
 6   Cv                11 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB


In [138]:
# Let replace the Nan with imputation usnig for loop:
for i in list1:
    df[i].fillna(round(df[i].mean(),2),inplace=True)
df

# alternate way

# for i in list1:
#     mean = df[i].mean()
#     df[i].fillna(mean,inplace=True)
# df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,68.86,23.0,90.0,70.0,23.0
1,rohit_madke,71.33,89.0,22.29,73.11,89.0,54.55
2,sahil,78.0,78.0,22.29,78.0,78.0,54.55
3,rutuja,71.33,68.86,22.29,73.11,70.0,54.55
4,kasturi,71.33,68.86,21.0,73.11,70.0,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,22.29,45.0,67.0,54.55
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,22.29,70.0,67.0,54.55
9,ramachal,71.33,68.86,22.0,73.11,70.0,22.0


In [193]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            6 non-null      float64
 2   Machine Learning  7 non-null      float64
 3   Age               14 non-null     float64
 4   AI                9 non-null      float64
 5   NLP               8 non-null      float64
 6   Cv                11 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB


In [140]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,68.86,23.0,90.0,70.0,23.0
1,rohit_madke,71.33,89.0,22.29,73.11,89.0,54.55
2,sahil,78.0,78.0,22.29,78.0,78.0,54.55
3,rutuja,71.33,68.86,22.29,73.11,70.0,54.55
4,kasturi,71.33,68.86,21.0,73.11,70.0,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,22.29,45.0,67.0,54.55
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,22.29,70.0,67.0,54.55
9,ramachal,71.33,68.86,22.0,73.11,70.0,22.0


In [142]:
# checking for the Null Values
df.isnull().sum()

Name                0
Python              0
Machine Learning    0
Age                 0
AI                  0
NLP                 0
Cv                  0
dtype: int64

### Type 3
#### Handling Missing Value with na_value parameter using regex in that

In [143]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import re

In [144]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [148]:
df = pd.read_csv('data2.csv',na_values=({
    'Python':'\D',
    'Machine Learning':'\D',
    'Age':'\D',
    'AI':'\D',
    'NLP':'\D',
    'Cv':'\D'
    
},np.nan,regex=True)

SyntaxError: invalid syntax (2874542194.py, line 9)

In [146]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [None]:
# it doesnt seems to be working in that way which i was expecting.

## Type 4
### Handle Missing Value using replace function 

In [149]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [150]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [153]:
df.replace(['missing','*','?','/','$'],np.nan,inplace=True)

In [154]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,,,,,,
4,kasturi,,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,,,22.0,,,22.0


In [155]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              19 non-null     object
 1   Python            6 non-null      object
 2   Machine Learning  7 non-null      object
 3   Age               14 non-null     object
 4   AI                9 non-null      object
 5   NLP               8 non-null      object
 6   Cv                11 non-null     object
dtypes: object(7)
memory usage: 1.2+ KB


In [156]:
df.columns

Index(['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv'], dtype='object')

In [157]:
list1 = list(df.columns)

In [158]:
list1

['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [159]:
list1 = list1[1:]

In [161]:
list1

['Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [162]:
# lets change the Datatype with using for loop which we saw earlier
for i in list1:
    df[i] = df[i].astype('float')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            6 non-null      float64
 2   Machine Learning  7 non-null      float64
 3   Age               14 non-null     float64
 4   AI                9 non-null      float64
 5   NLP               8 non-null      float64
 6   Cv                11 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB


In [163]:
df.isnull().sum()

Name                 0
Python              13
Machine Learning    12
Age                  5
AI                  10
NLP                 11
Cv                   8
dtype: int64

In [164]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,,,,,,
4,kasturi,,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,,,22.0,,,22.0


In [165]:
# Now lets replace "Nan" with the imputation rule 
for i in list1:
    df[i].fillna(round(df[i].mean(),2),inplace=True)
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,68.86,23.0,90.0,70.0,23.0
1,rohit_madke,71.33,89.0,22.29,73.11,89.0,54.55
2,sahil,78.0,78.0,22.29,78.0,78.0,54.55
3,rutuja,71.33,68.86,22.29,73.11,70.0,54.55
4,kasturi,71.33,68.86,21.0,73.11,70.0,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,22.29,45.0,67.0,54.55
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,22.29,70.0,67.0,54.55
9,ramachal,71.33,68.86,22.0,73.11,70.0,22.0


# Type 5
### Handling Missing Value with a If-else Statement

In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [51]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [107]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              19 non-null     object
 1   Python            14 non-null     object
 2   Machine Learning  12 non-null     object
 3   Age               18 non-null     object
 4   AI                14 non-null     object
 5   NLP               13 non-null     object
 6   Cv                16 non-null     object
dtypes: object(7)
memory usage: 1.2+ KB


In [108]:
df['Python'].value_counts()

missing    3
*          2
90         1
78         1
65         1
45         1
80         1
70         1
$          1
?          1
/          1
Name: Python, dtype: int64

In [109]:
def pynan(x):
    if x in ['*','$','?','/','missing']:
        x = np.nan
        return x
    else:
        return x

In [110]:
df['Python'] = df['Python'].map(pynan)

In [111]:
df['Python']

0      90
1     NaN
2      78
3     NaN
4     NaN
5      65
6      45
7      80
8      70
9     NaN
10    NaN
11    NaN
12    NaN
13    NaN
14    NaN
15    NaN
16    NaN
17    NaN
18    NaN
Name: Python, dtype: object

In [112]:
df['Machine Learning'].value_counts()

?          2
67         2
/          2
89         1
78         1
56         1
45         1
missing    1
80         1
Name: Machine Learning, dtype: int64

In [113]:
df['Machine Learning'].dtype

dtype('O')

In [114]:
def mlnan(x):
    if x in ['?','/','missing']:
        x = np.nan
        return x
    else:
        return x

In [115]:
df['Machine Learning'] = df['Machine Learning'].map(mlnan)

In [116]:
df['Machine Learning']

0     NaN
1      89
2      78
3     NaN
4     NaN
5      56
6      67
7      45
8      67
9     NaN
10    NaN
11    NaN
12    NaN
13    NaN
14    NaN
15     80
16    NaN
17    NaN
18    NaN
Name: Machine Learning, dtype: object

In [117]:
df['Machine Learning'].isnull().sum()

12

In [118]:
df['Age'].value_counts()

23         6
22         4
?          2
missing    2
21         2
24         1
20         1
Name: Age, dtype: int64

In [119]:
def agenan(x):
    if x in ['?','missing']:
        x = np.nan
        return x
    else:
        return x

In [120]:
df['Age'] = df['Age'].map(agenan)

In [121]:
df['Age']

0      23
1     NaN
2     NaN
3     NaN
4      21
5      22
6     NaN
7      24
8     NaN
9      22
10     22
11     20
12     23
13     23
14     23
15     23
16     23
17     22
18     21
Name: Age, dtype: object

In [122]:
df['AI'].value_counts()

missing    4
80         3
70         2
90         1
78         1
65         1
45         1
?          1
Name: AI, dtype: int64

In [123]:
def ainan(x):
    if x in ['missing','?']:
        x = np.nan
        return x
    else:
        return x

In [124]:
df['AI'] = df['AI'].map(ainan)

In [125]:
df['AI']

0      90
1     NaN
2      78
3     NaN
4     NaN
5      65
6      45
7      80
8      70
9     NaN
10    NaN
11     80
12    NaN
13     80
14    NaN
15     70
16    NaN
17    NaN
18    NaN
Name: AI, dtype: object

In [126]:
df['NLP'].value_counts()

?          3
78         2
67         2
missing    2
89         1
56         1
45         1
80         1
Name: NLP, dtype: int64

In [127]:
def nlpnan(x):
    if x in ['?','missing']:
        x = np.nan
        return x
    else:
        return x

In [128]:
df['NLP'] = df['NLP'].map(nlpnan)

In [129]:
df['NLP']

0     NaN
1      89
2      78
3     NaN
4     NaN
5      56
6      67
7      45
8      67
9     NaN
10    NaN
11    NaN
12    NaN
13     78
14    NaN
15    NaN
16    NaN
17     80
18    NaN
Name: NLP, dtype: object

In [130]:
df['Cv'].value_counts()

missing    3
?          2
22         2
80         2
90         2
23         1
21         1
24         1
78         1
70         1
Name: Cv, dtype: int64

In [131]:
def cvnan(x):
    if x in ['missing','?']:
        x = np.nan
        return x
    else:
        return x
    

In [132]:
df['Cv'] = df['Cv'].map(cvnan)

In [133]:
df['Cv']

0      23
1     NaN
2     NaN
3     NaN
4      21
5      22
6     NaN
7      24
8     NaN
9      22
10     80
11     90
12     80
13    NaN
14    NaN
15    NaN
16     78
17     90
18     70
Name: Cv, dtype: object

In [134]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              19 non-null     object
 1   Python            6 non-null      object
 2   Machine Learning  7 non-null      object
 3   Age               14 non-null     object
 4   AI                9 non-null      object
 5   NLP               8 non-null      object
 6   Cv                11 non-null     object
dtypes: object(7)
memory usage: 1.2+ KB


In [135]:
# Changing the Datatype

In [136]:
df['Python'] = df['Python'].astype('float')

In [137]:
df.Python.dtype

dtype('float64')

In [138]:
df['Machine Learning'] = df['Machine Learning'].astype('float')

In [139]:
df['Machine Learning'].dtype

dtype('float64')

In [140]:
df['Age'] = df['Age'].astype('float')

In [141]:
df['Age'].dtype

dtype('float64')

In [142]:
df['AI']=df['AI'].astype("float")

In [143]:
df['AI'].dtype

dtype('float64')

In [144]:
df['NLP'] = df['NLP'].astype('float')

In [145]:
df['NLP'].dtype

dtype('float64')

In [146]:
df['Cv'] = df['Cv'].astype('float')

In [147]:
df['Cv'].dtype

dtype('float64')

In [148]:
# Replacing the Nan values with iomputation rule

In [149]:
df['Python'].fillna(round(df.Python.mean(),2),inplace=True)

In [150]:
df['Python']

0     90.00
1     71.33
2     78.00
3     71.33
4     71.33
5     65.00
6     45.00
7     80.00
8     70.00
9     71.33
10    71.33
11    71.33
12    71.33
13    71.33
14    71.33
15    71.33
16    71.33
17    71.33
18    71.33
Name: Python, dtype: float64

In [151]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,71.33,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,71.33,,,,,
4,kasturi,71.33,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,71.33,,22.0,,,22.0


In [152]:
df['Machine Learning'].fillna(round(df['Machine Learning'].mean(),2),inplace=True)

In [153]:
df['Machine Learning']

0     68.86
1     89.00
2     78.00
3     68.86
4     68.86
5     56.00
6     67.00
7     45.00
8     67.00
9     68.86
10    68.86
11    68.86
12    68.86
13    68.86
14    68.86
15    80.00
16    68.86
17    68.86
18    68.86
Name: Machine Learning, dtype: float64

In [155]:
df['Age'].fillna(round(df['Age'].mean(),0),inplace=True)

In [157]:
df['Age'] = df['Age'].astype('int')

In [158]:
df['Age']

0     23
1     22
2     22
3     22
4     21
5     22
6     22
7     24
8     22
9     22
10    22
11    20
12    23
13    23
14    23
15    23
16    23
17    22
18    21
Name: Age, dtype: int32

In [159]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     int32  
 4   AI                9 non-null      float64
 5   NLP               8 non-null      float64
 6   Cv                11 non-null     float64
dtypes: float64(5), int32(1), object(1)
memory usage: 1.1+ KB


In [162]:
df['AI'].fillna(df['AI'].mean(),inplace=True)

In [164]:
df['AI'] = round(df['AI'],2)

In [165]:
df['AI']

0     90.00
1     73.11
2     78.00
3     73.11
4     73.11
5     65.00
6     45.00
7     80.00
8     70.00
9     73.11
10    73.11
11    80.00
12    73.11
13    80.00
14    73.11
15    70.00
16    73.11
17    73.11
18    73.11
Name: AI, dtype: float64

In [160]:
df['NLP'].fillna(round(df['NLP'].mean(),2),inplace=True)

In [161]:
df['NLP']

0     70.0
1     89.0
2     78.0
3     70.0
4     70.0
5     56.0
6     67.0
7     45.0
8     67.0
9     70.0
10    70.0
11    70.0
12    70.0
13    78.0
14    70.0
15    70.0
16    70.0
17    80.0
18    70.0
Name: NLP, dtype: float64

In [167]:
df['Cv'].fillna(round(df['Cv'].mean(),2),inplace=True)

In [168]:
df['Cv']

0     23.00
1     54.55
2     54.55
3     54.55
4     21.00
5     22.00
6     54.55
7     24.00
8     54.55
9     22.00
10    80.00
11    90.00
12    80.00
13    54.55
14    54.55
15    54.55
16    78.00
17    90.00
18    70.00
Name: Cv, dtype: float64

In [169]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     int32  
 4   AI                19 non-null     float64
 5   NLP               19 non-null     float64
 6   Cv                19 non-null     float64
dtypes: float64(5), int32(1), object(1)
memory usage: 1.1+ KB


### Type
## pd.to_numeric

In [194]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [195]:
df = pd.read_csv('data2.csv')
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90,?,23,90,?,23
1,rohit_madke,missing,89,,missing,89,
2,sahil,78,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,missing,,21,missing,,21
5,sanjil,65,56,22,65,56,22
6,swapnil,45,67,?,45,67,?
7,rajat,80,45,24,80,45,24
8,nishant,70,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [196]:
df['Python']=pd.to_numeric(df.Python,errors='coerce')

In [197]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,?,23,90,?,23
1,rohit_madke,,89,,missing,89,
2,sahil,78.0,78,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,,,21,missing,,21
5,sanjil,65.0,56,22,65,56,22
6,swapnil,45.0,67,?,45,67,?
7,rajat,80.0,45,24,80,45,24
8,nishant,70.0,67,missing,70,67,missing
9,ramachal,,,22,,,22


In [200]:
df['Python'].dtype

dtype('float64')

In [198]:
df['Machine Learning']=pd.to_numeric(df['Machine Learning'],errors='coerce')

In [199]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23,90,?,23
1,rohit_madke,,89.0,,missing,89,
2,sahil,78.0,78.0,?,78,78,?
3,rutuja,,,missing,,,missing
4,kasturi,,,21,missing,,21
5,sanjil,65.0,56.0,22,65,56,22
6,swapnil,45.0,67.0,?,45,67,?
7,rajat,80.0,45.0,24,80,45,24
8,nishant,70.0,67.0,missing,70,67,missing
9,ramachal,,,22,,,22


In [201]:
df['Machine Learning'].dtype

dtype('float64')

In [202]:
df['Age']=pd.to_numeric(df.Age,errors='coerce')

In [203]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90,?,23
1,rohit_madke,,89.0,,missing,89,
2,sahil,78.0,78.0,,78,78,?
3,rutuja,,,,,,missing
4,kasturi,,,21.0,missing,,21
5,sanjil,65.0,56.0,22.0,65,56,22
6,swapnil,45.0,67.0,,45,67,?
7,rajat,80.0,45.0,24.0,80,45,24
8,nishant,70.0,67.0,,70,67,missing
9,ramachal,,,22.0,,,22


In [204]:
df.Age.dtype

dtype('float64')

In [205]:
df['AI']=pd.to_numeric(df.AI,errors='coerce')

In [206]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,?,23
1,rohit_madke,,89.0,,,89,
2,sahil,78.0,78.0,,78.0,78,?
3,rutuja,,,,,,missing
4,kasturi,,,21.0,,,21
5,sanjil,65.0,56.0,22.0,65.0,56,22
6,swapnil,45.0,67.0,,45.0,67,?
7,rajat,80.0,45.0,24.0,80.0,45,24
8,nishant,70.0,67.0,,70.0,67,missing
9,ramachal,,,22.0,,,22


In [207]:
df.AI.dtype

dtype('float64')

In [208]:
df['NLP']=pd.to_numeric(df.NLP,errors='coerce')

In [209]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,?
3,rutuja,,,,,,missing
4,kasturi,,,21.0,,,21
5,sanjil,65.0,56.0,22.0,65.0,56.0,22
6,swapnil,45.0,67.0,,45.0,67.0,?
7,rajat,80.0,45.0,24.0,80.0,45.0,24
8,nishant,70.0,67.0,,70.0,67.0,missing
9,ramachal,,,22.0,,,22


In [210]:
df.NLP.dtype

dtype('float64')

In [211]:
df['Cv']=pd.to_numeric(df.Cv,errors='coerce')

In [212]:
df

Unnamed: 0,Name,Python,Machine Learning,Age,AI,NLP,Cv
0,rishi,90.0,,23.0,90.0,,23.0
1,rohit_madke,,89.0,,,89.0,
2,sahil,78.0,78.0,,78.0,78.0,
3,rutuja,,,,,,
4,kasturi,,,21.0,,,21.0
5,sanjil,65.0,56.0,22.0,65.0,56.0,22.0
6,swapnil,45.0,67.0,,45.0,67.0,
7,rajat,80.0,45.0,24.0,80.0,45.0,24.0
8,nishant,70.0,67.0,,70.0,67.0,
9,ramachal,,,22.0,,,22.0


In [213]:
df.Cv.dtype

dtype('float64')

In [214]:
df.columns

Index(['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv'], dtype='object')

In [215]:
list1 = list(df.columns)

In [216]:
list1

['Name', 'Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [217]:
list1 = list1[1:]

In [218]:
list1

['Python', 'Machine Learning', 'Age', 'AI', 'NLP', 'Cv']

In [219]:
for i in list1:
    df[i].fillna(round(df[i].mean(),2),inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              19 non-null     object 
 1   Python            19 non-null     float64
 2   Machine Learning  19 non-null     float64
 3   Age               19 non-null     float64
 4   AI                19 non-null     float64
 5   NLP               19 non-null     float64
 6   Cv                19 non-null     float64
dtypes: float64(6), object(1)
memory usage: 1.2+ KB
