# Part 2 -  DataFrame and Series Basics - Selecting Rows and Columns

In [1]:
# A single record as a plain dict: every key maps to one scalar value.
Person = dict(
    first='Medhat',
    last="Sobhy",
    email="medhatsobhy231@gmail.com",
)

In [2]:
# Same record, but each value is wrapped in a list so that a key can hold
# one entry per row (here: a single row).
Person = dict(
    first=['Medhat'],
    last=["Sobhy"],
    email=["medhatsobhy231@gmail.com"],
)

In [3]:
# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per person.
People = dict(
    first=['Medhat', 'Leo', "Ronald"],
    last=["Sobhy", "Messi", "Araujo"],
    email=[
        "medhatsobhy231@gmail.com",
        "Leomessi10@gmail.com",
        "Ronaldaraujo4@gmail.com",
    ],
)

In [4]:
import pandas as pd

In [5]:
# Build a DataFrame from the dict: keys become column names, lists become column values.
df = pd.DataFrame(data=People)

In [6]:
df

Unnamed: 0,first,last,email
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


In [7]:
df["email"] #accessing the column email --> pandas series (A column of rows)

0    medhatsobhy231@gmail.com
1        Leomessi10@gmail.com
2     Ronaldaraujo4@gmail.com
Name: email, dtype: object

In [8]:
df.email #same as the above (not preferred due to a probable conflict with builtin methods)

0    medhatsobhy231@gmail.com
1        Leomessi10@gmail.com
2     Ronaldaraujo4@gmail.com
Name: email, dtype: object

In [9]:
df[["first", "email"]] #accessing multiple columns in a dataframe

Unnamed: 0,first,email
0,Medhat,medhatsobhy231@gmail.com
1,Leo,Leomessi10@gmail.com
2,Ronald,Ronaldaraujo4@gmail.com


In [10]:
df.columns #display the columns of a dataframe

Index(['first', 'last', 'email'], dtype='object')

In [11]:
df.iloc[[0,1], 0] #It is used to access rows and columns of a DataFrame by their integer-based index positions. [row_index , col_index ]

0    Medhat
1       Leo
Name: first, dtype: object

In [12]:
df.iloc[: , 1] #all rows of the second column
#df.iloc[1 , :] #second row of all columns

0     Sobhy
1     Messi
2    Araujo
Name: last, dtype: object

In [13]:
df.loc[[0,1], ['email' , 'first']] #it is label-based. This means you use row and column labels (instead of integer positions) to select data.

Unnamed: 0,email,first
0,medhatsobhy231@gmail.com,Medhat
1,Leomessi10@gmail.com,Leo


# Part 3 -   Indexes - How to Set, Reset, and Use Indexes

In [14]:
df.set_index('email') #set a column as the index of the dataframe (preferably a unique one)

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
medhatsobhy231@gmail.com,Medhat,Sobhy
Leomessi10@gmail.com,Leo,Messi
Ronaldaraujo4@gmail.com,Ronald,Araujo


In [15]:
df #indexing by email wasn't performed on the original object (use inplace = True) to modify the original dataframe

Unnamed: 0,first,last,email
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


In [16]:
df.set_index('email', inplace = True) #modify the indexing of the original dataframe

In [17]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
medhatsobhy231@gmail.com,Medhat,Sobhy
Leomessi10@gmail.com,Leo,Messi
Ronaldaraujo4@gmail.com,Ronald,Araujo


In [18]:
df.index #display the indices of the dataframe

Index(['medhatsobhy231@gmail.com', 'Leomessi10@gmail.com',
       'Ronaldaraujo4@gmail.com'],
      dtype='object', name='email')

In [19]:
df.loc['medhatsobhy231@gmail.com' , 'first'] # locate by label indexes --> medhatsobhy231@gmail.com is the first index --> return the first row
# of the column 'first'

'Medhat'

In [20]:
df.iloc [0 , 0] #locate by integers 

'Medhat'

In [21]:
df.reset_index() #dataframe indexing is back to its default state 
# use inplace = True to modify the original object

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


In [22]:
df.reset_index(inplace= True)

In [23]:
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


# Part 4 - Filtering - Using Conditionals to Filter Rows and Columns

In [24]:
# Three equivalent ways to filter rows; only the last one is active.
#
# 1) Build the boolean mask first, then index with it:
#    filt = df['last'] == 'Sobhy'
#    df[filt]
#
# 2) Inline the mask in a single command (note that this reassigns df):
#    df = df[df['last'] == 'Sobhy']
#    df.head()
#
# 3) Pass the boolean mask to .loc:
filt = df['last'].eq('Sobhy')
df.loc[filt]

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy


In [25]:
filtered_df = df[~ (df['last'] == 'Sobhy')] #negates the results .. notice the condition being inside parentheses after the ~ 
filtered_df.head()

Unnamed: 0,email,first,last
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


In [26]:
(df['last'] == 'Sobhy') # Returns a series of booleans for records that matched the condition "True" and that didn't match "False"

0     True
1    False
2    False
Name: last, dtype: bool

# PART5 -  Updating Rows and Columns - Modifying Data Within DataFrames

In [27]:
df.columns


Index(['email', 'first', 'last'], dtype='object')

In [28]:
df.columns = ['email', 'first_name', 'last_name'] #changing the columns names
df.columns



Index(['email', 'first_name', 'last_name'], dtype='object')

In [29]:
df

Unnamed: 0,email,first_name,last_name
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


In [30]:
# Upper-case every column header via the vectorised string accessor on the column Index.
df.columns = df.columns.str.upper()
df

Unnamed: 0,EMAIL,FIRST_NAME,LAST_NAME
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


In [31]:
# Lower-case every column header via the vectorised string accessor on the column Index.
df.columns = df.columns.str.lower()
df

Unnamed: 0,email,first_name,last_name
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


In [32]:
df.rename(columns = {'first_name' : 'first', 'last_name' : 'last'}, inplace= True) #changing column names using df.rename()
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,Ronaldaraujo4@gmail.com,Ronald,Araujo


## Updating Rows

In [33]:
#using loc --> access a specific row --> change its record
df.loc[2] = ['davidhume@gmail.com','David', 'Hume', ] #updating the third row
df


Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,davidhume@gmail.com,David,Hume


In [34]:
#updating specific columns of a specific row using loc
df.loc[2, ['last', 'email']] = ['Fincher', 'davidfincher@gmail.com']


In [35]:
df.loc[2,'last'] = 'Lynch'
df


Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,Lynch


In [36]:
df.loc[2,'last'] = 'fincher'
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,fincher


In [37]:
#update using filtering 
filt = (df['email'] == 'davidfincher@gmail.com') #returns last row
df.loc[filt, 'last'] = 'Smith' #update on 'last' of the last row
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,Leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,Smith


In [38]:
df['email'] = df['email'].str.upper() #change the element of email columns to uppercase
df


Unnamed: 0,email,first,last
0,MEDHATSOBHY231@GMAIL.COM,Medhat,Sobhy
1,LEOMESSI10@GMAIL.COM,Leo,Messi
2,DAVIDFINCHER@GMAIL.COM,David,Smith


In [39]:
df['email'] = df['email'].str.lower() #change the element of email columns back to lowercase
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,Smith


> `apply` is used to apply a function along an axis (rows or columns) of a DataFrame or on a Series. It is versatile and can execute custom functions on DataFrame rows, columns, or Series.



In [40]:
# Apply on series 
df['email'].apply(len) # return a series of the length of each element in that column

0    24
1    20
2    22
Name: email, dtype: int64

In [41]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Intended to be passed to ``Series.apply``, which calls it once per element.
    """
    upper_cased = email.upper()
    return upper_cased

df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,email,first,last
0,MEDHATSOBHY231@GMAIL.COM,Medhat,Sobhy
1,LEOMESSI10@GMAIL.COM,Leo,Messi
2,DAVIDFINCHER@GMAIL.COM,David,Smith


In [42]:
df.loc[2,'last'] = 'fincher'
df['email'] = df['email'].str.lower()
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,fincher


In [43]:
#apply with lambda expression 
df['email'] = df['email'].apply(lambda x: x.upper()) #lambda expression to change emails to uppercase
df

Unnamed: 0,email,first,last
0,MEDHATSOBHY231@GMAIL.COM,Medhat,Sobhy
1,LEOMESSI10@GMAIL.COM,Leo,Messi
2,DAVIDFINCHER@GMAIL.COM,David,fincher


In [44]:
df['email'] = df['email'].apply(lambda x: x.lower()) 
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,Medhat,Sobhy
1,leomessi10@gmail.com,Leo,Messi
2,davidfincher@gmail.com,David,fincher


In [45]:
# apply on a DataFrame, row by row:
# axis=1 (alias 'columns') hands each ROW to the function, so len() returns
# the number of columns in that row -- one result per row label.
df.apply(len, axis=1)


0    3
1    3
2    3
dtype: int64

In [46]:
# apply on a DataFrame, column by column:
# axis=0 (alias 'rows'/'index', the default) hands each COLUMN to the function,
# so len() returns the number of rows in that column -- one result per column name.
df.apply(len, axis=0)

email    3
first    3
last     3
dtype: int64

In [47]:
len(df['email']) #series length

3

In [48]:
df.apply(pd.Series.min) #Applies the min function to each column in the DataFrame.

email    davidfincher@gmail.com
first                     David
last                      Messi
dtype: object

In [49]:
df.apply(lambda x: x.min()) #for each series (column) apply min

email    davidfincher@gmail.com
first                     David
last                      Messi
dtype: object

1. `apply` on a Series
- Operates on each element in the Series.
- Applies a function element-wise.
2. `apply` on a DataFrame
- Operates on each row or column of the DataFrame.
- Requires specifying axis:
    - axis=0 (default): Applies the function to each column.
    - axis=1: Applies the function to each row.

> `DataFrame.map` (named `applymap` before pandas 2.1, where `applymap` was deprecated) is used to apply a function element-wise to all entries in a DataFrame. It works on each individual cell, unlike `apply`, which operates on rows or columns.

In [50]:
# DataFrame.map calls the function on every individual cell. It replaces
# DataFrame.applymap, which is deprecated and emits a FutureWarning.
df.map(len)

  df.applymap(len)


Unnamed: 0,email,first,last
0,24,6,5
1,20,3,5
2,22,5,7


In [51]:
# Upper-case every cell. DataFrame.map is the non-deprecated replacement for
# DataFrame.applymap; the result is only displayed, df itself is unchanged.
df.map(str.upper)

  df.applymap(str.upper)


Unnamed: 0,email,first,last
0,MEDHATSOBHY231@GMAIL.COM,MEDHAT,SOBHY
1,LEOMESSI10@GMAIL.COM,LEO,MESSI
2,DAVIDFINCHER@GMAIL.COM,DAVID,FINCHER


In [52]:
df = df.map(str.lower)

In [53]:
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,medhat,sobhy
1,leomessi10@gmail.com,leo,messi
2,davidfincher@gmail.com,david,fincher


In [54]:
df['first'] = df['first'].replace({'medhat' : 'med' , 'leo' : 'lionel'})
df

Unnamed: 0,email,first,last
0,medhatsobhy231@gmail.com,med,sobhy
1,leomessi10@gmail.com,lionel,messi
2,davidfincher@gmail.com,david,fincher


# PART6 -  Add/Remove Rows and Columns From DataFrames

In [55]:
# Adding Column
df['Full_Name'] = df['first']+' '+ df['last']
df

Unnamed: 0,email,first,last,Full_Name
0,medhatsobhy231@gmail.com,med,sobhy,med sobhy
1,leomessi10@gmail.com,lionel,messi,lionel messi
2,davidfincher@gmail.com,david,fincher,david fincher


In [56]:
df.drop(columns = ['first', 'last'], inplace = True) #removing columns
df

Unnamed: 0,email,Full_Name
0,medhatsobhy231@gmail.com,med sobhy
1,leomessi10@gmail.com,lionel messi
2,davidfincher@gmail.com,david fincher


In [57]:
df['Full_Name'].str.split(' ', expand= True) # True --> returns a data frame , False --> returns a series



Unnamed: 0,0,1
0,med,sobhy
1,lionel,messi
2,david,fincher


In [58]:
df

Unnamed: 0,email,Full_Name
0,medhatsobhy231@gmail.com,med sobhy
1,leomessi10@gmail.com,lionel messi
2,davidfincher@gmail.com,david fincher


In [59]:
df[['first','last']] = df['Full_Name'].str.split(' ', expand= True)

In [60]:
df

Unnamed: 0,email,Full_Name,first,last
0,medhatsobhy231@gmail.com,med sobhy,med,sobhy
1,leomessi10@gmail.com,lionel messi,lionel,messi
2,davidfincher@gmail.com,david fincher,david,fincher


In [63]:
#adding rows 
df = pd.concat([df , pd.DataFrame({'first' : ['Kendrick']})], ignore_index = True) #pd.concat([df1, df2], ignore_index= True)
df

Unnamed: 0,email,Full_Name,first,last
0,medhatsobhy231@gmail.com,med sobhy,med,sobhy
1,leomessi10@gmail.com,lionel messi,lionel,messi
2,davidfincher@gmail.com,david fincher,david,fincher
3,,,Kendrick,
4,,,Kendrick,


In [64]:
# Drop the row labelled 4. That row only exists when the concat cell above has
# been executed more than once (which is what produced the duplicate
# 'Kendrick' row). errors="ignore" makes this a no-op instead of a KeyError
# when the notebook is run top to bottom on a fresh kernel.
df = df.drop(index=4, errors="ignore")


In [136]:
df

Unnamed: 0,email,Full_Name,first,last
0,medhatsobhy231@gmail.com,med sobhy,med,sobhy
1,leomessi10@gmail.com,lionel messi,lionel,messi
2,davidfincher@gmail.com,david fincher,david,fincher
3,,,Kendrick,


In [66]:
# Two extra records, column-oriented, to be appended to the existing DataFrame.
people = dict(
    first=['Hank', 'Walter'],
    last=['Anderson', 'White'],
    email=['hankanderson@gmail.com', 'walterwhite@gmail.com'],
)

In [67]:
df2 = pd.DataFrame(people)

In [68]:
df = pd.concat([df, df2], ignore_index=True)

In [69]:
df

Unnamed: 0,email,Full_Name,first,last
0,medhatsobhy231@gmail.com,med sobhy,med,sobhy
1,leomessi10@gmail.com,lionel messi,lionel,messi
2,davidfincher@gmail.com,david fincher,david,fincher
3,,,Kendrick,
4,hankanderson@gmail.com,,Hank,Anderson
5,walterwhite@gmail.com,,Walter,White


In [70]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [71]:
df.values

array([['medhatsobhy231@gmail.com', 'med sobhy', 'med', 'sobhy'],
       ['leomessi10@gmail.com', 'lionel messi', 'lionel', 'messi'],
       ['davidfincher@gmail.com', 'david fincher', 'david', 'fincher'],
       [nan, nan, 'Kendrick', nan],
       ['hankanderson@gmail.com', nan, 'Hank', 'Anderson'],
       ['walterwhite@gmail.com', nan, 'Walter', 'White']], dtype=object)

In [72]:
filt = (df['first'] == 'Kendrick')
df[filt].index

Index([3], dtype='int64')

In [73]:
filt = (df['first'] == 'Kendrick')
df.drop(index = df[filt].index) #drop index 3

Unnamed: 0,email,Full_Name,first,last
0,medhatsobhy231@gmail.com,med sobhy,med,sobhy
1,leomessi10@gmail.com,lionel messi,lionel,messi
2,davidfincher@gmail.com,david fincher,david,fincher
4,hankanderson@gmail.com,,Hank,Anderson
5,walterwhite@gmail.com,,Walter,White


# PART 7 - Sorting Data

In [79]:
# Fresh sample data for the sorting examples (four people, column-oriented).
people = dict(
    first=['Medhat', 'Leo', "Ronald", "John"],
    last=["Sobhy", "Messi", "Araujo", "Watson"],
    email=[
        "medhatsobhy231@gmail.com",
        "Leomessi10@gmail.com",
        "Ronaldaraujo4@gmail.com",
        "johnwatson@gmail.com",
    ],
)

In [80]:
df = pd.DataFrame(people)

In [81]:
df

Unnamed: 0,first,last,email
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com
3,John,Watson,johnwatson@gmail.com


In [82]:
#sorting by column
df.sort_values(by = 'last' , ascending = False)#sort by last name in descending order

Unnamed: 0,first,last,email
3,John,Watson,johnwatson@gmail.com
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


In [83]:
#sort by multiple columns (if two column values has the same order then order by another column)
df.sort_values(by = ['last' , 'first'] , ascending= False)

Unnamed: 0,first,last,email
3,John,Watson,johnwatson@gmail.com
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


In [84]:
df.loc[3] = ['Emma', 'Watson', 'Emmawatson@gmail.com']

In [86]:
df.loc[4] = ['John', 'Watson', 'Johnwatson@gmail.com']

In [87]:
df

Unnamed: 0,first,last,email
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com
3,Emma,Watson,Emmawatson@gmail.com
4,John,Watson,Johnwatson@gmail.com


In [89]:
df.sort_values(by = 'last' , ascending = False)

Unnamed: 0,first,last,email
3,Emma,Watson,Emmawatson@gmail.com
4,John,Watson,Johnwatson@gmail.com
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


In [88]:
df.sort_values(by = ['last' , 'first'] , ascending= False) #Notice the difference between this and the above sorting

Unnamed: 0,first,last,email
4,John,Watson,Johnwatson@gmail.com
3,Emma,Watson,Emmawatson@gmail.com
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com


> use `inplace=True` to modify the original dataframe

In [90]:
#sorting by index
df.sort_index() 

Unnamed: 0,first,last,email
0,Medhat,Sobhy,medhatsobhy231@gmail.com
1,Leo,Messi,Leomessi10@gmail.com
2,Ronald,Araujo,Ronaldaraujo4@gmail.com
3,Emma,Watson,Emmawatson@gmail.com
4,John,Watson,Johnwatson@gmail.com


In [94]:
df['last'].sort_values()

2    Araujo
1     Messi
0     Sobhy
3    Watson
4    Watson
Name: last, dtype: object

# PART 9 -  Cleaning Data - Casting Datatypes and Handling Missing Values

In [95]:
import pandas as pd
import numpy as np
# Sample data with several kinds of "missing" entries: real NaN (np.nan),
# None, and the placeholder strings 'NA' / 'Missing'. Ages are stored as strings.
people = dict(
    first=['Medhat', 'Leo', 'Carl', 'Zayn', np.nan, None, 'NA'],
    last=['Sobhy', 'Messi', 'Jung', 'Malik', np.nan, np.nan, 'Missing'],
    email=[
        'MedhatSobhy@gmail.com',
        'Leomessi@email.com',
        'Carljung@email.com',
        None,
        np.nan,
        'Anonymous@email.com',
        'NA',
    ],
    age=['24', '38', '111', '32', None, None, 'Missing'],
)

In [96]:
df = pd.DataFrame(people)


In [97]:
df

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24
1,Leo,Messi,Leomessi@email.com,38
2,Carl,Jung,Carljung@email.com,111
3,Zayn,Malik,,32
4,,,,
5,,,Anonymous@email.com,
6,,Missing,,Missing


In [98]:
# Drop every row that contains at least one missing value (NaN/None).
# Placeholder strings such as 'NA' or 'Missing' are ordinary values to pandas,
# so row 6 is not removed; convert them to NaN first (e.g. with replace).
df.dropna(axis='index', how='any')

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24
1,Leo,Messi,Leomessi@email.com,38
2,Carl,Jung,Carljung@email.com,111
6,,Missing,,Missing


In [105]:
df.replace('NA', np.nan, inplace = True)
df.replace('Missing', np.nan, inplace= True)

In [106]:
df

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24.0
1,Leo,Messi,Leomessi@email.com,38.0
2,Carl,Jung,Carljung@email.com,111.0
3,Zayn,Malik,,32.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [107]:
# how='all' drops a row only when every column in that row is NaN:
# rows 4 and 6 are removed; rows with at least one value are kept.
df.dropna(how='all', axis='index')

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24.0
1,Leo,Messi,Leomessi@email.com,38.0
2,Carl,Jung,Carljung@email.com,111.0
3,Zayn,Malik,,32.0
5,,,Anonymous@email.com,


In [108]:
df.dropna(axis = 'columns' , how ='any')
# returns empty dataframe .. all columns contained atleast one nan

0
1
2
3
4
5
6


In [109]:
#delete record only if emails had nan
df.dropna(axis = 'index' , how ='any' , subset='email')


Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24.0
1,Leo,Messi,Leomessi@email.com,38.0
2,Carl,Jung,Carljung@email.com,111.0
5,,,Anonymous@email.com,


In [113]:
#delete if email or last name are nans
df.dropna (axis ='index', how = 'any' , subset =['email', 'last'])

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24
1,Leo,Messi,Leomessi@email.com,38
2,Carl,Jung,Carljung@email.com,111


In [115]:
#delete if email and last name are nans
df.dropna (axis ='index', how = 'all' , subset =['email', 'last'])

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24.0
1,Leo,Messi,Leomessi@email.com,38.0
2,Carl,Jung,Carljung@email.com,111.0
3,Zayn,Malik,,32.0
5,,,Anonymous@email.com,


In [118]:
df.isna() #detect missing values

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,True,True


In [119]:
df.fillna(0) #fill nans with the specified values

Unnamed: 0,first,last,email,age
0,Medhat,Sobhy,MedhatSobhy@gmail.com,24
1,Leo,Messi,Leomessi@email.com,38
2,Carl,Jung,Carljung@email.com,111
3,Zayn,Malik,0,32
4,0,0,0,0
5,0,0,Anonymous@email.com,0
6,0,0,0,0


In [120]:
df.dtypes

first    object
last     object
email    object
age      object
dtype: object

In [121]:
# 'age' is still object dtype (strings), so mean() cannot add the values up.
# The error is the point of this cell, so catch and print it instead of letting
# it abort a "Run All" before the cells below are reached.
try:
    print(df['age'].mean())
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: can only concatenate str (not "int") to str

In [122]:
# Casting to int fails while the column still holds missing values: cast to
# float instead, or fill the NaNs (e.g. with 0) first and then cast to int.
# The failure is caught and printed so the rest of the notebook keeps running;
# on failure the column is left unchanged, exactly as with the uncaught error.
try:
    df['age'] = df['age'].astype(int)
except (TypeError, ValueError) as err:
    print(f"{type(err).__name__}: {err}")

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

In [123]:
df['age'] = df['age'].astype(float)

In [124]:
df.dtypes

first     object
last      object
email     object
age      float64
dtype: object

In [125]:
df['age'].mean()

51.25