In [188]:
import pandas as pd

# Create dummy data frame

In [189]:
people = {
    'first': ['mohamed', 'ali', 'mena', 'salma'],
    'last': ['khaled', 'ahmed', 'omar', 'gamal'],
    'email' : ['mohamed@gmail.com', 'ali@gmail.com', 'mena@gmail.com', 'salma@gamil.com'],
    'gender': ['male', 'male', 'female', 'female']
}

In [190]:
df = pd.DataFrame(people);

In [191]:
df

Unnamed: 0,first,last,email,gender
0,mohamed,khaled,mohamed@gmail.com,male
1,ali,ahmed,ali@gmail.com,male
2,mena,omar,mena@gmail.com,female
3,salma,gamal,salma@gamil.com,female


# Ways to select from a data frame

In [192]:
# select single column
df['first']

0    mohamed
1        ali
2       mena
3      salma
Name: first, dtype: object

In [193]:
df['last']

0    khaled
1     ahmed
2      omar
3     gamal
Name: last, dtype: object

In [194]:
# one column in a datafrom is series
type(df['email'])

pandas.core.series.Series

In [195]:
# another way to select one column by dot notation but here there is an error with column name that have space
df.email

0    mohamed@gmail.com
1        ali@gmail.com
2       mena@gmail.com
3      salma@gamil.com
Name: email, dtype: object

In [196]:
# select multi columns  
df[['first', 'last']]

Unnamed: 0,first,last
0,mohamed,khaled
1,ali,ahmed
2,mena,omar
3,salma,gamal


In [197]:
df[['first', 'email']]

Unnamed: 0,first,email
0,mohamed,mohamed@gmail.com
1,ali,ali@gmail.com
2,mena,mena@gmail.com
3,salma,salma@gamil.com


In [198]:
# can use iloc to select columns and row but iloc is not a function dont use ()
# iloc accept only index

# select first column
df.iloc[0]

first               mohamed
last                 khaled
email     mohamed@gmail.com
gender                 male
Name: 0, dtype: object

In [199]:
# select second column
df.iloc[1]

first               ali
last              ahmed
email     ali@gmail.com
gender             male
Name: 1, dtype: object

In [200]:
#select all data set 
df.iloc[:,:]

Unnamed: 0,first,last,email,gender
0,mohamed,khaled,mohamed@gmail.com,male
1,ali,ahmed,ali@gmail.com,male
2,mena,omar,mena@gmail.com,female
3,salma,gamal,salma@gamil.com,female


In [201]:
# select last columns
df.iloc[:, -1]

0      male
1      male
2    female
3    female
Name: gender, dtype: object

In [202]:
# select all columns except last columns
df.iloc[:, :-1]

Unnamed: 0,first,last,email
0,mohamed,khaled,mohamed@gmail.com
1,ali,ahmed,ali@gmail.com
2,mena,omar,mena@gmail.com
3,salma,gamal,salma@gamil.com


In [203]:
# select first three row
df.iloc[:3, :]

Unnamed: 0,first,last,email,gender
0,mohamed,khaled,mohamed@gmail.com,male
1,ali,ahmed,ali@gmail.com,male
2,mena,omar,mena@gmail.com,female


In [204]:
# select last two row
df.iloc[-2:, :]

Unnamed: 0,first,last,email,gender
2,mena,omar,mena@gmail.com,female
3,salma,gamal,salma@gamil.com,female


In [205]:
# can use loc to select columns and row but loc is not a function dont use ()
# iloc accept only name of colums 

# select columns that name is first
df.loc[:, 'first']

0    mohamed
1        ali
2       mena
3      salma
Name: first, dtype: object

In [206]:
df.loc[:, 'first': 'last']

Unnamed: 0,first,last
0,mohamed,khaled
1,ali,ahmed
2,mena,omar
3,salma,gamal


#  Data frame index

In [207]:
# set index of dataframe to email colums

df.set_index('email', inplace=True)

In [208]:
df

Unnamed: 0_level_0,first,last,gender
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
mohamed@gmail.com,mohamed,khaled,male
ali@gmail.com,ali,ahmed,male
mena@gmail.com,mena,omar,female
salma@gamil.com,salma,gamal,female


In [209]:
df.loc['ali@gmail.com':'salma@gamil.com' , 'last']

email
ali@gmail.com      ahmed
mena@gmail.com      omar
salma@gamil.com    gamal
Name: last, dtype: object

In [210]:
df.iloc[0]

first     mohamed
last       khaled
gender       male
Name: mohamed@gmail.com, dtype: object

In [211]:
# here we reset index of dataframe to default index [0, 1, 2, ....] 
# (inplace = True) => use to aply changes on original data frame 

df.reset_index(inplace=True)

In [212]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


#  select row based on conditions

In [213]:
df[df['last'] == 'ahmed']

Unnamed: 0,email,first,last,gender
1,ali@gmail.com,ali,ahmed,male


In [214]:
df[(df['first'] == 'ali') | (df['first'] == 'mohamed')]

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male


In [215]:
df[df['gender'] == 'male']

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male


In [216]:
 df[~(df['email'] == 'sayed@gmail.com')]

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


In [217]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


# Columns names

In [218]:
# disply all colnmns names
df.columns

Index(['email', 'first', 'last', 'gender'], dtype='object')

In [219]:
# rename all columns names
df.columns = ['email', 'first_name', 'last_name', 'gender']

In [220]:
df.columns

Index(['email', 'first_name', 'last_name', 'gender'], dtype='object')

In [221]:
df

Unnamed: 0,email,first_name,last_name,gender
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


In [222]:
# make all columns name upper case
df.columns = [x.upper() for x in df.columns]

In [223]:
df

Unnamed: 0,EMAIL,FIRST_NAME,LAST_NAME,GENDER
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


In [224]:
# use rename method to rename specific column
df.rename(columns={'FIRST_NAME':'first', 'LAST_NAME': 'last'}, inplace=True)

In [225]:
df

Unnamed: 0,EMAIL,first,last,GENDER
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,omar,female
3,salma@gamil.com,salma,gamal,female


#  Update existing columns

In [226]:
# here we update all columns

df.loc[2] = ['heba@gmail.com', 'heba', 'khaled', 'female']

In [227]:
df

Unnamed: 0,EMAIL,first,last,GENDER
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,heba@gmail.com,heba,khaled,female
3,salma@gamil.com,salma,gamal,female


In [228]:
# here we update two columns last and email only
df.loc[2, ['last', 'EMAIL']] = ['mena', 'mena@gmail.com']

In [229]:
df

Unnamed: 0,EMAIL,first,last,GENDER
0,mohamed@gmail.com,mohamed,khaled,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,heba,mena,female
3,salma@gamil.com,salma,gamal,female


In [230]:
# make all columns name upper case
df.columns = [x.lower() for x in df.columns]

In [231]:
df.loc[0, ['last']] = 'selim'

In [232]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,heba,mena,female
3,salma@gamil.com,salma,gamal,female


In [233]:
df.at[2, 'last'] = 'khaled'

In [234]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,heba,khaled,female
3,salma@gamil.com,salma,gamal,female


In [235]:
filt = (df['email'] == 'mena@gmail.com' )

In [236]:
# here we update user who email is mena@gmail.com 

df.loc[2, 'first'] = 'mena'

In [237]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,khaled,female
3,salma@gamil.com,salma,gamal,female


In [238]:
df['email'] = df['email'].str.capitalize()

In [239]:
df

Unnamed: 0,email,first,last,gender
0,Mohamed@gmail.com,mohamed,selim,male
1,Ali@gmail.com,ali,ahmed,male
2,Mena@gmail.com,mena,khaled,female
3,Salma@gamil.com,salma,gamal,female


# apply function

In [240]:
# apply func work only on series
df['email'].apply(len)

0    17
1    13
2    14
3    15
Name: email, dtype: int64

In [241]:
def update_email(email):
    return email.lower();

In [242]:
df['email'].apply(update_email)

0    mohamed@gmail.com
1        ali@gmail.com
2       mena@gmail.com
3      salma@gamil.com
Name: email, dtype: object

In [243]:
df['email'] = df['email'].apply(update_email)

In [244]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,khaled,female
3,salma@gamil.com,salma,gamal,female


In [245]:
df['email'] = df['email'].apply(lambda x: x.lower())

In [246]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,khaled,female
3,salma@gamil.com,salma,gamal,female


In [247]:
df['email'].apply(len)

0    17
1    13
2    14
3    15
Name: email, dtype: int64

In [248]:
df.apply(len)

email     4
first     4
last      4
gender    4
dtype: int64

In [249]:
len(df['email'])

4

In [250]:
df.apply(pd.Series.min)

email     ali@gmail.com
first               ali
last              ahmed
gender           female
dtype: object

In [251]:
df.apply(lambda x: x.min())

email     ali@gmail.com
first               ali
last              ahmed
gender           female
dtype: object

# applymap function

In [252]:
# applymap work only on dateframe
df.applymap(len)

Unnamed: 0,email,first,last,gender
0,17,7,5,4
1,13,3,5,4
2,14,4,6,6
3,15,5,5,6


In [253]:
df.applymap(str.capitalize)

Unnamed: 0,email,first,last,gender
0,Mohamed@gmail.com,Mohamed,Selim,Male
1,Ali@gmail.com,Ali,Ahmed,Male
2,Mena@gmail.com,Mena,Khaled,Female
3,Salma@gamil.com,Salma,Gamal,Female


In [254]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,khaled,female
3,salma@gamil.com,salma,gamal,female


# map function

In [255]:
# map func work only on series
df['first'].map({'ali': 'jone', 'mena': "hepa"})

0     NaN
1    jone
2    hepa
3     NaN
Name: first, dtype: object

In [256]:
# we can use replace instead of map because can out nan cells
df['first'].replace({'ali': 'jone', 'heba': "mena"})

0    mohamed
1       jone
2       mena
3      salma
Name: first, dtype: object

In [257]:
df

Unnamed: 0,email,first,last,gender
0,mohamed@gmail.com,mohamed,selim,male
1,ali@gmail.com,ali,ahmed,male
2,mena@gmail.com,mena,khaled,female
3,salma@gamil.com,salma,gamal,female


In [258]:
df['full_name'] = df['first'] + ' ' + df['last']

In [259]:
df

Unnamed: 0,email,first,last,gender,full_name
0,mohamed@gmail.com,mohamed,selim,male,mohamed selim
1,ali@gmail.com,ali,ahmed,male,ali ahmed
2,mena@gmail.com,mena,khaled,female,mena khaled
3,salma@gamil.com,salma,gamal,female,salma gamal


# Drop Columns

In [283]:
df.drop(columns=['first', 'last'], inplace=True)

In [284]:
df

Unnamed: 0,email,gender,full_name
0,mohamed@gmail.com,male,mohamed selim
1,ali@gmail.com,male,ali ahmed
2,mena@gmail.com,female,mena khaled
3,salma@gamil.com,female,salma gamal


In [289]:
# here we can get first name last name from full name by split function
# (expand= True) => to convert result to datafrom instead of array
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,mohamed,selim
1,ali,ahmed
2,mena,khaled
3,salma,gamal


In [291]:
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)

In [292]:
df

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal


# append rows

In [295]:
df.append({'first': 'wael'}, ignore_index=True)

  df.append({'first': 'wael'}, ignore_index=True)


Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal
4,,,,wael,


In [297]:
people = {
    'first': ['gemi', 'assem'],
    'last': ['magdi', 'pasel'],
    'email' : ['assem@gmail.com', 'pasel@gmail.com'],
    'gender': ['male', 'male']
}

df2 = pd.DataFrame(people)

In [298]:
df2

Unnamed: 0,first,last,email,gender
0,gemi,magdi,assem@gmail.com,male
1,assem,pasel,pasel@gmail.com,male


In [301]:
df.append(df2, ignore_index=True, sort=True)

  df.append(df2, ignore_index=True, sort=True)


Unnamed: 0,email,first,full_name,gender,last
0,mohamed@gmail.com,mohamed,mohamed selim,male,selim
1,ali@gmail.com,ali,ali ahmed,male,ahmed
2,mena@gmail.com,mena,mena khaled,female,khaled
3,salma@gamil.com,salma,salma gamal,female,gamal
4,assem@gmail.com,gemi,,male,magdi
5,pasel@gmail.com,assem,,male,pasel


In [302]:
df = df.append(df2, ignore_index=True, sort=False)

  df = df.append(df2, ignore_index=True, sort=False)


In [303]:
df

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal
4,assem@gmail.com,male,,gemi,magdi
5,pasel@gmail.com,male,,assem,pasel


# Drop row based on index

In [306]:
df.drop(index=4, inplace=True)

In [307]:
 df

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal
5,pasel@gmail.com,male,,assem,pasel


In [308]:
filt = df[df['last'] == 'pasel'].index # here we get index of user who last name is pasel by index
df.drop(index= filt) # and drop it

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal


In [309]:
df

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal
5,pasel@gmail.com,male,,assem,pasel


# Sorting

In [310]:
# here we sort by last name columns desending
df.sort_values(by='last')

Unnamed: 0,email,gender,full_name,first,last
1,ali@gmail.com,male,ali ahmed,ali,ahmed
3,salma@gamil.com,female,salma gamal,salma,gamal
2,mena@gmail.com,female,mena khaled,mena,khaled
5,pasel@gmail.com,male,,assem,pasel
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim


In [311]:
df.sort_values(by=['email', 'last'], ascending=True)

Unnamed: 0,email,gender,full_name,first,last
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
5,pasel@gmail.com,male,,assem,pasel
3,salma@gamil.com,female,salma gamal,salma,gamal


In [314]:
df.sort_values(by=['email', 'last'], ascending=[True, False], inplace=True)

In [315]:
df

Unnamed: 0,email,gender,full_name,first,last
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
5,pasel@gmail.com,male,,assem,pasel
3,salma@gamil.com,female,salma gamal,salma,gamal


In [316]:
# here we sort by index not values of a certain columns
df.sort_index()

Unnamed: 0,email,gender,full_name,first,last
0,mohamed@gmail.com,male,mohamed selim,mohamed,selim
1,ali@gmail.com,male,ali ahmed,ali,ahmed
2,mena@gmail.com,female,mena khaled,mena,khaled
3,salma@gamil.com,female,salma gamal,salma,gamal
5,pasel@gmail.com,male,,assem,pasel


In [317]:
df['last'].sort_values()

1     ahmed
3     gamal
2    khaled
5     pasel
0     selim
Name: last, dtype: object