In [0]:
import pandas as pd

In [0]:
# Build the employee table from a dict: each key becomes a column and each
# list supplies that column's values, one entry per row.
emp = {
    "name": ["Rohish", "Smit", "Priya"],
    "gender": ["Male", "Male", "Female"],
    "email": ["rohish@gmail.com", "smit@gmail.com", "priya@gmail.com"],
}
df = pd.DataFrame(emp)
df

Unnamed: 0,name,gender,email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


#### Filtering with loc and iloc


In [0]:
# Boolean mask: True for the rows whose name is 'Rohish'.
fc = df['name'].eq('Rohish')
# df[fc] would return every column of the matching rows; .loc lets us pick
# both the rows (via the mask) and the columns (here only 'gender').
df.loc[fc, ['gender']]

Unnamed: 0,gender
0,Male


In [0]:
df.index = ['a', 'b', 'c']
df

Unnamed: 0,name,gender,email
a,Rohish,Male,rohish@gmail.com
b,Smit,Male,smit@gmail.com
c,Priya,Female,priya@gmail.com


In [0]:
df.index  = [0,1,2]
df

Unnamed: 0,name,gender,email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


### Renaming columns in a pandas DataFrame

In [0]:
# to see the columns in dataframe
df.columns

Out[12]: Index(['name', 'gender', 'email'], dtype='object')

##### Renaming All Columns
You can rename all columns at once by assigning a new list of column names to the columns attribute.

In [0]:
df.columns = ['emp_name', 'emp_gender', 'emp_email']
df

Unnamed: 0,emp_name,emp_gender,emp_email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


##### Renaming Columns Using `rename()` Method

The `rename()` method allows you to rename columns by passing a dictionary where the keys are the current column names, and the values are the new names.

In [0]:
df.rename(columns = {'emp_name':'name', 'emp_gender':'gender'})

Unnamed: 0,name,gender,emp_email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


In [0]:
# If you want to rename the columns in the original DataFrame without creating a new one, you can use the inplace=True argument.
df.rename(columns = {'emp_name':'name', 'emp_gender':'gender'}, inplace=True)
df

Unnamed: 0,name,gender,emp_email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


##### Renaming Columns Using a Function

You can also rename columns by applying a function to each column name using `rename()` with lambda or any other function.

In [0]:
# Convert all column names to lowercase
df_lower = df.rename(columns=str.lower)
print(df_lower)

     name  gender         emp_email
0  Rohish    Male  rohish@gmail.com
1    Smit    Male    smit@gmail.com
2   Priya  Female   priya@gmail.com


##### Renaming Columns with Index Mapping
You can also rename columns by their position (index), which is useful when you have a specific pattern to follow rather than known column names.

In [0]:
# Rename by position. An Index is immutable, so writing into
# df.columns.values pokes at its backing array directly; that is not a
# supported way to rename and its effect is pandas-version dependent.
# Instead, copy the labels to a list, edit the wanted positions and assign
# the whole list back.
new_columns = df.columns.to_list()
new_columns[0] = 'name'
new_columns[2] = 'email'
df.columns = new_columns
df

Unnamed: 0,name,gender,email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Female,priya@gmail.com


### Updating columns in a Pandas DataFrame

##### Updating a Column with a Scalar Value
You can set all values in a column to a single value.

In [0]:
df['gender'] = 'Unknown'
df

Unnamed: 0,name,gender,email
0,Rohish,Unknown,rohish@gmail.com
1,Smit,Unknown,smit@gmail.com
2,Priya,Unknown,priya@gmail.com


##### Updating a Column Based on a Condition
You can update values in a column based on a condition applied to another column or the same column

In [0]:
# Mask the rows whose gender is currently 'Unknown' ...
fc = df['gender'].eq('Unknown')
# ... and overwrite the gender of just those rows.
df.loc[fc, 'gender'] = 'Female'
df

Unnamed: 0,name,gender,email
0,Rohish,Female,rohish@gmail.com
1,Smit,Female,smit@gmail.com
2,Priya,Female,priya@gmail.com


In [0]:
# changing whole row value using index
df.loc[0] = ['Zade','Male','zade@gmail.com']
df

Unnamed: 0,name,gender,email
0,Zade,Male,zade@gmail.com
1,Smit,Female,smit@gmail.com
2,Priya,Female,priya@gmail.com


In [0]:
# In first row, changing name and email
df.loc[0,['name','email']] = ['Rohish','rohish@gmail.com']
df

Unnamed: 0,name,gender,email
0,Rohish,Male,rohish@gmail.com
1,Smit,Female,smit@gmail.com
2,Priya,Female,priya@gmail.com


In [0]:
# using slicing
df.iloc[:, 1] = 'Male'
df

Unnamed: 0,name,gender,email
0,Rohish,Male,rohish@gmail.com
1,Smit,Male,smit@gmail.com
2,Priya,Male,priya@gmail.com


##### Updating a Column with Another Column

In [0]:
# Splitting on '@' yields one [local_part, domain] list per row;
# print the domain of each address.
for parts in df['email'].str.split('@'):
    print(parts[1])

gmail.com
gmail.com
gmail.com


In [0]:
# Same extraction as the loop, collected into a list instead of printed.
[parts[1] for parts in df['email'].str.split('@')]

Out[68]: ['gmail.com', 'gmail.com', 'gmail.com']

In [0]:
# Create a new column 'email_type' holding the part of each e-mail after '@'.
# The vectorised .str accessor picks element 1 of every split result without
# a Python-level loop, and yields NaN (rather than raising IndexError) for a
# value that contains no '@'.
df['email_type'] = df['email'].str.split('@').str[1]
df

Unnamed: 0,name,gender,email,email_type
0,Rohish,Male,rohish@gmail.com,gmail.com
1,Smit,Male,smit@gmail.com,gmail.com
2,Priya,Male,priya@gmail.com,gmail.com


##### Using a Function to Update a Column

In [0]:
df['gender_len'] = df['gender'].apply(len)
df

Unnamed: 0,name,gender,email,email_type,gender_len
0,Rohish,Male,rohish@gmail.com,gmail.com,4
1,Smit,Male,smit@gmail.com,gmail.com,4
2,Priya,Male,priya@gmail.com,gmail.com,4


In [0]:
# User-defined function that upper-cases a single e-mail value
def update_email(col: str) -> str:
    """Return ``col`` converted to upper case.

    Meant to be handed to ``Series.apply``, which calls it once per element,
    so despite its name ``col`` receives one cell value, not a whole column.
    """
    return col.upper()

In [0]:
df['email'].apply(update_email)

Out[78]: 0    ROHISH@GMAIL.COM
1      SMIT@GMAIL.COM
2     PRIYA@GMAIL.COM
Name: email, dtype: object

In [0]:
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,name,gender,email,email_type,gender_len
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4


In [0]:
df['name_gender'] = df['name'] + ' ' + df['gender']
df

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male


In [0]:
df['salary'] = 20000
df

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male,20000


In [0]:
# Conditional update: set salary to 5000 for rows named 'Ankit'.
# NOTE: no row in df has that name, so the mask is all False and this
# assignment leaves the frame unchanged.
df.loc[df['name'].eq('Ankit'), 'salary'] = 5000

In [0]:
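# df.drop(columns='gender_len')
# To apply a user-defined function element-wise to every column:
# df.applymap(update_email)  # applymap is deprecated since pandas 2.1; DataFrame.map replaces it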

### Dropping Rows
You can drop rows by specifying their index labels using the drop() method.

In [0]:
# Drop the row with index 2
df.drop(index=2)

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male,20000


In [0]:
df.drop(1)

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male,20000


In [0]:
# Dropping Multiple Rows
df.drop([1,2])

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000


In [0]:
# drop() is not an in-place operation: it returns a new DataFrame and leaves df unchanged
df

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male,20000


In [0]:
# Dropping with a condition: first build a boolean mask that marks the rows
# to remove (salary of at least 6000).
fc = df['salary'].ge(6000)
fc

Out[95]: 0    True
1    True
2    True
Name: salary, dtype: bool

In [0]:
# Look up the index labels of the rows selected by the mask and drop them.
# drop() returns a new frame; df itself is left as it was.
df.drop(index=df.loc[fc].index)

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary


In [0]:
# Drop duplicate rows
df.drop_duplicates()

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male,20000


### Dropping Columns
You can drop a single column using the drop() method. Set the axis parameter to 1 (or columns) to indicate you're dropping a column.

In [0]:
#delete a column
df.drop(columns='salary')

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male


In [0]:
df.drop('salary', axis=1)

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male


In [0]:
# Dropping Multiple Columns: You can drop multiple columns by passing a list of column names to the drop() method.
df.drop(['gender_len', 'name_gender'], axis=1)

Unnamed: 0,name,gender,email,email_type,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,20000


In [0]:
df

Unnamed: 0,name,gender,email,email_type,gender_len,name_gender,salary
0,Rohish,Male,ROHISH@GMAIL.COM,gmail.com,4,Rohish Male,20000
1,Smit,Male,SMIT@GMAIL.COM,gmail.com,4,Smit Male,20000
2,Priya,Male,PRIYA@GMAIL.COM,gmail.com,4,Priya Male,20000


In [0]:
# Dropping Columns In-Place
df.drop(['gender_len', 'name_gender', 'email_type'], axis=1, inplace=True)
df


Unnamed: 0,name,gender,email,salary
0,Rohish,Male,ROHISH@GMAIL.COM,20000
1,Smit,Male,SMIT@GMAIL.COM,20000
2,Priya,Male,PRIYA@GMAIL.COM,20000


### Sorting in Pandas Dataframe
Sorting a DataFrame in pandas can be done using the `sort_values()` and `sort_index()` methods.

In [0]:
# Build the frame for the sorting examples explicitly so that this section
# reproduces on a fresh kernel (Restart & Run All). The frame left over from
# the earlier sections has upper-cased e-mails and a uniform salary, which
# would make the sorts below uninformative.
df = pd.DataFrame({
    'name': ['Rohish', 'Smit', 'Priya'],
    'gender': ['Male', 'Male', 'Female'],
    'email': ['rohish@gmail.com', 'smit@gmail.com', 'priya@gmail.com'],
    'salary': [30000, 25000, 40000],
})
df

Unnamed: 0,name,gender,email,salary
0,Rohish,Male,rohish@gmail.com,30000
1,Smit,Male,smit@gmail.com,25000
2,Priya,Female,priya@gmail.com,40000


##### Sorting by Values
To sort a DataFrame by the values of one or more columns, use sort_values().

In [0]:
# Sort the rows alphabetically by name. With inplace=True the reordering is
# applied to df itself; df.sort_values(by='name') on its own would return a
# sorted copy and leave df untouched.
df.sort_values('name', inplace=True)

In [0]:
df

Unnamed: 0,name,gender,email,salary
2,Priya,Female,priya@gmail.com,40000
0,Rohish,Male,rohish@gmail.com,30000
1,Smit,Male,smit@gmail.com,25000


In [0]:
# By default sort_values() sorts in ascending order; pass ascending=False for descending
df.sort_values(by = 'salary', ascending=False)

Unnamed: 0,name,gender,email,salary
2,Priya,Female,priya@gmail.com,40000
0,Rohish,Male,rohish@gmail.com,30000
1,Smit,Male,smit@gmail.com,25000


In [0]:
# sorting by multiple columns 
df.sort_values(by = ['gender', 'salary'], ascending=[True, False])

Unnamed: 0,name,gender,email,salary
2,Priya,Female,priya@gmail.com,40000
0,Rohish,Male,rohish@gmail.com,30000
1,Smit,Male,smit@gmail.com,25000


##### Sorting by Index
To sort a DataFrame by its index (row labels), use sort_index().

In [0]:
# Sort by index
df.sort_index(inplace=True)
df

Unnamed: 0,name,gender,email,salary
0,Rohish,Male,rohish@gmail.com,30000
1,Smit,Male,smit@gmail.com,25000
2,Priya,Female,priya@gmail.com,40000
