For a detailed explanation of these examples, refer to [pandas-drop-rows-on-column-value](https://sparkbyexamples.com/pandas/pandas-drop-rows-usage-with-examples).

# Pandas Drop Rows Based on Column Value

In [1]:
# Build the sample DataFrame that the rest of the notebook filters.
# The Fee, Duration and Discount columns contain missing values (None / NaN).
import pandas as pd
import numpy as np

technologies = dict(
    Courses=["Spark", "PySpark", "Hadoop", "Python"],
    Fee=[22000, 25000, np.nan, 24000],
    Duration=['30day', None, '55days', np.nan],
    Discount=[1000, 2300, 1000, np.nan],
)
df = pd.DataFrame(data=technologies)
df



   Courses      Fee Duration  Discount
0    Spark  22000.0    30day    1000.0
1  PySpark  25000.0     None    2300.0
2   Hadoop      NaN   55days    1000.0
3   Python  24000.0      NaN       NaN


In [2]:

# Drop every row whose Fee is at least 24000.
# A NaN Fee compares False against the threshold, so that row is kept.
# Reassign the returned frame rather than passing inplace=True: drop() then
# builds a new DataFrame and the original object is left untouched.
df = df.drop(index=df[df['Fee'] >= 24000].index)
df


  Courses      Fee Duration  Discount
0   Spark  22000.0    30day    1000.0
2  Hadoop      NaN   55days    1000.0


In [5]:

# Boolean-mask selection: keep the rows whose Fee is 24000 or more,
# which removes every other row (including the row with a missing Fee).
df = pd.DataFrame(data=technologies)
df2 = df[df['Fee'].ge(24000)]
df2




   Courses      Fee Duration  Discount
1  PySpark  25000.0     None    2300.0
3   Python  24000.0      NaN       NaN


In [13]:
# Same selection through the loc[] indexer:
# keep only the rows whose Fee is 24000 or more.
df = pd.DataFrame(technologies)
df2 = df.loc[df["Fee"] >= 24000]
# End the cell on the bare expression so the notebook renders the frame
# through its rich display instead of print()'s plain-text dump.
df2


   Courses      Fee Duration  Discount
1  PySpark  25000.0     None    2300.0
3   Python  24000.0      NaN       NaN


In [11]:
# Filter on multiple column values: keep only the rows where
# Fee is at least 22000 AND Discount equals 2300; all other rows are removed.
df = pd.DataFrame(data=technologies)
df = df.loc[df['Fee'].ge(22000) & df['Discount'].eq(2300)]
df


Unnamed: 0,Courses,Fee,Duration,Discount
1,PySpark,25000.0,,2300.0


In [17]:
# Remove the rows whose Duration is missing (None or NaN).
df = pd.DataFrame(data=technologies)
df2 = df.dropna(subset=['Duration'])
df2


  Courses      Fee Duration  Discount
0   Spark  22000.0    30day    1000.0
2  Hadoop      NaN   55days    1000.0


In [18]:
# Select rows with DataFrame.query(): keep the rows where Courses is 'Spark'
# (every other row is left out of df2).
df2 = df.query(expr="Courses == 'Spark'")
df2




Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,22000.0,30day,1000.0


In [19]:
# query() can reference a Python variable by prefixing its name with '@'.
value = 'Spark'
df2 = df.query(expr="Courses == @value")
df2


Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,22000.0,30day,1000.0


In [23]:
# query() with a "not equal" condition: keep every row except Courses == 'Spark'.
df = pd.DataFrame(data=technologies)
df2 = df.query(expr="Courses != 'Spark'")
df2


Unnamed: 0,Courses,Fee,Duration,Discount
1,PySpark,25000.0,,2300.0
2,Hadoop,,55days,1000.0
3,Python,24000.0,,


In [24]:

# query() with the "in" operator: keep the rows whose Courses is one of the listed values.
df2 = df.query(expr="Courses in ('Spark','PySpark')")
df2


Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,22000.0,30day,1000.0
1,PySpark,25000.0,,2300.0


In [27]:
# Alternative using loc[]: keep the rows whose Courses equals `value`
# (this cell does not define `value`; it must already exist when the cell runs).
df2 = df.loc[df['Courses'].eq(value)]
df2


Unnamed: 0,Courses,Fee,Duration,Discount
0,Spark,22000.0,30day,1000.0


In [28]:
# Alternative using loc[]: keep every row whose Courses is not 'Spark'.
df2 = df.loc[df['Courses'].ne('Spark')]
df2


Unnamed: 0,Courses,Fee,Duration,Discount
1,PySpark,25000.0,,2300.0
2,Hadoop,,55days,1000.0
3,Python,24000.0,,


In [44]:
# Delete rows based on the inverse of a column condition:
# every row whose Courses is NOT "PySpark" is removed.
df = pd.DataFrame(technologies)
# Despite its name, df1 is the Index of row labels to remove, not a DataFrame.
df1 = df[~(df['Courses'] == "PySpark")].index
# Reassign the returned frame rather than passing inplace=True: drop() then
# builds a new DataFrame and the freshly created one is left untouched.
df = df.drop(index=df1)
df



   Courses      Fee Duration  Discount
1  PySpark  25000.0     None    2300.0
