
For a detailed explanation of these examples, refer to [Pandas Add Column](https://sparkbyexamples.com/pandas/pandas-add-column-usage-with-examples).





# Pandas Add Column to DataFrame

In [1]:

import pandas as pd
import numpy as np

# Sample data: one list per column, all five entries long.
technologies = dict(
    Courses=["Spark", "PySpark", "Hadoop", "Python", "Pandas"],
    Fee=[22000, 25000, 23000, 24000, 26000],
    Discount=[1000, 2300, 1000, 1200, 2500],
)

# Build the base DataFrame; the bare last expression displays it.
df = pd.DataFrame(data=technologies)
df


Unnamed: 0,Courses,Fee,Discount
0,Spark,22000,1000
1,PySpark,25000,2300
2,Hadoop,23000,1000
3,Python,24000,1200
4,Pandas,26000,2500


In [2]:
# Add a new column with assign(): the result is bound to df2, df itself is not reassigned
tutors = [
    "William",
    "Henry",
    "Michael",
    "John",
    "Messi",
]
df2 = df.assign(TutorsAssigned=tutors)
df2


Unnamed: 0,Courses,Fee,Discount,TutorsAssigned
0,Spark,22000,1000,William
1,PySpark,25000,2300,Henry
2,Hadoop,23000,1000,Michael
3,Python,24000,1200,John
4,Pandas,26000,2500,Messi


In [3]:
# Add multiple columns to the DataFrame in a single assign() call
MNCCompanies = ["TATA", "HCL", "Infosys", "Google", "Amazon"]
df2 = df.assign(
    MNCComp=MNCCompanies,
    TutorsAssigned=tutors,
)
df2

Unnamed: 0,Courses,Fee,Discount,MNCComp,TutorsAssigned
0,Spark,22000,1000,TATA,William
1,PySpark,25000,2300,HCL,Henry
2,Hadoop,23000,1000,Infosys,Michael
3,Python,24000,1200,Google,John
4,Pandas,26000,2500,Amazon,Messi


In [4]:
# Derive a new column from existing columns
df = pd.DataFrame(data=technologies)
# The callable passed to assign() receives the DataFrame and returns the new column.
# NOTE(review): despite the column name, this computes Fee * Discount / 100,
# not Discount as a percentage of Fee - confirm the intended formula.
df2 = df.assign(
    Discount_Percent=lambda frame: frame["Fee"] * frame["Discount"] / 100
)
df2


Unnamed: 0,Courses,Fee,Discount,Discount_Percent
0,Spark,22000,1000,220000.0
1,PySpark,25000,2300,575000.0
2,Hadoop,23000,1000,230000.0
3,Python,24000,1200,288000.0
4,Pandas,26000,2500,650000.0


In [5]:
# Add constant-valued or empty columns: each scalar is repeated for every row
df = pd.DataFrame(data=technologies)
df2 = df.assign(
    A=None,
    B=0,
    C="",
)
df2


Unnamed: 0,Courses,Fee,Discount,A,B,C
0,Spark,22000,1000,,0,
1,PySpark,25000,2300,,0,
2,Hadoop,23000,1000,,0,
3,Python,24000,1200,,0,
4,Pandas,26000,2500,,0,


In [6]:
# Add a new column to the existing DataFrame using bracket assignment
# (this modifies df itself rather than producing a separate frame)
df = pd.DataFrame(data=technologies)
df["MNCCompanies"] = MNCCompanies
df


Unnamed: 0,Courses,Fee,Discount,MNCCompanies
0,Spark,22000,1000,TATA
1,PySpark,25000,2300,HCL
2,Hadoop,23000,1000,Infosys
3,Python,24000,1200,Google
4,Pandas,26000,2500,Amazon


In [7]:
# Derive a new column from existing columns, storing it on df directly.
# df is not rebuilt in this cell, so it operates on whatever df currently holds.
# NOTE(review): despite the column name, this computes Fee * Discount / 100,
# not Discount as a percentage of Fee - confirm the intended formula.
df["Discount_Percent"] = (df["Fee"] * df["Discount"]) / 100
df

Unnamed: 0,Courses,Fee,Discount,MNCCompanies,Discount_Percent
0,Spark,22000,1000,TATA,220000.0
1,PySpark,25000,2300,HCL,575000.0
2,Hadoop,23000,1000,Infosys,230000.0
3,Python,24000,1200,Google,288000.0
4,Pandas,26000,2500,Amazon,650000.0


In [8]:
# Add a new column at a specific position: loc=0 places it first
df = pd.DataFrame(data=technologies)
df.insert(loc=0, column="Tutors", value=tutors)
df


Unnamed: 0,Tutors,Courses,Fee,Discount
0,William,Spark,22000,1000
1,Henry,PySpark,25000,2300
2,Michael,Hadoop,23000,1000
3,John,Python,24000,1200
4,Messi,Pandas,26000,2500


In [9]:
# Add a new column by mapping the values of an existing column
df = pd.DataFrame(technologies)

# Lookup table keyed by course name. Keys must match the 'Courses' values
# exactly (including case); Series.map() yields NaN for any value without a key.
# It is kept under its own name so the `tutors` list used by the assign() and
# insert() cells is not overwritten when this cell runs.
tutor_by_course = {
    "Spark": "William",
    "PySpark": "Henry",
    "Hadoop": "Michael",
    "Python": "John",
    "Pandas": "Messi",
}

# Look each course up in the table instead of assigning the dict directly:
# direct assignment does not map on 'Courses' at all and depends on how
# pandas interprets a dict value (key order or index alignment).
df['Tutors'] = df['Courses'].map(tutor_by_course)
df


Unnamed: 0,Courses,Fee,Discount,Tutors
0,Spark,22000,1000,William
1,PySpark,25000,2300,Henry
2,Hadoop,23000,1000,Michael
3,Python,24000,1200,John
4,Pandas,26000,2500,Messi
