In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample data: one profit string per location, written as "<amount> <currency>".
df = pd.DataFrame(
    data={
        'Location': ['A', 'B', 'C', 'D'],
        'Profit': ['100 USD', '40 CAD', '200 CAD', '1000 INR'],
    }
)

df


Unnamed: 0,Location,Profit
0,A,100 USD
1,B,40 CAD
2,C,200 CAD
3,D,1000 INR


In [3]:
# Split each "<amount> <currency>" string on the space. expand=True returns
# one column per piece, stored here as the new Amount and Currency columns.
# Both new columns hold strings at this point.
df[['Amount', 'Currency']] = df['Profit'].str.split(pat=' ', expand=True)

df

Unnamed: 0,Location,Profit,Amount,Currency
0,A,100 USD,100,USD
1,B,40 CAD,40,CAD
2,C,200 CAD,200,CAD
3,D,1000 INR,1000,INR


In [4]:
# Multiplier per currency: 85 for USD, 45 for CAD, 1 for anything else.
# NOTE(review): presumably these are conversion rates into INR, since the
# result is later stored as 'Total Amount in INR' - confirm the rates.
df['Currency Value'] = np.select(
    [df['Currency'] == 'USD', df['Currency'] == 'CAD'],
    [85, 45],
    default=1,
)

df

Unnamed: 0,Location,Profit,Amount,Currency,Currency Value
0,A,100 USD,100,USD,85
1,B,40 CAD,40,CAD,45
2,C,200 CAD,200,CAD,45
3,D,1000 INR,1000,INR,1


In [5]:
# Inspect the column dtypes; Amount came out of a string split, so it is still object.
df.dtypes

Location          object
Profit            object
Amount            object
Currency          object
Currency Value     int64
dtype: object

In [8]:
# Amount still holds text after the split; cast it to integers so it can be
# used in arithmetic, then confirm the new dtype.
df['Amount'] = df['Amount'].astype(dtype=int)

df.dtypes

Location          object
Profit            object
Amount             int64
Currency          object
Currency Value     int64
dtype: object

In [9]:
# Row-wise product of the numeric amount and its currency multiplier.
df['Total Amount in INR'] = df['Amount'].mul(df['Currency Value'])

df

Unnamed: 0,Location,Profit,Amount,Currency,Currency Value,Total Amount in INR
0,A,100 USD,100,USD,85,8500
1,B,40 CAD,40,CAD,45,1800
2,C,200 CAD,200,CAD,45,9000
3,D,1000 INR,1000,INR,1,1000


In [10]:
# Keep only Location and the converted total by dropping the helper columns.
# drop() returns a new frame that is rebound to df, instead of mutating the
# frame displayed by the earlier cells via inplace=True.
df = df.drop(columns=['Profit', 'Amount', 'Currency', 'Currency Value'])

In [11]:
# Show the frame after the helper columns have been dropped.
df

Unnamed: 0,Location,Total Amount in INR
0,A,8500
1,B,1800
2,C,9000
3,D,1000


In [13]:
# New sample for the duplicate-handling cells: rows 0 and 2 are identical.
df = pd.DataFrame(
    data={
        'Location': ['A', 'B', 'A', 'D'],
        'Profit': [100, 200, 100, 400],
    }
)

df


Unnamed: 0,Location,Profit
0,A,100
1,B,200
2,A,100
3,D,400


In [16]:
# Total number of duplicate records: duplicated() marks each row that repeats
# an earlier row, and summing the boolean flags counts them.

df.duplicated().sum()

np.int64(1)

In [17]:
# Display the duplicate records by selecting the rows flagged by duplicated().

df.loc[df.duplicated()]

Unnamed: 0,Location,Profit
2,A,100


In [19]:
# Delete duplicate records, keeping the first occurrence of each row.
# drop_duplicates() returns a new frame that is rebound to df instead of
# mutating in place; the surviving rows keep their original index labels.

df = df.drop_duplicates()

In [20]:
# Show the frame after duplicate removal (the original index labels are kept).
df

Unnamed: 0,Location,Profit
0,A,100
1,B,200
3,D,400


In [21]:
# New sample for the missing-value cells: Profit has two NaNs, Discount has one.
df = pd.DataFrame(
    data={
        'Location': ['A', 'B', 'A', 'D'],
        'Profit': [100, np.nan, np.nan, 400],
        'Discount': [5, 15, 12, np.nan],
    }
)

df


Unnamed: 0,Location,Profit,Discount
0,A,100.0,5.0
1,B,,15.0
2,A,,12.0
3,D,400.0,


In [24]:
# Count the null values in each column.

df.isna().sum()

Location    0
Profit      2
Discount    1
dtype: int64

In [25]:
# Null count for the Profit column only.
df['Profit'].isna().sum()

np.int64(2)

In [26]:
# Delete records with null values: a row is removed as soon as any one of its
# values is null. The result is only displayed; df itself is not reassigned.

df.dropna(how='any')

Unnamed: 0,Location,Profit,Discount
0,A,100.0,5.0


In [29]:
# Fraction of null values per column (0.0 to 1.0): the mean of the boolean
# null mask equals the null count divided by the number of rows.

df.isna().mean()

Location    0.00
Profit      0.50
Discount    0.25
dtype: float64

In [30]:
# Same ratio as the null fraction, scaled to a percentage.
df.isna().mean() * 100

Location     0.0
Profit      50.0
Discount    25.0
dtype: float64

In [32]:
# Replace the null Profit values with the constant 150. This returns a new
# Series for display; df is not modified.

df['Profit'].fillna(value=150)

0    100.0
1    150.0
2    150.0
3    400.0
Name: Profit, dtype: float64

In [33]:
# Mean of Profit; the NaN entries are left out of the calculation.
df['Profit'].mean()

np.float64(250.0)

In [34]:
# Median of Profit; the NaN entries are left out of the calculation.
df['Profit'].median()

250.0

In [35]:
# Fill the null Profit values with the column mean, computed from the data
# rather than typed in as a literal so it stays correct if the data changes.
# For this sample the mean and the median are both 250.0.
# The result is only displayed; df is not modified.
df['Profit'].fillna(df['Profit'].mean())

0    100.0
1    250.0
2    250.0
3    400.0
Name: Profit, dtype: float64

In [36]:
# Forward fill: each null Profit takes the last valid value that precedes it.
# Series.ffill() replaces fillna(method='ffill'), which pandas has deprecated.
# The result is only displayed; df is not modified.

df['Profit'].ffill()

  df['Profit'].fillna(method='ffill')


0    100.0
1    100.0
2    100.0
3    400.0
Name: Profit, dtype: float64

In [37]:
# Backward fill: each null Profit takes the next valid value that follows it.
# Series.bfill() replaces fillna(method='bfill'), which pandas has deprecated.
# The result is only displayed; df is not modified.

df['Profit'].bfill()

  df['Profit'].fillna(method='bfill')


0    100.0
1    400.0
2    400.0
3    400.0
Name: Profit, dtype: float64

In [38]:
# Show df again: none of the fill results above were assigned back, so the NaNs remain.
df

Unnamed: 0,Location,Profit,Discount
0,A,100.0,5.0
1,B,,15.0
2,A,,12.0
3,D,400.0,


In [39]:
# Linearly interpolate the gaps in the numeric columns only. Calling
# interpolate() on the whole frame also touches the object-dtype Location
# column, which pandas has deprecated. Work on a copy so df stays unchanged
# and Location is carried over as-is.
numeric_cols = df.select_dtypes(include='number').columns
df_interpolated = df.copy()
df_interpolated[numeric_cols] = df_interpolated[numeric_cols].interpolate()

df_interpolated

  df.interpolate()


Unnamed: 0,Location,Profit,Discount
0,A,100.0,5.0
1,B,200.0,15.0
2,A,300.0,12.0
3,D,400.0,12.0
