# Data Transformation

In [1]:
import pandas as pd
import numpy as np

# Toy sales table (one row per record) that the following cells transform.
df = pd.DataFrame(
    data=[
        ('Delhi', 100, 20),
        ('Mumbai', 150, 35),
        ('Chennai', 90, 10),
        ('Delhi', 120, 25),
    ],
    columns=['city', 'sales', 'profit'],
)

df

Unnamed: 0,city,sales,profit
0,Delhi,100,20
1,Mumbai,150,35
2,Chennai,90,10
3,Delhi,120,25


## Vectorized operations

In [2]:
# Multiply every sales value by 1.1 in one vectorized call (no Python loop).
df['sales'].mul(1.1)

0    110.0
1    165.0
2     99.0
3    132.0
Name: sales, dtype: float64

In [3]:
# Element-wise ratio of two columns: profit divided by sales for each row.
df['profit'].div(df['sales'])

0    0.200000
1    0.233333
2    0.111111
3    0.208333
dtype: float64

## map

In [4]:
# Lookup table from city name to its two-letter code.
city_map = dict(Delhi='DL', Mumbai='MH', Chennai='TN')

# Series.map with a dict replaces each value by its dictionary entry.
df['city'].map(city_map)

0    DL
1    MH
2    TN
3    DL
Name: city, dtype: object

In [5]:
# Series.map also accepts a function, called once per value.
df['city'].map(lambda name: name.upper())

0      DELHI
1     MUMBAI
2    CHENNAI
3      DELHI
Name: city, dtype: object

## apply (column-wise)

In [6]:
# Column-wise apply: np.mean receives each selected column as a Series,
# yielding one mean per column.
df[['sales', 'profit']].apply(np.mean, axis='index')

sales     115.0
profit     22.5
dtype: float64

In [7]:
# Range (max minus min) of each selected column, computed column by column.
df[['sales', 'profit']].apply(
    lambda col: col.max() - col.min(),
    axis='index',
)

sales     60
profit    25
dtype: int64

## apply (row-wise)

In [8]:
# Row-wise apply: the function receives one row at a time as a Series.
# Gives the same ratio as dividing the two columns directly.
df.apply(
    lambda row: row['profit'] / row['sales'],
    axis='columns',
)

0    0.200000
1    0.233333
2    0.111111
3    0.208333
dtype: float64

In [9]:
# Build a "<city>-<sales>" label for every row by formatting two of its fields.
df.apply(
    lambda r: f"{r['city']}-{r['sales']}",
    axis='columns',
)

0     Delhi-100
1    Mumbai-150
2    Chennai-90
3     Delhi-120
dtype: object

## applymap (element-wise; named `DataFrame.map` from pandas 2.1 onward)

In [10]:
# Element-wise transform: the function is called once per individual cell.
# DataFrame.applymap is deprecated since pandas 2.1 (it emits a FutureWarning);
# DataFrame.map is its direct replacement with the same behaviour.
df[['sales', 'profit']].map(lambda x: x * 2)

  df[['sales', 'profit']].applymap(lambda x: x * 2)


Unnamed: 0,sales,profit
0,200,40
1,300,70
2,180,20
3,240,50


## Conditional transformations

In [11]:
# Vectorized if/else: 'High' where sales exceed 120, otherwise 'Low'.
# The result is a NumPy array, not a Series.
np.where(df['sales'].gt(120), 'High', 'Low')

array(['Low', 'High', 'Low', 'Low'], dtype='<U4')

In [12]:
# Store the conditional label as a new 'category' column on df:
# 'Good' when profit is above 20, 'Average' otherwise.
df['category'] = np.where(df['profit'].gt(20), 'Good', 'Average')

df

Unnamed: 0,city,sales,profit,category
0,Delhi,100,20,Average
1,Mumbai,150,35,Good
2,Chennai,90,10,Average
3,Delhi,120,25,Good


## Column-wise vs row-wise

In [13]:
# axis='index' (same as axis=0) collapses the rows: one total per column.
df[['sales', 'profit']].sum(axis='index')

sales     460
profit     90
dtype: int64

In [14]:
# axis='columns' (same as axis=1) collapses the columns: one total per row.
df[['sales', 'profit']].sum(axis='columns')

0    120
1    185
2    100
3    145
dtype: int64