# Data Transformation (Sorting, Filtering, Applying Functions)

In [2]:
import pandas as pd

### Sorting

In [3]:
# Three-person demo frame for the sorting examples (one row per person).
df = pd.DataFrame(
    [
        ('Alice', 25, 50000),
        ('Bob', 30, 60000),
        ('Charlie', 22, 45000),
    ],
    columns=['Name', 'Age', 'Salary'],
)
df

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,22,45000


In [4]:
# Ascending is the default order; rows keep their original index labels.
df.sort_values(by=['Age'], ascending=True)

Unnamed: 0,Name,Age,Salary
2,Charlie,22,45000
0,Alice,25,50000
1,Bob,30,60000


In [5]:
# Highest salary first.
df.sort_values('Salary', ascending=False)

Unnamed: 0,Name,Age,Salary
1,Bob,30,60000
0,Alice,25,50000
2,Charlie,22,45000


In [6]:
# String columns sort alphabetically (A -> Z).
df.sort_values('Name')

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,22,45000


In [7]:
# Reverse alphabetical order (Z -> A).
df.sort_values(by=['Name'], ascending=False)

Unnamed: 0,Name,Age,Salary
2,Charlie,22,45000
1,Bob,30,60000
0,Alice,25,50000


In [8]:
# sort_values returns a new frame; keep it under its own name so that `df`
# itself stays in its original row order.
df2 = df.sort_values('Age')
df2

Unnamed: 0,Name,Age,Salary
2,Charlie,22,45000
0,Alice,25,50000
1,Bob,30,60000


In [9]:
# Sorting by index label puts the rows back in their original 0, 1, 2 order.
df2.sort_index(ascending=True)

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,22,45000


### Filtering

In [10]:
# Employee records used by the filtering and transformation examples.
# Note: this rebinds `df`, replacing the small frame from the sorting section.
data = dict(
    Department=['HR', 'Finance', 'IT', 'HR', 'IT', 'Finance', 'HR'],
    Employee=['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace'],
    Salary=[5000, 6000, 7000, 5200, 7500, 6400, 5500],
    Experience=[3, 4, 5, 2, 6, 5, 3],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,Department,Employee,Salary,Experience
0,HR,Alice,5000,3
1,Finance,Bob,6000,4
2,IT,Charlie,7000,5
3,HR,David,5200,2
4,IT,Eve,7500,6
5,Finance,Frank,6400,5
6,HR,Grace,5500,3


In [11]:
# A comparison on a column yields a boolean Series (one True/False per row)
# that can later be used as a row mask.
condition = df['Salary'].gt(6000)
condition

0    False
1    False
2     True
3    False
4     True
5     True
6    False
Name: Salary, dtype: bool

In [12]:
# Indexing with the boolean mask keeps only the rows where it is True.
df.loc[condition]

Unnamed: 0,Department,Employee,Salary,Experience
2,IT,Charlie,7000,5
4,IT,Eve,7500,6
5,Finance,Frank,6400,5


In [13]:
# Same filter with the mask built inline instead of stored in a variable.
df[df['Salary'].gt(6000)]

Unnamed: 0,Department,Employee,Salary,Experience
2,IT,Charlie,7000,5
4,IT,Eve,7500,6
5,Finance,Frank,6400,5


In [14]:
# Element-wise AND of two masks: True only where both conditions hold.
df['Salary'].gt(6000) & df['Department'].eq('IT')

0    False
1    False
2     True
3    False
4     True
5    False
6    False
dtype: bool

In [15]:
# General pattern: df[(condition1) & (condition2) & (condition3)]
# Each comparison needs its own parentheses because `&` binds more tightly
# than `>` / `==` in Python.

df.loc[(df['Salary'] > 6000) & (df['Department'] == 'IT')]

Unnamed: 0,Department,Employee,Salary,Experience
2,IT,Charlie,7000,5
4,IT,Eve,7500,6


### Applying functions (Transformation)
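- `map()`: on a Series, applies a function (or a dict/Series lookup) to each element. Since pandas 2.1, `DataFrame.map()` does the same element-wise on a DataFrame.
- `apply()`: works on both a Series and a DataFrame. On a Series the function is called per element; on a DataFrame it is called per column (or per row with `axis=1`).
- `applymap()`: DataFrame-only, element-wise. Deprecated since pandas 2.1 in favor of `DataFrame.map()`.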

In [16]:
# Selecting a single column from a DataFrame gives back a Series.
type(df.loc[:, 'Department'])

pandas.core.series.Series

In [17]:
# Integers 1..10 as a plain list, then wrapped in a Series for the map() demos.
l = list(range(1, 11))
s = pd.Series(l)
s

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [18]:
# Plain-Python version of the per-element transformation: prefix one value
# with a label (the number must be converted to str before joining).
x = 1
res = ''.join(('The value is: ', str(x)))
res

'The value is: 1'

In [19]:
# Series.map calls the function once per element and returns a new Series.
# The result is bound to its own name instead of overwriting `s`: with
# `s = s.map(...)` every re-run of this cell would prepend the prefix again.
s_labeled = s.map(lambda x: 'The value is: ' + str(x))
s_labeled

0     The value is: 1
1     The value is: 2
2     The value is: 3
3     The value is: 4
4     The value is: 5
5     The value is: 6
6     The value is: 7
7     The value is: 8
8     The value is: 9
9    The value is: 10
dtype: object

In [20]:
# Rebuild the integer Series so the next example starts from the raw values 1..10.
l = [n for n in range(1, 11)]
s = pd.Series(data=l)
s

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [21]:
# Evaluate x^2 + 3x - 2 for every element with Series.map.
# The result gets its own name rather than replacing `s`: with `s = s.map(...)`
# re-running the cell would apply the polynomial to already-transformed values.
s_poly = s.map(lambda x: x**2 + 3*x - 2)
s_poly

0      2
1      8
2     16
3     26
4     38
5     52
6     68
7     86
8    106
9    128
dtype: int64

In [22]:
# Fresh 1..10 Series as the input for the apply() example.
l = [*range(1, 11)]
s = pd.Series(l)
s

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [23]:
# Series.apply with a scalar function is also evaluated element by element;
# the result is stored separately, so `s` is left untouched.
s1 = s.apply(lambda value: (value ** 2) + (3 * value) - 2)
s1

0      2
1      8
2     16
3     26
4     38
5     52
6     68
7     86
8    106
9    128
dtype: int64

In [24]:
# Show the employee frame again before deriving new columns from it.
df

Unnamed: 0,Department,Employee,Salary,Experience
0,HR,Alice,5000,3
1,Finance,Bob,6000,4
2,IT,Charlie,7000,5
3,HR,David,5200,2
4,IT,Eve,7500,6
5,Finance,Frank,6400,5
6,HR,Grace,5500,3


In [25]:
# Preview only: salary divided by 1000, plus 1. Nothing is stored, so `df` is unchanged.
df['Salary'].apply(lambda salary: salary / 1000 + 1)

0    6.0
1    7.0
2    8.0
3    6.2
4    8.5
5    7.4
6    6.5
Name: Salary, dtype: float64

In [26]:
# Store the transformed salaries as a new column. Assigning to a column name
# overwrites it if it already exists, so re-running this cell is safe.
df['Salary_transformed'] = df['Salary'].apply(lambda salary: salary / 1000 + 1)
df

Unnamed: 0,Department,Employee,Salary,Experience,Salary_transformed
0,HR,Alice,5000,3,6.0
1,Finance,Bob,6000,4,7.0
2,IT,Charlie,7000,5,8.0
3,HR,David,5200,2,6.2
4,IT,Eve,7500,6,8.5
5,Finance,Frank,6400,5,7.4
6,HR,Grace,5500,3,6.5


In [27]:
# The same operator behaves differently by type: `*` repeats a str but multiplies ints.
'HR' * 3, 4 * 3

('HRHRHR', 12)

In [28]:
# Add 1 to every cell of the two numeric columns and store the results as
# new columns `a1` / `a2`. DataFrame.applymap is deprecated since pandas 2.1
# and emits a FutureWarning; DataFrame.map is its element-wise replacement.
df[['a1', 'a2']] = df[['Salary', 'Experience']].map(lambda x: x + 1)

  df[['a1', 'a2']] = df[['Salary', 'Experience']].applymap(lambda x: x + 1)


In [29]:
# Display the frame to confirm the new `a1` / `a2` columns were added.
df

Unnamed: 0,Department,Employee,Salary,Experience,Salary_transformed,a1,a2
0,HR,Alice,5000,3,6.0,5001,4
1,Finance,Bob,6000,4,7.0,6001,5
2,IT,Charlie,7000,5,8.0,7001,6
3,HR,David,5200,2,6.2,5201,3
4,IT,Eve,7500,6,8.5,7501,7
5,Finance,Frank,6400,5,7.4,6401,6
6,HR,Grace,5500,3,6.5,5501,4


In [30]:
# Rebuild the employee frame from scratch (this drops the helper columns added
# above) and include an extra numeric column, Distance_Travelled.
data = dict([
    ('Department', ['HR', 'Finance', 'IT', 'HR', 'IT', 'Finance', 'HR']),
    ('Employee', ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace']),
    ('Salary', [5000, 6000, 7000, 5200, 7500, 6400, 5500]),
    ('Experience', [3, 4, 5, 2, 6, 5, 3]),
    ('Distance_Travelled', [10000, 11000, 15000, 1000, 1200, 1500, 1750]),
])

df = pd.DataFrame(data=data)
df

Unnamed: 0,Department,Employee,Salary,Experience,Distance_Travelled
0,HR,Alice,5000,3,10000
1,Finance,Bob,6000,4,11000
2,IT,Charlie,7000,5,15000
3,HR,David,5200,2,1000
4,IT,Eve,7500,6,1200
5,Finance,Frank,6400,5,1500
6,HR,Grace,5500,3,1750


In [31]:
# Divide every cell of the two numeric columns by 1000 and store the results
# as new columns (the names suggest salary in thousands and distance in km).
# DataFrame.applymap is deprecated since pandas 2.1; DataFrame.map is the
# element-wise replacement and avoids the FutureWarning.
# NOTE(review): 'Salary_thousanda' looks like a typo for 'Salary_thousands';
# left unchanged so the recorded output below still matches.
df[['Salary_thousanda', 'Distance_Travelled_in_km']] = (
    df[['Salary', 'Distance_Travelled']].map(lambda x: x / 1000)
)
df

  df[['Salary', 'Distance_Travelled']].applymap(lambda x: x / 1000)


Unnamed: 0,Department,Employee,Salary,Experience,Distance_Travelled,Salary_thousanda,Distance_Travelled_in_km
0,HR,Alice,5000,3,10000,5.0,10.0
1,Finance,Bob,6000,4,11000,6.0,11.0
2,IT,Charlie,7000,5,15000,7.0,15.0
3,HR,David,5200,2,1000,5.2,1.0
4,IT,Eve,7500,6,1200,7.5,1.2
5,Finance,Frank,6400,5,1500,6.4,1.5
6,HR,Grace,5500,3,1750,5.5,1.75


In [32]:
# Remove leading/trailing whitespace from every cell of the two text columns.
# DataFrame.applymap is deprecated since pandas 2.1; DataFrame.map applies the
# function element-wise without the FutureWarning.
df[['Department', 'Employee']] = df[['Department', 'Employee']].map(lambda x: x.strip())
df

  df[['Department', 'Employee']] = df[['Department', 'Employee']].applymap( lambda x: x.strip())


Unnamed: 0,Department,Employee,Salary,Experience,Distance_Travelled,Salary_thousanda,Distance_Travelled_in_km
0,HR,Alice,5000,3,10000,5.0,10.0
1,Finance,Bob,6000,4,11000,6.0,11.0
2,IT,Charlie,7000,5,15000,7.0,15.0
3,HR,David,5200,2,1000,5.2,1.0
4,IT,Eve,7500,6,1200,7.5,1.2
5,Finance,Frank,6400,5,1500,6.4,1.5
6,HR,Grace,5500,3,1750,5.5,1.75
