 # Name: Tamanna Vaikkath 
 # PRN: 22070521094


# **Data Transformation Operations**

In [2]:
import pandas as pd

## **Dataset Creation**

In [3]:
# Source rows for the demo dataset, one tuple per sale, in the order
# (ID, Full Name, Age, Country, Revenue).
_columns = ('ID', 'Full Name', 'Age', 'Country', 'Revenue')
_rows = [
    (101, 'Aarav', 27, 'India', 500),
    (102, 'Sanya', 32, 'India', 700),
    (103, 'Rohit', 28, 'USA', 200),
    (104, 'Meera', 35, 'UK', 950),
    (105, 'Kabir', 30, 'Canada', 300),
    (106, 'Ishita', 25, 'Australia', 450),
]

# Pivot the rows into a column-oriented mapping: {column name: list of values}.
records = {name: list(values) for name, values in zip(_columns, zip(*_rows))}

In [4]:
# Build the working DataFrame; each key of `records` becomes one column.
sales_data = pd.DataFrame(data=records)

## **Character Map Transformation**  
Convert the "Full Name" column to uppercase

In [5]:
# Upper-case every name with the vectorized string accessor, then write the
# result back over the original column (this modifies sales_data itself).
uppercased_names = sales_data['Full Name'].str.upper()
sales_data['Full Name'] = uppercased_names
print("\nCharacter Map Transformation:\n", sales_data)


Character Map Transformation:
     ID Full Name  Age    Country  Revenue
0  101     AARAV   27      India      500
1  102     SANYA   32      India      700
2  103     ROHIT   28        USA      200
3  104     MEERA   35         UK      950
4  105     KABIR   30     Canada      300
5  106    ISHITA   25  Australia      450


## **Multicast Transformation**  
Create two independent copies of the dataset  

In [6]:
# Fan the dataset out into two separate copies; each call to .copy()
# produces its own DataFrame object.
sales_data_copy1, sales_data_copy2 = (sales_data.copy() for _ in range(2))
print("\nMulticast Transformation - Two copies created")


Multicast Transformation - Two copies created


## **Conditional Split Transformation**  
Filter records where Revenue is greater than 400  

In [7]:
# Boolean mask marking the rows whose revenue is strictly above 400.
above_400 = sales_data['Revenue'] > 400
# Keep only the masked rows; the original index labels are preserved.
high_revenue = sales_data.loc[above_400]
print("\nConditional Split Transformation:\n", high_revenue)


Conditional Split Transformation:
     ID Full Name  Age    Country  Revenue
0  101     AARAV   27      India      500
1  102     SANYA   32      India      700
3  104     MEERA   35         UK      950
5  106    ISHITA   25  Australia      450


## **Aggregation Transformation**  
Calculate total revenue per country  

In [8]:
# Sum revenue within each country; the grouped result is indexed by Country.
country_totals = sales_data.groupby('Country')['Revenue'].sum()
# Turn the Country index back into an ordinary column.
revenue_by_country = country_totals.reset_index()
print("\nAggregation Transformation:\n", revenue_by_country)


Aggregation Transformation:
      Country  Revenue
0  Australia      450
1     Canada      300
2      India     1200
3         UK      950
4        USA      200


## **Sorting Transformation**  
Sort data by Revenue in descending order  


In [9]:
# Order rows from highest to lowest revenue. The result is bound to a new
# name, so sales_data itself keeps its original row order.
sorted_sales = sales_data.sort_values('Revenue', ascending=False)
print("\nSorting Transformation:\n", sorted_sales)



Sorting Transformation:
     ID Full Name  Age    Country  Revenue
3  104     MEERA   35         UK      950
1  102     SANYA   32      India      700
0  101     AARAV   27      India      500
5  106    ISHITA   25  Australia      450
4  105     KABIR   30     Canada      300
2  103     ROHIT   28        USA      200


## **Derived Column Transformation**  
Categorize Revenue as 'High' (greater than 400) or 'Low' (400 or below)  

In [10]:
# Label each row by comparing Revenue with the 400 cut-off: strictly greater
# is 'High', everything else (including exactly 400) is 'Low'.
# The comparison is vectorized over the whole column rather than invoking a
# Python lambda once per row through .apply.
is_high_revenue = sales_data['Revenue'].gt(400)
sales_data['Revenue Category'] = is_high_revenue.map({True: 'High', False: 'Low'})
print("\nDerived Column Transformation:\n", sales_data)


Derived Column Transformation:
     ID Full Name  Age    Country  Revenue Revenue Category
0  101     AARAV   27      India      500             High
1  102     SANYA   32      India      700             High
2  103     ROHIT   28        USA      200              Low
3  104     MEERA   35         UK      950             High
4  105     KABIR   30     Canada      300              Low
5  106    ISHITA   25  Australia      450             High
