In [2]:
import pandas as pd

In [4]:
# Sample dataset: six records stored column-wise, one list per field
data = dict(
    ID=[1, 2, 3, 4, 5, 6],
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank'],
    Age=[25, 30, 35, 40, 22, 29],
    Country=['USA', 'UK', 'USA', 'India', 'Canada', 'UK'],
    Sales=[200, 450, 300, 800, 150, 400],
)

In [6]:
# Build the working DataFrame; each key of `data` becomes one column
df = pd.DataFrame.from_dict(data)

In [10]:
# 1. Text transformation: add an upper-cased version of the Name column
df['Name_Upper'] = df['Name'].str.upper()
print(
    "Character Map (Uppercase Names):",
    df.loc[:, ['ID', 'Name', 'Name_Upper']],
    sep="\n",
)

Character Map (Uppercase Names):
   ID     Name Name_Upper
0   1    Alice      ALICE
1   2      Bob        BOB
2   3  Charlie    CHARLIE
3   4    David      DAVID
4   5      Eve        EVE
5   6    Frank      FRANK


In [12]:
# 2. Create two independent copies of the dataset (DataFrame.copy() is deep by
#    default, so edits to either copy leave df untouched)
copy1, copy2 = df.copy(), df.copy()

In [14]:
# Modify copies for parallel transformations.
# Each new column is computed from the corresponding column of `df` rather
# than from the copy itself, so re-running this cell does not compound the
# 10% increase or the +5 years (the cell is idempotent).
copy1['Sales'] = df['Sales'] * 1.1  # Increase sales by 10%
copy2['Age'] = df['Age'] + 5  # Increase age by 5 years

print("\nCopy 1 (Sales Increased):")
print(copy1)
print("\nCopy 2 (Age Increased):")
print(copy2)


Copy 1 (Sales Increased):
   ID     Name  Age Country  Sales Name_Upper
0   1    Alice   25     USA  220.0      ALICE
1   2      Bob   30      UK  495.0        BOB
2   3  Charlie   35     USA  330.0    CHARLIE
3   4    David   40   India  880.0      DAVID
4   5      Eve   22  Canada  165.0        EVE
5   6    Frank   29      UK  440.0      FRANK

Copy 2 (Age Increased):
   ID     Name  Age Country  Sales Name_Upper
0   1    Alice   30     USA    200      ALICE
1   2      Bob   35      UK    450        BOB
2   3  Charlie   40     USA    300    CHARLIE
3   4    David   45   India    800      DAVID
4   5      Eve   27  Canada    150        EVE
5   6    Frank   34      UK    400      FRANK


In [16]:
# 3. Conditional Split: rows with Sales above 300 vs. rows at or below 300
high_sales = df.loc[df['Sales'].gt(300)]
low_sales = df.loc[df['Sales'].le(300)]

print("\nHigh Sales:", high_sales, sep="\n")
print("\nLow Sales:", low_sales, sep="\n")


High Sales:
   ID   Name  Age Country  Sales Name_Upper
1   2    Bob   30      UK    450        BOB
3   4  David   40   India    800      DAVID
5   6  Frank   29      UK    400      FRANK

Low Sales:
   ID     Name  Age Country  Sales Name_Upper
0   1    Alice   25     USA    200      ALICE
2   3  Charlie   35     USA    300    CHARLIE
4   5      Eve   22  Canada    150        EVE


In [18]:
# 4. Aggregation: sum Sales within each Country, keeping Country as a column
total_sales_by_country = df.groupby('Country')['Sales'].sum().reset_index()
print("\nAggregation (Total Sales by Country):", total_sales_by_country, sep="\n")


Aggregation (Total Sales by Country):
  Country  Sales
0  Canada    150
1   India    800
2      UK    850
3     USA    500


In [20]:
# 5. Sort: order rows from highest to lowest Sales (original index labels kept)
sorted_df = df.sort_values('Sales', ascending=False)
print("\nSort (Descending Sales):", sorted_df, sep="\n")


Sort (Descending Sales):
   ID     Name  Age Country  Sales Name_Upper
3   4    David   40   India    800      DAVID
1   2      Bob   30      UK    450        BOB
5   6    Frank   29      UK    400      FRANK
2   3  Charlie   35     USA    300    CHARLIE
0   1    Alice   25     USA    200      ALICE
4   5      Eve   22  Canada    150        EVE


In [22]:
# 6. Derived Column: label each row 'High' when Sales > 300, otherwise 'Low'.
# The comparison is evaluated over the whole column at once and the resulting
# booleans are mapped to labels, replacing a per-row Python lambda via .apply.
df['Sales_Category'] = df['Sales'].gt(300).map({True: 'High', False: 'Low'})
print("\nDerived Column (Sales Category):")
print(df[['ID', 'Name', 'Sales', 'Sales_Category']])


Derived Column (Sales Category):
   ID     Name  Sales Sales_Category
0   1    Alice    200            Low
1   2      Bob    450           High
2   3  Charlie    300            Low
3   4    David    800           High
4   5      Eve    150            Low
5   6    Frank    400           High
