# melt

In [3]:
import pandas as pd

# Wide-format marks table: one row per student, one column per subject.
df = pd.DataFrame(
    dict(
        Name=['Alice', 'Bob', 'Charlie'],
        Math=[85, 90, 78],
        Science=[88, 92, 80],
        English=[82, 85, 79],
    )
)

# Reshape to long format: 'Name' identifies each row, and every other column
# is unpivoted into a (Subject, Marks) pair.
melted_df = pd.melt(
    df,
    id_vars=['Name'],
    var_name='Subject',
    value_name='Marks',
)

print(melted_df)


      Name  Subject  Marks
0    Alice     Math     85
1      Bob     Math     90
2  Charlie     Math     78
3    Alice  Science     88
4      Bob  Science     92
5  Charlie  Science     80
6    Alice  English     82
7      Bob  English     85
8  Charlie  English     79


In [12]:
# Same marks table, now with a numeric student identifier next to the name.
df = pd.DataFrame(
    dict(
        Student_ID=[101, 102, 103],
        Name=['Alice', 'Bob', 'Charlie'],
        Math=[85, 90, 78],
        Science=[88, 92, 80],
        English=[82, 85, 79],
    )
)

# Both 'Student_ID' and 'Name' are identifier columns; only the subject
# columns are unpivoted.
melted_df = pd.melt(
    df,
    id_vars=['Student_ID', 'Name'],
    var_name='Subject',
    value_name='Marks',
)

print(melted_df)


   Student_ID     Name  Subject  Marks
0         101    Alice     Math     85
1         102      Bob     Math     90
2         103  Charlie     Math     78
3         101    Alice  Science     88
4         102      Bob  Science     92
5         103  Charlie  Science     80
6         101    Alice  English     82
7         102      Bob  English     85
8         103  Charlie  English     79


In [17]:
# Unpivot only 'Math' and 'Science'. Columns that appear in neither id_vars
# nor value_vars ('Student_ID' and 'English' here) are dropped from the result.
melted_df = pd.melt(
    df,
    id_vars=['Name'],
    value_vars=['Math', 'Science'],
    var_name='Subject',
    value_name='Marks',
)

print(melted_df)


      Name  Subject  Marks
0    Alice     Math     85
1      Bob     Math     90
2  Charlie     Math     78
3    Alice  Science     88
4      Bob  Science     92
5  Charlie  Science     80


In [14]:
# Bare expression as the cell's last line: the notebook renders the current `df`.
df

Unnamed: 0,Student_ID,Name,Math,Science,English
0,101,Alice,85,88,82
1,102,Bob,90,92,85
2,103,Charlie,78,80,79


In [18]:
import pandas as pd

# Sales records in long format: one row per (Region, Product) combination.
data = dict(
    Region=['North', 'North', 'South', 'South', 'East', 'East'],
    Product=['A', 'B', 'A', 'B', 'A', 'B'],
    Sales=[250, 150, 200, 300, 100, 400],
)

df = pd.DataFrame(data)
print(df)


  Region Product  Sales
0  North       A    250
1  North       B    150
2  South       A    200
3  South       B    300
4   East       A    100
5   East       B    400


In [19]:
# Bare expression as the cell's last line: the notebook renders the current `df`.
df

Unnamed: 0,Region,Product,Sales
0,North,A,250
1,North,B,150
2,South,A,200
3,South,B,300
4,East,A,100
5,East,B,400


In [20]:
# Pivot the sales records: one row per Region, one column per Product,
# each cell holding the mean of 'Sales' for that pair.
pivot = df.pivot_table(
    values='Sales',
    index='Region',
    columns='Product',
    aggfunc='mean',
)

print(pivot)


Product      A      B
Region               
East     100.0  400.0
North    250.0  150.0
South    200.0  300.0


In [23]:
# Passing a list to aggfunc yields one block of Product columns per
# aggregation, stacked under a two-level column index.
pivot = df.pivot_table(
    values='Sales',
    index='Region',
    columns='Product',
    aggfunc=['min', 'max', 'sum', 'mean', 'median'],
)

print(pivot)


         min       max       sum        mean        median       
Product    A    B    A    B    A    B      A      B      A      B
Region                                                           
East     100  400  100  400  100  400  100.0  400.0  100.0  400.0
North    250  150  250  150  250  150  250.0  150.0  250.0  150.0
South    200  300  200  300  200  300  200.0  300.0  200.0  300.0


In [24]:
# Sum of 'Sales' per Region/Product; fill_value=0 replaces cells that would
# otherwise be missing with 0.
pivot = df.pivot_table(
    values='Sales',
    index='Region',
    columns='Product',
    aggfunc='sum',
    fill_value=0,
)

print(pivot)


Product    A    B
Region           
East     100  400
North    250  150
South    200  300


In [25]:
# Sales records extended with a 'Month' column.
data = {
    'Region':  ['North', 'North', 'South', 'South', 'East', 'East'],
    'Month':   ['Jan', 'Feb', 'Jan', 'Feb', 'Jan', 'Feb'],
    'Product': ['A', 'B', 'A', 'B', 'A', 'B'],
    'Sales':   [250, 150, 200, 300, 100, 400],
}
df = pd.DataFrame(data)

# Two-level row index (Region, Month). Combinations with no matching rows
# show up as NaN because no fill_value is given.
pivot = df.pivot_table(
    values='Sales',
    index=['Region', 'Month'],
    columns='Product',
    aggfunc='mean',
)

print(pivot)


Product           A      B
Region Month              
East   Feb      NaN  400.0
       Jan    100.0    NaN
North  Feb      NaN  150.0
       Jan    250.0    NaN
South  Feb      NaN  300.0
       Jan    200.0    NaN


In [1]:
import pandas as pd
import numpy as np

# Employee records used by the following cell; joining dates are parsed to
# datetimes up front.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 35, 40, 28],
    Department=['HR', 'Finance', 'IT', 'Marketing', 'Finance'],
    Salary=[50000, 60000, 70000, 80000, 62000],
    Joining_Date=pd.to_datetime(
        ['2020-01-15', '2019-06-30', '2018-09-23', '2021-02-10', '2020-07-19']
    ),
)

# Build the frame from the column dictionary.
df = pd.DataFrame(data)

# Print a caption followed by the first rows.
print("Sample DataFrame:", df.head(), sep="\n")


Sample DataFrame:
      Name  Age Department  Salary Joining_Date
0    Alice   25         HR   50000   2020-01-15
1      Bob   30    Finance   60000   2019-06-30
2  Charlie   35         IT   70000   2018-09-23
3    David   40  Marketing   80000   2021-02-10
4      Eve   28    Finance   62000   2020-07-19


In [8]:
# Flag 'Age' outliers with the 1.5 * IQR rule and replace them with the median age.
q1 = df['Age'].quantile(0.25)
q3 = df['Age'].quantile(0.75)
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
median_values = df['Age'].median()

# Rows whose age falls outside [lower_bound, upper_bound].
is_age_outlier = (df['Age'] > upper_bound) | (df['Age'] < lower_bound)

# Restrict the assignment to the 'Age' column. A row-only selector
# (df.loc[mask] = value) would overwrite every column of the flagged rows
# with the median, not just their age.
df.loc[is_age_outlier, 'Age'] = median_values