In [2]:
import pandas as pd
import numpy as np

In [3]:
# Data provided
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace', 'Hannah'],
    'Roll No': [1, 2, 3, 4, 5, 6, 7, 8],
    'Address': ['123 Maple St', '456 Oak St', '789 Pine St', np.nan, '202 Cedar St', '303 Elm St', np.nan, '505 Walnut St'],
    'Age': [20, 21, np.nan, 22, 20, 21, 19, 22],
    'Percentage': [85, np.nan, 78, 88, 95, 70, 82, np.nan],
    'Gender': ['F', 'M', 'M', 'M', 'F', 'M', 'F', 'F']
}

In [4]:
# Creating DataFrame
df = pd.DataFrame(data)

In [5]:
# 1. Display the first five rows of the DataFrame
first_five_rows = df.head()
print(first_five_rows)

      Name  Roll No       Address   Age  Percentage Gender
0    Alice        1  123 Maple St  20.0        85.0      F
1      Bob        2    456 Oak St  21.0         NaN      M
2  Charlie        3   789 Pine St   NaN        78.0      M
3    David        4           NaN  22.0        88.0      M
4      Eve        5  202 Cedar St  20.0        95.0      F


In [6]:
# 2. Display the data types of each column
data_types = df.dtypes
print(data_types)

Name           object
Roll No         int64
Address        object
Age           float64
Percentage    float64
Gender         object
dtype: object


In [7]:
# 3. Handle missing values
# Filling numerical values with the mean
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Percentage'].fillna(df['Percentage'].mean(), inplace=True)
# Filling missing address values with “Unknown”
df['Address'].fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Percentage'].fillna(df['Percentage'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we 

In [8]:
# 4. Calculate the mean age and mean percentage of the students
mean_age = df['Age'].mean()
mean_percentage = df['Percentage'].mean()
print(mean_age)
print(mean_percentage)

20.714285714285715
83.0


In [9]:
# 5. Add a column 'Pass' which is True if the student’s percentage is 75 or above, otherwise False
df['Pass'] = df['Percentage'] >= 75

In [10]:
# 6. Filter the DataFrame to show only the students who have passed
passed_students = df[df['Pass']]
print(passed_students)

      Name  Roll No        Address        Age  Percentage Gender  Pass
0    Alice        1   123 Maple St  20.000000        85.0      F  True
1      Bob        2     456 Oak St  21.000000        83.0      M  True
2  Charlie        3    789 Pine St  20.714286        78.0      M  True
3    David        4        Unknown  22.000000        88.0      M  True
4      Eve        5   202 Cedar St  20.000000        95.0      F  True
6    Grace        7        Unknown  19.000000        82.0      F  True
7   Hannah        8  505 Walnut St  22.000000        83.0      F  True


In [11]:
# 7. Group the DataFrame by Gender and calculate the average percentage for each group
average_percentage_by_gender = df.groupby('Gender')['Percentage'].mean()


In [12]:
# 8. Sort the DataFrame by Percentage in descending order
sorted_df = df.sort_values(by='Percentage', ascending=False)

In [13]:
# 9. Create a new column ‘Age Group’ with values ‘Under 21’ and ‘21 and above’
df['Age Group'] = df['Age'].apply(lambda x: 'Under 21' if x < 21 else '21 and above')

In [14]:
# 10. Drop the column 'Pass'
df_without_pass = df.drop(columns=['Pass'])