Q1. List any five functions of the pandas library with execution.

In [1]:
import pandas as pd

# Build a small example frame: five people and their ages.
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, 30, 35, 40, 45],
}
df = pd.DataFrame(data)

# 1. head(n): the first n rows.
print(df.head(3))

# 2. info(): writes the column/dtype summary itself and returns None,
#    which is why the surrounding print() also emits a trailing "None".
print(df.info())

# 3. describe(): summary statistics of the numeric column.
print(df.describe())

# 4. sort_values(): rows ordered by Age, largest first.
sorted_df = df.sort_values("Age", ascending=False)
print(sorted_df)

# 5. drop(): a new frame without the Age column.
df_dropped = df.drop(columns="Age")
print(df_dropped)


      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    5 non-null      object
 1   Age     5 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 212.0+ bytes
None
             Age
count   5.000000
mean   35.000000
std     7.905694
min    25.000000
25%    30.000000
50%    35.000000
75%    40.000000
max    45.000000
      Name  Age
4      Eve   45
3    David   40
2  Charlie   35
1      Bob   30
0    Alice   25
      Name
0    Alice
1      Bob
2  Charlie
3    David
4      Eve


Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the DataFrame with a new index that starts from 1 and increments by 2 for each row.

In [2]:
def reindex_dataframe(df, start=1, step=2):
    """Replace the index of *df* with an evenly spaced integer sequence.

    With the default arguments the new labels are 1, 3, 5, ...

    Args:
        df: DataFrame to re-index. Its index is replaced in place.
        start: Label assigned to the first row.
        step: Difference between consecutive labels; must be non-zero.

    Returns:
        The same DataFrame object, now carrying the new index.

    Raises:
        ValueError: If *step* is zero (raised by ``range``).
    """
    # One label per row: start, start + step, ..., start + step * (len(df) - 1).
    df.index = range(start, start + step * len(df), step)
    return df

# Three-row sample frame with columns A, B and C.
data = {
    "A": [10, 20, 30],
    "B": [40, 50, 60],
    "C": [70, 80, 90],
}

# Build the frame, give it the odd-numbered index and show the result.
df = reindex_dataframe(pd.DataFrame(data))
print(df)


    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that iterates over the DataFrame and calculates the sum of the first three values in the 'Values' column. The function should print the sum to the console.

In [3]:
def sum_first_three(df):
    """Print the sum of the first three entries of the 'Values' column.

    Args:
        df: DataFrame with a 'Values' column. If it has fewer than three
            rows, the rows that exist are summed.

    Returns:
        None. The sum is written to standard output.
    """
    leading_values = df["Values"].head(3)
    print("Sum of first three values:", leading_values.sum())

# Five sample values; only the first three (10, 20, 30) are summed.
data = {
    "Values": [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)
sum_first_three(df)

Sum of first three values: 60


Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column 'Word_Count' that contains the number of words in each row of the 'Text' column.

In [4]:
def add_word_count(df):
    """Add a 'Word_Count' column with the number of words in each 'Text' cell.

    Words are the whitespace-separated tokens of the cell's string form.
    Missing cells (None / NaN / NA) count as 0 words.

    Args:
        df: DataFrame with a 'Text' column. The new column is added in place.

    Returns:
        The same DataFrame object, with the 'Word_Count' column added.
    """

    def _count_words(value):
        # A missing cell holds no text. Without this guard str() would turn
        # it into the one-word string "None" or "nan" and report a count of 1.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return 0
        return len(str(value).split())

    df["Word_Count"] = df["Text"].apply(_count_words)
    return df

# Sample sentences of two, three and four words.
data = {
    "Text": [
        "Hello world",
        "Pandas is great",
        "Count words in text",
    ],
}

# Build the frame, attach the word counts and show the result.
df = add_word_count(pd.DataFrame(data))
print(df)

                  Text  Word_Count
0          Hello world           2
1      Pandas is great           3
2  Count words in text           4


Q5. How are DataFrame.size() and DataFrame.shape() different?

DataFrame.size

Returns the total number of elements (cells) in the DataFrame. It is an attribute, not a method, so it is written df.size without parentheses.
Computed as rows × columns.
Output is a single integer.
DataFrame.shape

Returns the dimensions of the DataFrame in the format (rows, columns). It is also an attribute, so it is written df.shape without parentheses.
Output is a tuple of two values: number of rows and number of columns.

In [5]:
# A 3 x 3 sample frame: three columns holding three integers each.
data = {
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9],
}
df = pd.DataFrame(data)

# size is the total cell count: 3 rows x 3 columns = 9.
print("Size:", df.size)
# shape is the (rows, columns) tuple: (3, 3).
print("Shape:", df.shape)


Size: 9
Shape: (3, 3)


Q6. Which function of pandas do we use to read an excel file?

# pd.read_excel reads a worksheet of an Excel workbook into a DataFrame;
# sheet_name selects which worksheet to load.
df = pd.read_excel("file_path.xlsx", sheet_name="Sheet1")

Q7. You have a Pandas DataFrame df that contains a column named 'Email' that contains email addresses in the format 'username@domain.com'. Write a Python function that creates a new column 'Username' in df that contains only the username part of each email address.

In [8]:
def extract_username(df):
    """Add a 'Username' column holding the part of each 'Email' before the '@'.

    An address without '@' is copied unchanged. A missing address (None / NaN)
    yields a missing username instead of raising.

    Args:
        df: DataFrame with an 'Email' column of strings. The new column is
            added in place.

    Returns:
        The same DataFrame object, with the 'Username' column added.
    """
    # The vectorised .str accessor propagates missing values; a per-element
    # x.split("@") would raise AttributeError on None/NaN cells.
    # n=1 stops at the first '@', which is all that is needed for element 0.
    df["Username"] = df["Email"].str.split("@", n=1).str[0]
    return df

# Three sample addresses in username@domain form.
data = {
    "Email": [
        "nihar@example.com",
        "ranjan@test.com",
        "sahoo@domain.org",
    ],
}

# Build the frame, derive the usernames and show the result.
df = extract_username(pd.DataFrame(data))
print(df)

               Email Username
0  nihar@example.com    nihar
1    ranjan@test.com   ranjan
2   sahoo@domain.org    sahoo


Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects
all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10. The
function should return a new DataFrame that contains only the selected rows.
For example, if df contains the following values:
A B C
0 3 5 1
1 8 2 7
2 6 9 4
3 2 3 5
4 9 1 2
Your function should select the following rows: A B C
1 8 2 7
4 9 1 2
The function should return a new DataFrame that contains only the selected rows.

In [9]:
def filter_rows(df, a_min=5, b_max=10):
    """Select the rows where 'A' is greater than *a_min* and 'B' is less than *b_max*.

    Both comparisons are strict. With the defaults this keeps rows having
    A > 5 and B < 10.

    Args:
        df: DataFrame with numeric columns 'A' and 'B'. It is not modified.
        a_min: Exclusive lower bound for column 'A'.
        b_max: Exclusive upper bound for column 'B'.

    Returns:
        A new DataFrame holding only the matching rows, with their original
        index labels.
    """
    matches = (df["A"] > a_min) & (df["B"] < b_max)
    # Explicit copy so the caller can add or change columns on the result
    # without touching, or getting chained-assignment warnings about, df.
    return df[matches].copy()

# The sample table from the question.
data = {
    "A": [3, 8, 6, 2, 9],
    "B": [5, 2, 9, 3, 1],
    "C": [1, 7, 4, 5, 2],
}
df = pd.DataFrame(data)

# Rows 1, 2 and 4 satisfy A > 5 and B < 10; row 2 qualifies because
# A = 6 and B = 9.
filtered_df = filter_rows(df)
print(filtered_df)

   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean, median, and standard deviation of the values in the 'Values' column.

In [10]:
def calculate_stats(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    Args:
        df: DataFrame with a numeric 'Values' column.

    Returns:
        None. The three statistics are written to standard output.
    """
    values = df["Values"]

    # Compute all three statistics before printing anything.
    mean_value, median_value, std_dev = (
        values.mean(),
        values.median(),
        values.std(),
    )

    print(f"Mean: {mean_value}")
    print(f"Median: {median_value}")
    print(f"Standard Deviation: {std_dev}")

# Five evenly spaced sample values.
data = {
    "Values": [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)
calculate_stats(df)

Mean: 30.0
Median: 30.0
Standard Deviation: 15.811388300841896


Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days for each row in the DataFrame. The moving average should be calculated using a window of size 7 and
should include the current day.

In [11]:
def add_moving_average(df, window=7):
    """Return a date-sorted copy of *df* with a trailing 'MovingAverage' of 'Sales'.

    Each row's average covers that row and up to ``window - 1`` preceding rows
    in date order. Early rows, which have fewer predecessors, average whatever
    rows are available.

    Args:
        df: DataFrame with a 'Date' column (anything ``pd.to_datetime``
            accepts) and a numeric 'Sales' column. It is not modified.
        window: Number of rows in the rolling window, current row included.

    Returns:
        A new DataFrame sorted by 'Date', with 'Date' converted to datetime
        and a 'MovingAverage' column added.
    """
    # Convert and sort on a copy. Assigning the converted dates back onto df
    # would change the caller's frame while the result lives in a different
    # object, leaving the input half-modified.
    result = df.assign(Date=pd.to_datetime(df["Date"])).sort_values("Date")

    # NOTE(review): the window counts rows, not calendar days. That equals
    # "the past `window` days" only if there is exactly one row per day -
    # confirm for real data.
    result["MovingAverage"] = (
        result["Sales"].rolling(window=window, min_periods=1).mean()
    )
    return result

# Ten consecutive days of sales, starting on 2024-01-01.
data = {
    "Date": pd.date_range("2024-01-01", periods=10, freq="D"),
    "Sales": [100, 200, 150, 300, 250, 400, 500, 600, 700, 800],
}

# Build the frame, attach the moving average and show the result.
df = add_moving_average(pd.DataFrame(data))
print(df)

        Date  Sales  MovingAverage
0 2024-01-01    100     100.000000
1 2024-01-02    200     150.000000
2 2024-01-03    150     150.000000
3 2024-01-04    300     187.500000
4 2024-01-05    250     200.000000
5 2024-01-06    400     233.333333
6 2024-01-07    500     271.428571
7 2024-01-08    600     342.857143
8 2024-01-09    700     414.285714
9 2024-01-10    800     507.142857


Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new column 'Weekday' in the DataFrame. The 'Weekday' column should contain the weekday name (e.g. Monday, Tuesday) corresponding to each date in the 'Date' column. For example, if df contains the following values:
Date
0 2023-01-01
1 2023-01-02
2 2023-01-03
3 2023-01-04
4 2023-01-05
Your function should create the following DataFrame:

Date Weekday
0 2023-01-01 Sunday
1 2023-01-02 Monday
2 2023-01-03 Tuesday
3 2023-01-04 Wednesday
4 2023-01-05 Thursday
The function should return the modified DataFrame.

In [12]:
def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each 'Date' value.

    Args:
        df: DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.
            The column is converted to datetime and the new column is added,
            both in place.

    Returns:
        The same DataFrame object, with 'Date' as datetime and a 'Weekday'
        column of names such as "Sunday" or "Monday".
    """
    parsed_dates = pd.to_datetime(df["Date"])
    df["Date"] = parsed_dates
    df["Weekday"] = parsed_dates.dt.day_name()
    return df

# The five sample dates from the question, given as ISO strings.
data = {
    "Date": [
        "2023-01-01",
        "2023-01-02",
        "2023-01-03",
        "2023-01-04",
        "2023-01-05",
    ],
}

# Build the frame, attach the weekday names and show the result.
df = add_weekday_column(pd.DataFrame(data))
print(df)

        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python function to select all rows where the date is between '2023-01-01' and '2023-01-31'.

In [14]:
def filter_date_range(df, start_date="2023-01-01", end_date="2023-01-31"):
    """Select the rows whose 'Date' falls on a day from *start_date* to *end_date*.

    Both bounds are inclusive and are compared at day granularity, so every
    timestamp on the end day is kept, whatever its time of day.

    Args:
        df: DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.
            It is not modified.
        start_date: First day of the range (string or timestamp-like).
        end_date: Last day of the range (string or timestamp-like).

    Returns:
        A new DataFrame with the matching rows, their original index labels,
        and 'Date' converted to datetime.
    """
    first_day = pd.Timestamp(start_date).normalize()
    last_day = pd.Timestamp(end_date).normalize()

    # Convert on a copy so that selecting rows does not alter the caller's frame.
    converted = df.assign(Date=pd.to_datetime(df["Date"]))

    # Compare calendar days, not raw timestamps. A bare "2023-01-31" bound
    # means midnight, which would drop every later timestamp on that day.
    in_range = converted["Date"].dt.normalize().between(first_day, last_day)
    return converted[in_range]

# Dates just outside, on the edges of, and inside January 2023.
data = {
    "Date": [
        "2022-12-31",
        "2023-01-01",
        "2023-01-15",
        "2023-01-31",
        "2023-02-01",
    ],
    "Value": [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

# Only the three January rows are kept.
filtered_df = filter_date_range(df)
print(filtered_df)


        Date  Value
1 2023-01-01     20
2 2023-01-15     30
3 2023-01-31     40


Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to be imported?

In [15]:
import pandas as pd