# Question-1

In [None]:
import pandas as pd

# Read a CSV file into a DataFrame.
# 'data.csv' is a relative path, so it is resolved against the current working directory.
df = pd.read_csv('data.csv')

In [None]:
# Display the first 5 rows of the DataFrame (head() defaults to n=5)
df.head()

In [None]:
# Print a concise summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

In [None]:
# Group data by a 'category' column and calculate the mean of 'value' for each group.
# The result is a Series indexed by the distinct 'category' values.
grouped = df.groupby('category')['value'].mean()

In [None]:
import matplotlib.pyplot as plt

# Create a line plot of a column ('column_name' is a placeholder);
# the values are drawn against the DataFrame's index
df['column_name'].plot(kind='line')
# Label the axes and title of the current matplotlib figure, then render it
plt.xlabel('X-Axis Label')
plt.ylabel('Y-Axis Label')
plt.title('Title of the Plot')
plt.show()

# Question-2

In [1]:
import pandas as pd

def reindex_with_custom_index(df):
    """Return a new DataFrame whose rows are labelled 1, 3, 5, ...

    The frame passed in is not modified; ``set_index`` hands back a new
    DataFrame that carries the odd-numbered labels.
    """
    row_count = len(df)
    # One odd label per row: 1, 3, ..., 2 * row_count - 1.
    odd_labels = pd.Index(range(1, 2 * row_count, 2))
    return df.set_index(odd_labels)

# Example usage: a 3-row frame that starts with the default 0, 1, 2 index
data = {'A': [10, 20, 30], 'B': [40, 50, 60], 'C': [70, 80, 90]}
df = pd.DataFrame(data)

# Re-index the DataFrame (the function returns the re-indexed frame, so rebind df)
df = reindex_with_custom_index(df)

# Display the updated DataFrame
print(df)


    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


# Question-3

In [2]:
import pandas as pd

def calculate_sum_of_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    Args:
        df: DataFrame expected to contain a 'Values' column.

    Returns:
        None. The result (or a message saying the column is missing) is
        printed to the console. If the column holds fewer than three rows,
        the sum covers the rows that exist.
    """
    if 'Values' not in df.columns:
        print("The 'Values' column does not exist in the DataFrame.")
        return

    # .iloc slices strictly by position. A plain [:3] on a Series may be
    # treated as a label slice in some pandas versions when the index is not
    # integer-based (e.g. a float index), which would pick the wrong rows.
    total_sum = df['Values'].iloc[:3].sum()

    # Print the sum to the console
    print("Sum of the first three values:", total_sum)

# Example usage: five values, of which only the first three (10 + 20 + 30) are summed
data = {'Values': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)

# Calculate and print the sum of the first three values
# (the function prints its result and returns nothing)
calculate_sum_of_first_three_values(df)


Sum of the first three values: 60


# Question-4

In [3]:
import pandas as pd

def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are the whitespace-separated tokens of each 'Text' entry. Missing
    entries (None / NaN) are counted as 0 words instead of raising.

    Args:
        df: DataFrame with a string 'Text' column. It is modified in place.

    Returns:
        None.
    """
    # str.split() with no separator splits on runs of whitespace.
    tokens = df['Text'].str.split()
    # .str.len() skips missing entries (yielding NaN) where apply(len) would
    # raise TypeError; fill those with 0 so the column stays integer-typed.
    df['Word_Count'] = tokens.str.len().fillna(0).astype(int)

# Example usage: three short sentences to count words in
data = {'Text': ['This is a sample sentence.',
                 'Another example of a sentence.',
                 'Just a short text.']}
df = pd.DataFrame(data)

# Add the 'Word_Count' column to the DataFrame
# (the function modifies df in place and returns nothing)
add_word_count_column(df)

# Display the updated DataFrame
print(df)


                             Text  Word_Count
0      This is a sample sentence.           5
1  Another example of a sentence.           5
2              Just a short text.           4


# Question-5

## DataFrame.size:

### `DataFrame.size` is an attribute (not a method) that returns the total number of elements in the DataFrame, i.e. the number of rows multiplied by the number of columns.
### It counts every individual cell, including cells holding missing values, regardless of their values or data types.

In [5]:
import pandas as pd

# Build a small DataFrame with 3 rows and 2 columns
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# size is an attribute, so it is read without parentheses
size = df.size  # This will return 6 (3 rows * 2 columns)

## DataFrame.shape:

### `DataFrame.shape` is an attribute that returns a tuple representing the dimensions of the DataFrame, where the first element is the number of rows and the second element is the number of columns.
### It provides structural information about the DataFrame (how many rows and how many columns) rather than a single element count.
### The tuple has the form `(number_of_rows, number_of_columns)`, so `shape[0] * shape[1]` equals `DataFrame.size`.

In [6]:
import pandas as pd

# Build a small DataFrame with 3 rows and 2 columns
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# shape is an attribute, so it is read without parentheses
shape = df.shape  # This will return (3, 2) since there are 3 rows and 2 columns.

# Question-6

In [None]:
import pandas as pd

# Read an Excel file into a DataFrame.
# 'your_file.xlsx' is a placeholder path; with no sheet_name given, read_excel loads the first sheet.
# NOTE: reading .xlsx files needs an Excel engine (e.g. openpyxl) to be installed.
df = pd.read_excel('your_file.xlsx')

# Question-7

In [8]:
import pandas as pd

def extract_username(df):
    """Add a 'Username' column holding the part of 'Email' before the first '@'.

    The DataFrame is modified in place; nothing is returned. An address
    without an '@' is copied over unchanged.
    """
    # Only the piece before the first '@' is needed, so one split suffices.
    local_and_rest = df['Email'].str.split('@', n=1)
    df['Username'] = local_and_rest.str[0]

# Example usage: three addresses that share the same domain
data = {'Email': ['john.doe@example.com', 'jane.smith@example.com', 'bob@example.com']}
df = pd.DataFrame(data)

# Extract and store the usernames in the 'Username' column
# (the function modifies df in place and returns nothing)
extract_username(df)

# Display the updated DataFrame
print(df)


                    Email    Username
0    john.doe@example.com    john.doe
1  jane.smith@example.com  jane.smith
2         bob@example.com         bob


# Question-8

In [9]:
import pandas as pd

def select_rows(df):
    """Return the rows of ``df`` where 'A' is above 5 and 'B' is below 10.

    Both comparisons are strict. Matching rows keep their original index
    labels.
    """
    a_above_5 = df['A'] > 5
    b_below_10 = df['B'] < 10
    # Element-wise AND of the two boolean masks picks rows meeting both tests.
    return df[a_above_5 & b_below_10]

# Example usage: rows 1, 2 and 4 are the ones with 'A' > 5 (every 'B' here is below 10)
data = {'A': [3, 8, 6, 2, 9],
        'B': [5, 2, 9, 3, 1],
        'C': [1, 7, 4, 5, 2]}

df = pd.DataFrame(data)

# Select rows where 'A' > 5 and 'B' < 10
selected_df = select_rows(df)

# Display the selected DataFrame
print(selected_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


# Question-9

In [10]:
import pandas as pd

def calculate_stats(df):
    """Return ``(mean, median, standard deviation)`` of the 'Values' column.

    The standard deviation is whatever ``Series.std()`` computes with its
    default arguments (the sample standard deviation, ddof=1).
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()

# Example usage: five evenly spaced values
data = {'Values': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)

# Calculate mean, median, and standard deviation (returned as a 3-tuple, unpacked here)
mean, median, std = calculate_stats(df)

# Print the results
print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std)


Mean: 30.0
Median: 30.0
Standard Deviation: 15.811388300841896


# Question-10

In [11]:
import pandas as pd

def calculate_moving_average(df):
    """Return a date-sorted copy of ``df`` with a 7-day moving average of 'Sales'.

    Args:
        df: DataFrame with a 'Date' column (anything ``pd.to_datetime``
            accepts) and a numeric 'Sales' column. It is not modified.

    Returns:
        A new DataFrame sorted by 'Date', with 'Date' converted to datetime
        and a 'MovingAverage' column. Each value is the mean of 'Sales' over
        the rows dated within the 7 days ending at that row's date, the row
        itself included.
    """
    # Work on a copy so the caller's 'Date' column is not converted in place.
    result = df.copy()
    result['Date'] = pd.to_datetime(result['Date'])
    # A time-based window requires the 'Date' column to be sorted.
    result = result.sort_values(by='Date')

    # '7D' makes the window span 7 calendar days rather than 7 rows, so gaps
    # in the dates do not pull older sales into the average. Offset windows
    # default to min_periods=1, so the earliest rows still get a value.
    result['MovingAverage'] = result.rolling('7D', on='Date')['Sales'].mean()

    return result

# Example usage: five consecutive days of sales, so each average covers
# every row up to and including the current one
data = {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
        'Sales': [10, 15, 20, 25, 30]}

df = pd.DataFrame(data)

# Calculate the moving average and add it to the DataFrame
# (the function returns the resulting frame, so rebind df)
df = calculate_moving_average(df)

# Display the updated DataFrame
print(df)


        Date  Sales  MovingAverage
0 2023-01-01     10           10.0
1 2023-01-02     15           12.5
2 2023-01-03     20           15.0
3 2023-01-04     25           17.5
4 2023-01-05     30           20.0


# Question-11

In [12]:
import pandas as pd

def add_weekday_column(df):
    """Convert 'Date' to datetime and add a 'Weekday' column of day names.

    The DataFrame is modified in place and the same object is returned.
    Day names come from ``strftime('%A')`` (e.g. 'Sunday').
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates
    # '%A' formats each date as its full weekday name.
    df['Weekday'] = parsed_dates.dt.strftime('%A')
    return df

# Example usage: five consecutive dates given as ISO-formatted strings
data = {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']}
df = pd.DataFrame(data)

# Add the 'Weekday' column to the DataFrame
df = add_weekday_column(df)

# Display the updated DataFrame
print(df)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


# Question-12

In [14]:
import pandas as pd

def select_rows_between_dates(df, start_date='2023-01-01', end_date='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' lies within an inclusive range.

    Args:
        df: DataFrame with a 'Date' column (anything ``pd.to_datetime``
            accepts). It is not modified.
        start_date: First date of the range, inclusive. Defaults to
            '2023-01-01'.
        end_date: Last date of the range, inclusive. Defaults to
            '2023-01-31'.

    Returns:
        A new DataFrame holding the matching rows under their original index
        labels, with 'Date' converted to datetime.
    """
    # assign() builds a new frame, so the caller's 'Date' column keeps its
    # original dtype instead of being converted as a side effect.
    converted = df.assign(Date=pd.to_datetime(df['Date']))

    # Define the start and end of the date range
    lower = pd.to_datetime(start_date)
    upper = pd.to_datetime(end_date)

    # Use boolean indexing to select rows within the (inclusive) date range
    in_range = (converted['Date'] >= lower) & (converted['Date'] <= upper)
    selected_rows = converted[in_range]

    return selected_rows

# Example usage: three January dates and one February date (row 2) that falls outside the range
data = {'Date': ['2023-01-05', '2023-01-15', '2023-02-10', '2023-01-25']}
df = pd.DataFrame(data)

# Select rows between '2023-01-01' and '2023-01-31' (both ends inclusive)
selected_df = select_rows_between_dates(df)

# Display the selected DataFrame
print(selected_df)


        Date
0 2023-01-05
1 2023-01-15
3 2023-01-25


# Question-13

In [16]:
import pandas as pd