Given a CSV file, load it into a Pandas DataFrame and display the first 5 rows.


In [None]:
import pandas as pd
import numpy as np  # For handling missing values (NaN) in some examples

# 1. Given a CSV file, load it into a Pandas DataFrame and display the first 5 rows.
def load_and_display_csv(csv_file):
    """Read ``csv_file`` with pandas and print its first five rows.

    Nothing is returned. If the file does not exist, an error message is
    printed instead of the preview.
    """
    try:
        frame = pd.read_csv(csv_file)
    except FileNotFoundError:
        print(f"Error: File '{csv_file}' not found.")
        return

    # Only reached when the file was read successfully.
    print("First 5 rows of the DataFrame:")
    print(frame.head())

# Build a small demo table and write it to disk so there is a file to load
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emily'],
    'Age': [25, 30, 22, 28, 34],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
}
df_sample = pd.DataFrame(data)
# index=False keeps the row index out of the written file
df_sample.to_csv('sample_data.csv', index=False)

load_and_display_csv('sample_data.csv')

Create a Pandas DataFrame from a dictionary containing student names and their scores.


In [None]:

# 2. Create a Pandas DataFrame from a dictionary containing student names and their scores.
def create_student_dataframe(student_data):
    """Build and return a DataFrame from the ``student_data`` dictionary."""
    return pd.DataFrame(student_data)

student_data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Score': [90, 85, 92, 78],
}
student_df = create_student_dataframe(student_data)
# Heading and table go out as two newline-separated lines
print("\nStudent DataFrame:", student_df, sep="\n")

Write a program to select specific columns ('Name' and 'Age') from a DataFrame.


In [None]:

# 3. Write a program to select specific columns ('Name' and 'Age') from a DataFrame.
def select_name_age_columns(df):
    """Return the 'Name' and 'Age' columns of ``df`` as a DataFrame.

    If either column is absent, an error message is printed and ``None``
    is returned.
    """
    wanted = ['Name', 'Age']
    try:
        subset = df[wanted]
    except KeyError:
        print("Error: DataFrame does not contain 'Name' or 'Age' columns.")
        return None
    return subset

name_age_df = select_name_age_columns(df_sample)
# None signals that the required columns were missing
if name_age_df is not None:
    print("\nName and Age Columns:", name_age_df, sep="\n")


Implement a function that filters a DataFrame to show only rows where 'Age' is greater than a given value.


In [None]:


# 4. Implement a function that filters a DataFrame to show only rows where 'Age' is greater than a given value.
def filter_by_age(df, min_age):
    """Return the rows of ``df`` whose 'Age' is strictly greater than ``min_age``.

    If ``df`` has no 'Age' column, an error message is printed and ``None``
    is returned.
    """
    try:
        older_than_min = df['Age'] > min_age  # boolean mask, one entry per row
        return df[older_than_min]
    except KeyError:
        print("Error: DataFrame does not contain an 'Age' column.")
        return None

filtered_df = filter_by_age(df_sample, 27)
# None signals that the 'Age' column was missing
if filtered_df is not None:
    print("\nDataFrame filtered by Age > 27:", filtered_df, sep="\n")

Given a DataFrame with sales data, calculate the total sales for each product.


In [None]:

# 5. Given a DataFrame with sales data, calculate the total sales for each product.
def calculate_total_sales(df):
    """Return the total sales (Price * Quantity, summed) for each product.

    The per-row totals are computed on a derived frame, so the caller's
    DataFrame is left untouched (no 'Total Sales' column is added to it).

    Args:
        df: DataFrame with 'Product', 'Price' and 'Quantity' columns.

    Returns:
        A Series named 'Total Sales', indexed by 'Product', or ``None``
        (after printing an error) when a required column is missing.
    """
    try:
        line_totals = df['Price'] * df['Quantity']
        # assign() returns a new frame; the input is not mutated.
        with_totals = df.assign(**{'Total Sales': line_totals})
        return with_totals.groupby('Product')['Total Sales'].sum()
    except KeyError:
        print("Error: DataFrame missing 'Product', 'Price', or 'Quantity' columns.")
        return None

# Demo sales table: each product appears on more than one row
sales_data = {
    'Product': ['A', 'B', 'A', 'C', 'B', 'C'],
    'Price': [10, 20, 10, 15, 20, 15],
    'Quantity': [5, 2, 10, 3, 4, 8],
}
sales_df = pd.DataFrame(sales_data)

total_sales = calculate_total_sales(sales_df)
# None signals that a required column was missing
if total_sales is not None:
    print("\nTotal Sales by Product:", total_sales, sep="\n")


Create a new column 'Total' (Price * Quantity) in a DataFrame.


In [None]:



# 6. Create a new column 'Total' (Price * Quantity) in a DataFrame.
def create_total_column(df):
    """Add a 'Total' column (Price * Quantity) to ``df`` and return ``df``.

    The column is written into the DataFrame that was passed in. If 'Price'
    or 'Quantity' is missing, an error message is printed and ``None`` is
    returned.
    """
    try:
        unit_price = df['Price']
        units = df['Quantity']
        df['Total'] = unit_price * units  # modifies the caller's frame
    except KeyError:
        print("Error: DataFrame missing 'Price' or 'Quantity' columns.")
        return None
    return df

# Pass a copy: create_total_column writes the new column into the frame it is given
sales_df_with_total = create_total_column(sales_df.copy())
if sales_df_with_total is not None:
    print("\nDataFrame with Total column:", sales_df_with_total, sep="\n")

Write a program to sort a DataFrame by a specific column (e.g., 'Salary').


In [None]:



# 7. Write a program to sort a DataFrame by a specific column (e.g., 'Salary').
def sort_dataframe(df, column_name, ascending=True):
    """Return ``df`` sorted by ``column_name``.

    ``ascending`` is passed through to ``sort_values``. If the column is
    missing, an error message is printed and ``None`` is returned.
    """
    try:
        return df.sort_values(by=column_name, ascending=ascending)
    except KeyError:
        print(f"Error: DataFrame does not contain a '{column_name}' column.")
        return None

# Demo employee table, also reused by later examples
employee_data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 22, 28],
    'Salary': [50000, 60000, 45000, 55000],
}
employee_df = pd.DataFrame(employee_data)

sorted_employee_df = sort_dataframe(employee_df.copy(), 'Salary')
# None signals that the sort column was missing
if sorted_employee_df is not None:
    print("\nDataFrame sorted by Salary:", sorted_employee_df, sep="\n")


Implement a function that groups a DataFrame by a categorical column and calculates the average value of a numerical column.


In [None]:

# 8. Implement a function that groups a DataFrame by a categorical column and calculates the average value of a numerical column.
def group_and_calculate_average(df, group_column, value_column):
    """Return the mean of ``value_column`` for each distinct ``group_column`` value.

    The result is a Series indexed by the group keys. If either column is
    missing, an error message is printed and ``None`` is returned.
    """
    try:
        grouped = df.groupby(group_column)
        per_group_mean = grouped[value_column].mean()
    except KeyError:
        print(f"Error: DataFrame does not contain '{group_column}' or '{value_column}' columns.")
        return None
    return per_group_mean

average_salary_by_age = group_and_calculate_average(employee_df, 'Age', 'Salary')
# None signals that one of the columns was missing
if average_salary_by_age is not None:
    print("\nAverage Salary by Age:", average_salary_by_age, sep="\n")

Given two DataFrames, merge them based on a common column ('ID').


In [None]:


# 9. Given two DataFrames, merge them based on a common column ('ID').
def merge_dataframes(df1, df2, common_column):
    """Merge ``df1`` and ``df2`` on ``common_column`` and return the result.

    ``pd.merge`` is called with its default join type. If the key column is
    missing, an error message is printed and ``None`` is returned.
    """
    try:
        return pd.merge(left=df1, right=df2, on=common_column)
    except KeyError:
        print(f"Error: DataFrames do not contain a '{common_column}' column.")
        return None

# Two demo tables that share an 'ID' column
df1 = pd.DataFrame({
    'ID': [1, 2, 3, 4],
    'Product': ['A', 'B', 'C', 'D'],
})
df2 = pd.DataFrame({
    'ID': [2, 4, 1, 3],  # IDs listed in a different order than in df1
    'Price': [10, 20, 15, 25],
})

merged_df = merge_dataframes(df1, df2, 'ID')
# None signals that the key column was missing
if merged_df is not None:
    print("\nMerged DataFrame:", merged_df, sep="\n")

Write a program to handle missing values (fill with 0 or drop rows) in a DataFrame.


In [None]:


# 10. Write a program to handle missing values (fill with 0 or drop rows) in a DataFrame.
def handle_missing_values(df, fill_value=0, drop_na=False):
    """Return ``df`` with missing values either filled or dropped.

    By default missing entries are replaced with ``fill_value``. When
    ``drop_na`` is truthy, rows containing missing entries are dropped
    instead and ``fill_value`` is ignored. A heading describing the chosen
    action is printed; the resulting DataFrame itself is only returned.
    """
    if not drop_na:
        result = df.fillna(fill_value)
        print(f"\nDataFrame after filling missing values with {fill_value}:")
        return result

    result = df.dropna()
    print("\nDataFrame after dropping rows with missing values:")
    return result

# Demo table with one NaN in column A and one in column B
missing_data = {
    'A': [1, 2, np.nan, 4],
    'B': [5, np.nan, 7, 8],
    'C': [9, 10, 11, 12],
}
missing_df = pd.DataFrame(missing_data)
print("\nOriginal DataFrame with missing values:", missing_df, sep="\n")

# Strategy 1: replace every missing entry with 0
filled_df = handle_missing_values(missing_df.copy(), fill_value=0)
print(filled_df)

# Strategy 2: discard rows that contain a missing entry
dropped_df = handle_missing_values(missing_df.copy(), drop_na=True)
print(dropped_df)

Create a function to read data from a CSV file, clean it (handle missing values), and return the cleaned DataFrame.


In [None]:


# 11. Create a function to read data from a CSV file, clean it (handle missing values), and return the cleaned DataFrame.
def read_clean_csv(csv_file, fill_value=0):
    """Read ``csv_file`` and return it with missing values set to ``fill_value``.

    If the file does not exist, an error message is printed and ``None``
    is returned.
    """
    try:
        return pd.read_csv(csv_file).fillna(fill_value)
    except FileNotFoundError:
        print(f"Error: File '{csv_file}' not found.")
        return None

# Relies on 'sample_data.csv' having been written by the first example
cleaned_data_df = read_clean_csv('sample_data.csv')
if cleaned_data_df is not None:
    print("\nCleaned DataFrame from CSV:", cleaned_data_df, sep="\n")

Write a program to calculate descriptive statistics (mean, median, std) for numerical columns in a DataFrame.


In [None]:


# 12. Write a program to calculate descriptive statistics (mean, median, std) for numerical columns in a DataFrame.
def get_descriptive_statistics(df):
    """Return ``describe()`` statistics for the numeric columns of ``df``.

    Non-numeric columns are excluded first. In the ``describe()`` output the
    median is the row labelled '50%'. If the numeric selection is empty, a
    message is printed and ``None`` is returned.
    """
    numeric_only = df.select_dtypes(include=np.number)
    if numeric_only.empty:
        print("DataFrame has no numerical columns.")
        return None
    return numeric_only.describe()

descriptive_stats = get_descriptive_statistics(employee_df)
# None signals that there was nothing numeric to summarise
if descriptive_stats is not None:
    print("\nDescriptive Statistics for Employee DataFrame:", descriptive_stats, sep="\n")

Implement a function to add a new row to a DataFrame.


In [None]:


# 13. Implement a function to add a new row to a DataFrame.
def add_row_to_dataframe(df, new_row_data):
    """Return a new DataFrame consisting of ``df`` plus one appended row.

    The row is built as a one-row DataFrame rather than a transposed Series.
    A Series holding mixed values is object-typed, so concatenating its
    transpose would turn the numeric columns of the result into ``object``.

    Args:
        df: DataFrame to extend. It is not modified.
        new_row_data: Values for the new row, either a mapping keyed by
            column name or a sequence with one value per column of ``df``.
            For a mapping, keys that are not columns of ``df`` are ignored
            and columns without a key are left missing (NaN).

    Returns:
        A new DataFrame with a fresh 0..n-1 index, or ``None`` (after
        printing an error) when the row cannot be aligned to the columns
        of ``df``, e.g. a sequence of the wrong length.
    """
    try:
        # One-row frame: each column keeps the dtype of its own value.
        new_row = pd.DataFrame([new_row_data], columns=df.columns)
        updated_df = pd.concat([df, new_row], ignore_index=True)
    except ValueError:
        print("Error: New row data does not match the DataFrame's columns.")
        return None
    return updated_df

new_employee = {
    'Name': 'Eve',
    'Age': 29,
    'Salary': 52000,
}
updated_employee_df = add_row_to_dataframe(employee_df.copy(), new_employee)
# None signals that the row did not fit the frame's columns
if updated_employee_df is not None:
    print("\nDataFrame with new row:", updated_employee_df, sep="\n")