In [7]:
import pandas as pd


def load_data(file_path):
    """
    Read an Excel workbook into a DataFrame, reporting failures instead of raising.

    The file is opened in binary mode inside a ``with`` block, so the handle is closed as soon as pandas has
    finished reading it (or as soon as reading fails).

    Parameters:
    file_path (str): The path to the Excel file to be loaded.

    Returns:
    pandas.DataFrame or None: The parsed DataFrame on success. None when the file does not exist or when any
    other Exception is raised while opening or parsing it; a message describing the problem is printed first.

    Raises:
    None: Every Exception subclass is caught and reported via print.
    """
    try:
        with open(file_path, 'rb') as workbook:
            frame = pd.read_excel(workbook)
    except FileNotFoundError:
        # Missing file gets its own message that includes the offending path.
        print(f"Error: The file at {file_path} could not be found.")
    except Exception as exc:
        # Anything else (unreadable file, unsupported format, missing engine, ...) is reported generically.
        print(f"An unexpected error occurred: {exc}")
    else:
        return frame
    # Reached only after one of the except branches printed its message.
    return None

# Location of the Excel workbook to load.
file_path = "E:\\train4.xlsx"

# load_data returns None (after printing the reason) when the workbook cannot be read.
df = load_data(file_path)

# Report the outcome of the load in a single line.
print("Data loaded successfully!" if df is not None else "Data could not be loaded.")


Data loaded successfully!


In [9]:
def perform_data_quality_checks(df): # Data quality checks
    """
    Prints a basic data quality report for a DataFrame.

    The report contains the number of rows and columns, the per-column count of missing values (only when at
    least one value is missing), whether any fully duplicated rows exist, and the dtype of every column.

    Parameters:
    df (pandas.DataFrame or None): The DataFrame to be checked. None is accepted because load_data returns None
    when loading fails; in that case a message is printed and no checks are run.

    Returns:
    None: Prints the results of the data quality checks.
    """
    # A failed load yields None; report that instead of failing with AttributeError on df.shape below.
    if df is None:
        print("No data to check: the DataFrame is None.")
        return

    num_rows, num_cols = df.shape
    print(f"Number of rows: {num_rows}")
    print(f"Number of columns: {num_cols}")

    # Checking for missing values (per-column counts are computed once and reused for the report)
    missing_per_column = df.isnull().sum()
    if missing_per_column.sum() == 0:
        print("No missing values found.")
    else:
        print("Missing values found:")
        print(missing_per_column)

    # Checking for duplicates (a row counts as a duplicate when every column matches an earlier row)
    duplicate_count = df.duplicated().sum()
    if duplicate_count == 0:
        print("No duplicate rows found.")
    else:
        print("Duplicate rows found.")

    # Checking for data types
    print("Data types:")
    print(df.dtypes)

# Run the quality checks on df, the value returned by load_data(file_path) in the earlier cell.
perform_data_quality_checks(df)


Number of rows: 45211
Number of columns: 18
No missing values found.
No duplicate rows found.
Data types:
Unnamed: 0         int64
age                int64
job               object
marital           object
education         object
default_status    object
balance            int64
housing           object
loan              object
contact           object
day                int64
month             object
duration           int64
campaign           int64
pdays              int64
previous           int64
poutcome          object
y                 object
dtype: object
