In [4]:
import pandas as pd

def load_dataset(path_or_url):
    """Load a CSV into a DataFrame, printing a message instead of raising.

    Args:
        path_or_url: Any source accepted by ``pd.read_csv`` (local path,
            URL, or file-like object).

    Returns:
        The loaded ``DataFrame`` on success, or ``None`` when loading fails.
        A status message is printed in both cases.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.error import URLError

    try:
        # Keep the try body to the single call that can fail.
        df = pd.read_csv(path_or_url)
    except FileNotFoundError:
        print("❌ File not found. Please check the path or URL.")
    except URLError as e:
        # Remote fetch failures (unreachable host, HTTP 404, ...) are raised
        # as urllib's URLError/HTTPError rather than FileNotFoundError.
        print(f"❌ Could not retrieve the URL: {e}")
    except pd.errors.EmptyDataError:
        # An empty source is a known condition, not an "unexpected" error.
        print("❌ The CSV file is empty.")
    except pd.errors.ParserError:
        print("❌ Error parsing the CSV file.")
    except Exception as e:
        # Deliberate best-effort boundary: report anything else and return None.
        print(f"❌ An unexpected error occurred: {e}")
    else:
        print("✅ Dataset loaded successfully.")
        return df
    return None

def check_missing_values(df):
    """Count the missing (null/NaN) entries in each column of ``df``."""
    null_mask = df.isna()
    return null_mask.sum()

def check_duplicates(df):
    """Count and collect the duplicate rows of ``df``.

    Rows are flagged with ``DataFrame.duplicated()`` using its default
    arguments, i.e. a row is a duplicate when it repeats an earlier row in
    every column; the first occurrence itself is not flagged.

    Returns:
        A ``(duplicate_count, duplicates)`` tuple: the number of flagged rows
        and a DataFrame containing all of them (not just a sample).
    """
    # Build the boolean mask once and reuse it for both the count and the rows.
    is_duplicate = df.duplicated()
    return is_duplicate.sum(), df[is_duplicate]

def generate_data_quality_report(df):
    """Print a data quality summary of ``df`` to stdout.

    The report has three sections, printed in this order: missing values per
    column, the number of duplicate rows, and ``describe(include='all')``
    statistics. Nothing is returned.
    """
    print("\n📊 --- Data Quality Report ---")

    print("\n🔍 Missing Values:\n", check_missing_values(df))

    # Only the count is reported; the duplicate rows themselves are not shown.
    duplicate_total = check_duplicates(df)[0]
    print("\n🔁 Duplicate Rows Count:", duplicate_total)

    print("\n📈 Basic Statistics:\n", df.describe(include='all'))

# ---- Main Execution ----
if __name__ == "__main__":
    dataset_url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
    df = load_dataset(dataset_url)

    # load_dataset returns None on failure; in that case nothing is reported.
    if df is not None:
        # Report on the 'Age' column on its own before the full report.
        if 'Age' not in df.columns:
            print("\n⚠️ 'Age' column not found in dataset.")
        else:
            missing_age = df['Age'].isna().sum()
            print(f"\n🔎 Missing values in 'Age' column: {missing_age}")

        # Full report covering every column.
        generate_data_quality_report(df)

✅ Dataset loaded successfully.

🔎 Missing values in 'Age' column: 177

📊 --- Data Quality Report ---

🔍 Missing Values:
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

🔁 Duplicate Rows Count: 0

📈 Basic Statistics:
         PassengerId    Survived      Pclass                     Name   Sex  \
count    891.000000  891.000000  891.000000                      891   891   
unique          NaN         NaN         NaN                      891     2   
top             NaN         NaN         NaN  Braund, Mr. Owen Harris  male   
freq            NaN         NaN         NaN                        1   577   
mean     446.000000    0.383838    2.308642                      NaN   NaN   
std      257.353842    0.486592    0.836071                      NaN   NaN   
min        1.000000    0.000000    1.000000