In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [7]:
# Task 1: Create a test script to verify your setup
def verify_environment():
    """Verify the pandas installation and print a short report.

    Three checks run in order:

    1. ``import pandas`` succeeds (the installed version is reported).
    2. A small DataFrame can be constructed.
    3. The optional ``sqlalchemy`` package can be imported.

    Each check is recorded as a ``(name, status, detail)`` tuple and the
    collected results are printed as a table.

    Returns:
        bool: ``True`` unless at least one check has status ``"FAILED"``.
        A missing optional dependency is reported as ``"OPTIONAL"`` and does
        not cause the verification to fail.
    """
    tests = []

    # Test 1: Import works
    try:
        import pandas as pd
        tests.append(("Import pandas", "PASSED", pd.__version__))
    except Exception as e:
        # A broken install can raise more than ImportError (for example a
        # binary incompatibility surfacing as ValueError), so record any
        # failure instead of letting it escape the report.
        tests.append(("Import pandas", "FAILED", str(e)))

    # Test 2: Create simple DataFrame
    # If the import above failed, `pd` is unbound in this scope; the resulting
    # error is caught here and recorded as a failed check.
    try:
        df = pd.DataFrame({'test': [1, 2, 3]})
        tests.append(("Create DataFrame", "PASSED", f"Shape: {df.shape}"))
    except Exception as e:
        tests.append(("Create DataFrame", "FAILED", str(e)))

    # Test 3: Check optional dependencies
    try:
        import sqlalchemy
        tests.append(("SQL support", "AVAILABLE", "sqlalchemy found"))
    except ImportError:
        tests.append(("SQL support", "OPTIONAL", "install sqlalchemy"))

    # Print results
    print("\n" + "="*60)
    print("ENVIRONMENT VERIFICATION REPORT")
    print("="*60)
    for test, status, detail in tests:
        print(f"{test:20} [{status:10}] {detail}")

    # Only hard failures make the environment unusable; "OPTIONAL" is
    # informational and must not flip the overall result.
    return all(status != "FAILED" for _, status, _ in tests)

# Run verification once and report the overall outcome in a single message.
environment_ready = verify_environment()
status_message = (
    "\n✅ Environment is ready for pandas development!"
    if environment_ready
    else "\n⚠️  Some issues detected. Please resolve before proceeding."
)
print(status_message)


ENVIRONMENT VERIFICATION REPORT
Import pandas        [PASSED    ] 2.3.2
Create DataFrame     [PASSED    ] Shape: (3, 1)
SQL support          [AVAILABLE ] sqlalchemy found

✅ Environment is ready for pandas development!


In [3]:
class EmployeeDatabase:
    """In-memory employee table held in a pandas DataFrame.

    The table lives on ``self.employee``. It is created with zero rows and
    six explicitly typed columns.
    """

    def __init__(self):
        """Create the empty, typed employee table and print a short banner."""
        # Column name -> dtype, in the order the columns appear in the table.
        column_dtypes = {
            'employee_id': 'int64',
            'employee_name': 'object',
            'department': 'object',
            'join_date': 'datetime64[ns]',
            'salary': 'float64',
            'performance_score': 'float64',
        }
        # Empty typed Series give every column its dtype before any row exists.
        self.employee = pd.DataFrame({
            name: pd.Series([], dtype=dtype)
            for name, dtype in column_dtypes.items()
        })

        row_count, column_count = self.employee.shape
        print("Employee Database intialized Successfully!")
        print(f"Database Schema : {row_count} rows, {column_count} columns")

    def show_info(self):
        """Print row count, memory footprint, column dtypes and the first rows."""
        table = self.employee

        print("Employee Database Status")
        print(f"Total Employees : {len(table)}")

        # deep=True also counts the memory of Python objects in object columns.
        memory_kb = table.memory_usage(deep=True).sum() / 1024
        print(f"Total Memory Usage : {memory_kb:.2f} KB")

        print("\nColumn Data Types")
        print(table.dtypes)

        print("\nFirst Look at the Data:")
        print(table.head())

# Create the (initially empty) employee database, then print its status report.
db = EmployeeDatabase()
db.show_info()

Employee Database intialized Successfully!
Database Schema : 0 rows, 6 columns
Employee Database Status
Total Employees : 0
Total Memory Usage : 0.13 KB

Column Data Types
employee_id                   int64
employee_name                object
department                   object
join_date            datetime64[ns]
salary                      float64
performance_score           float64
dtype: object

First Look at the Data:
Empty DataFrame
Columns: [employee_id, employee_name, department, join_date, salary, performance_score]
Index: []


In [4]:
# Understanding internal representation: a DataFrame is a set of named Series
# sharing one index, each backed by a NumPy array.
sample_data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Department=['HR', 'Engineering', 'Sales'],
)

df = pd.DataFrame(sample_data)

print("\nDataFrame Structure Analysis:")
print(f"1. Columns (dict keys): {list(df.columns)}")

print("2. Each column is a Series:")
for col in df.columns:
    series = df[col]
    print(f"   - '{col}': {type(series)}, dtype: {series.dtype}")

print(f"3. Index: {df.index}")

print("4. Underlying NumPy arrays:")
names_array = df['Name'].values
ages_array = df['Age'].values
print(f"   - Names array: {names_array}")
print(f"   - Ages array: {ages_array}")


DataFrame Structure Analysis:
1. Columns (dict keys): ['Name', 'Age', 'Department']
2. Each column is a Series:
   - 'Name': <class 'pandas.core.series.Series'>, dtype: object
   - 'Age': <class 'pandas.core.series.Series'>, dtype: int64
   - 'Department': <class 'pandas.core.series.Series'>, dtype: object
3. Index: RangeIndex(start=0, stop=3, step=1)
4. Underlying NumPy arrays:
   - Names array: ['Alice' 'Bob' 'Charlie']
   - Ages array: [25 30 35]
