In [1]:
import pandas as pd
import os

# Full path to CSV file inside /content/sample_data
file_path = '/content/sample_data/california_housing_train.csv'

# Check if file exists
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
    print("✅ File loaded successfully!\n")

    # Display full DataFrame (optional - not for large datasets)
    print(df)

    # Show first 5 and last 5 rows
    print("\n🔹 First 5 rows:")
    print(df.head())

    print("\n🔹 Last 5 rows:")
    print(df.tail())

    # Inspect DataFrame
    print("\n🔹 Shape of DataFrame (rows, columns):")
    print(df.shape)

    print("\n🔹 Info:")
    print(df.info())

    print("\n🔹 Summary statistics:")
    print(df.describe())

    # Selecting columns
    print("\n🔹 Single column: 'median_income'")
    print(df['median_income'])

    print("\n🔹 Multiple columns: ['housing_median_age', 'median_house_value']")
    print(df[['housing_median_age', 'median_house_value']])

    # Filtering rows
    print("\n🔹 Filter where median_income > 10:")
    print(df[df['median_income'] > 10])

    # Adding a new column
    df['income_plus_age'] = df['median_income'] + df['housing_median_age']

    # Dropping columns
    df = df.drop('total_rooms', axis=1)  # drop a single column
    df = df.drop(['population', 'households'], axis=1)  # drop multiple columns

    # Handling missing values
    print("\n🔹 Missing value count per column:")
    print(df.isnull().sum())

    df = df.dropna()  # Drop rows with missing values
    df = df.fillna(0)  # Fill any remaining with 0

    # Save to Excel
    df.to_excel('output.xlsx', index=False)
    print("\n✅ Data saved to 'output.xlsx'")

else:
    print(f"❌ File not found at path: {file_path}")







✅ File loaded successfully!

       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0        -114.31     34.19                15.0       5612.0          1283.0   
1        -114.47     34.40                19.0       7650.0          1901.0   
2        -114.56     33.69                17.0        720.0           174.0   
3        -114.57     33.64                14.0       1501.0           337.0   
4        -114.57     33.57                20.0       1454.0           326.0   
...          ...       ...                 ...          ...             ...   
16995    -124.26     40.58                52.0       2217.0           394.0   
16996    -124.27     40.69                36.0       2349.0           528.0   
16997    -124.30     41.84                17.0       2677.0           531.0   
16998    -124.30     41.80                19.0       2672.0           552.0   
16999    -124.35     40.54                52.0       1820.0           300.0   

       population  hou