In [24]:
import pandas as pd

In [25]:
# Task 1: Build the housing DataFrame by hand.
# Two cells are deliberately left empty (None) so that the cleaning steps
# further down have something to fix:
#   - "Size (sq ft)"   is missing for house 104
#   - "Price ($1000s)" is missing for house 105
data = {
    "House ID": list(range(101, 107)),
    "Location": ["New York", "Chicago", "Boston"] * 2,
    "Size (sq ft)": [850, 920, 1100, None, 1250, 1400],
    "Price ($1000s)": [220, 250, 275, 300, None, 360],
    "Bedrooms": [2, 3, 3, 3, 4, 4],
    "Bathrooms": [1, 2, 2, 2, 3, 3],
}
df = pd.DataFrame(data)

# One print call, newline-separated: heading first, then the frame.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
   House ID  Location  Size (sq ft)  Price ($1000s)  Bedrooms  Bathrooms
0       101  New York         850.0           220.0         2          1
1       102   Chicago         920.0           250.0         3          2
2       103    Boston        1100.0           275.0         3          2
3       104  New York           NaN           300.0         3          2
4       105   Chicago        1250.0             NaN         4          3
5       106    Boston        1400.0           360.0         4          3


In [26]:
# Task 2: Report how many missing entries each column has before cleaning.
# isna() flags every NaN cell; summing the boolean frame counts them per column.
print("\nMissing Values Before Cleaning:", df.isna().sum(), sep="\n")


Missing Values Before Cleaning:
House ID          0
Location          0
Size (sq ft)      1
Price ($1000s)    1
Bedrooms          0
Bathrooms         0
dtype: int64


In [27]:
# Clean the housing data: rename columns, impute missing values, de-duplicate.
#
# Every step returns a new frame that is bound back to `df` (no inplace=True),
# and the rename runs first so the cell is safe to re-run: rename() ignores
# labels that are no longer present, fillna() has nothing left to fill, and
# drop_duplicates() finds nothing to drop.

# Rename columns
df = df.rename(columns={"Size (sq ft)": "House_Size", "Price ($1000s)": "House_Price"})

# Fill missing values: column mean for size, column median for price.
# mean()/median() skip NaN by default, so they are computed from the known rows.
df = df.fillna({
    "House_Size": df["House_Size"].mean(),
    "House_Price": df["House_Price"].median(),
})

# Drop duplicate records (if any)
df = df.drop_duplicates()

print("\nCleaned DataFrame:")
print(df)


Cleaned DataFrame:
   House ID  Location  House_Size  House_Price  Bedrooms  Bathrooms
0       101  New York       850.0        220.0         2          1
1       102   Chicago       920.0        250.0         3          2
2       103    Boston      1100.0        275.0         3          2
3       104  New York      1104.0        300.0         3          2
4       105   Chicago      1250.0        275.0         4          3
5       106    Boston      1400.0        360.0         4          3


In [28]:
# Bonus Challenge: two read-only views of the cleaned frame.

# 1) Most expensive houses first.
df_sorted = df.sort_values("House_Price", ascending=False)
print("\nSorted DataFrame (By Price Descending):", df_sorted, sep="\n")

# 2) Only the houses that have more than 3 bedrooms.
has_many_bedrooms = df["Bedrooms"].gt(3)
df_filtered = df.loc[has_many_bedrooms]
print("\nFiltered DataFrame (More than 3 Bedrooms):", df_filtered, sep="\n")


Sorted DataFrame (By Price Descending):
   House ID  Location  House_Size  House_Price  Bedrooms  Bathrooms
5       106    Boston      1400.0        360.0         4          3
3       104  New York      1104.0        300.0         3          2
2       103    Boston      1100.0        275.0         3          2
4       105   Chicago      1250.0        275.0         4          3
1       102   Chicago       920.0        250.0         3          2
0       101  New York       850.0        220.0         2          1

Filtered DataFrame (More than 3 Bedrooms):
   House ID Location  House_Size  House_Price  Bedrooms  Bathrooms
4       105  Chicago      1250.0        275.0         4          3
5       106   Boston      1400.0        360.0         4          3
