# In [None]:
# Step 1: Install and Import Required Libraries
# !pip install scikit-learn  # Uncomment this if scikit-learn is not installed

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Step 2: Build the simulated housing dataset
# np.nan marks the missing entries: one in the area column and one in the
# bedrooms column. The price column is complete.
areas_sq_ft = [2000, 1500, 1800, 2200, np.nan, 2500, 2700, 1600, 1400, 2100]
bedroom_counts = [3, 2, 3, 4, 3, 5, np.nan, 2, 2, 4]
prices_in_thousands = [500, 350, 450, 600, 400, 750, 800, 300, 280, 650]

# Column name -> column values; insertion order fixes the column order.
data = {
    'Area (sq ft)': areas_sq_ft,
    'Bedrooms': bedroom_counts,
    'Price ($1000s)': prices_in_thousands,
}

df = pd.DataFrame(data)
print("Original Dataset:\n", df)

# Step 3: Separate features and target, then split into train/test sets.
# The split happens BEFORE imputation so that the statistics used to fill
# missing values are learned from the training rows only (no data leakage
# from the test rows or from the target column).
feature_columns = ['Area (sq ft)', 'Bedrooms']
target_column = 'Price ($1000s)'

X_raw = df[feature_columns]                    # Features (may contain NaN)
# The target is never imputed; it has no missing values in this dataset.
# Cast to float so its dtype matches the imputed feature columns.
y = df[target_column].astype(float)            # Target variable

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Step 4: Handle Missing Values using SimpleImputer.
# Fit on the training features only, then reuse the learned column means
# to fill the gaps in the test features.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=feature_columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=feature_columns,
    index=X_test_raw.index,
)

# Reassemble the full imputed dataset in its original row order so that
# `X` and `df_imputed` remain available for inspection.
X = pd.concat([X_train, X_test]).sort_index()
df_imputed = X.join(y)
print("\nDataset after Handling Missing Values:\n", df_imputed)

print("\nTraining Set Size:", X_train.shape)
print("Testing Set Size:", X_test.shape)

# Step 5: Feature Scaling using StandardScaler and MinMaxScaler
# Each scaler learns its parameters from X_train only; the same fitted
# transformation is then applied to both the training and the test split.

# Standardization: rescale each feature to mean=0, variance=1
# (statistics taken from the training split).
scaler_standard = StandardScaler().fit(X_train)
X_train_standardized, X_test_standardized = (
    scaler_standard.transform(split) for split in (X_train, X_test)
)

# Normalization: rescale each feature so the training split spans [0, 1].
scaler_minmax = MinMaxScaler().fit(X_train)
X_train_normalized, X_test_normalized = (
    scaler_minmax.transform(split) for split in (X_train, X_test)
)

# Show a short preview of each scaled training array.
for heading, scaled in (
    ("\nStandardized Data (First 3 rows):\n", X_train_standardized),
    ("\nNormalized Data (First 3 rows):\n", X_train_normalized),
):
    print(heading, scaled[:3])
