In [None]:
# Step 1: Ensure required libraries are installed
import subprocess
import sys

def install_package(package, import_name=None):
    """Install *package* with pip unless its module can already be imported.

    Args:
        package: Distribution name as passed to ``pip install``.
        import_name: Module name to try importing. When omitted, a small
            table of known mismatches is consulted and the distribution
            name itself is used as the fallback.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
    """
    # Some distributions are imported under a different name than the one
    # pip uses; probing the pip name would always fail and trigger a
    # needless reinstall on every run.
    known_import_names = {'scikit-learn': 'sklearn'}
    if import_name is None:
        import_name = known_import_names.get(package, package)

    try:
        __import__(import_name)
    except ImportError:
        print(f"Installing {package}...")
        # Use the running interpreter's pip so the package lands in the
        # same environment this script executes in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"{package} installed successfully")
    else:
        print(f"{package} is already installed")

# Third-party dependencies, listed by the name handed to the installer.
required_packages = [
    'pandas',
    'scikit-learn',
]
print("Checking and installing required packages...")
for dependency in required_packages:
    install_package(dependency)
print("Package installation check complete")

# Step 2: Read and Prepare the Dataset
# Loads the CSV, selects the predictor columns, integer-encodes the zipcode,
# splits into train/test sets and standardizes the features.
try:
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, StandardScaler
    from sklearn.pipeline import Pipeline
    
    print("Reading CSV file...")
    df = pd.read_csv(r"C:\Users\marca\Downloads\kc_house_data.csv")
    print(f"Dataframe shape: {df.shape}")

    # Predictor columns; 'price' is the regression target.
    features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 
               'waterfront', 'condition', 'grade', 'sqft_above', 'sqft_basement', 
               'yr_built', 'zipcode', 'sqft_living15']
    X = df[features].copy()
    y = df['price']
    print("Features selected")

    # Replace each zipcode with an integer code. The fitted encoder is kept
    # in `le` because Step 5 uses it to encode the new house's zipcode.
    le = LabelEncoder()
    X.loc[:, 'zipcode'] = le.fit_transform(X['zipcode'])
    print("Zipcode encoded")

    # Split BEFORE scaling: fitting the scaler on the full dataset would let
    # the test rows influence the mean/variance applied to the training
    # features (data leakage) and make the test scores optimistic.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Learn the scaling statistics from the training rows only, then apply
    # the same transformation to the held-out rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # Retained so any later code that refers to the fully scaled matrix
    # keeps working; it is not used for training or evaluation.
    X_scaled = scaler.transform(X)
    print("Features scaled")

    print(f"Training set shape: {X_train.shape}, Test set shape: {X_test.shape}")

except Exception as e:
    # Report which stage failed, then let the original error propagate.
    print(f"Error in data preparation: {str(e)}")
    raise

# Step 3: Import libraries for regression models
# Brings in the estimators plus the R2 metric and builds the name -> model
# registry that the training step iterates over.
try:
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.svm import SVR
    from sklearn.tree import DecisionTreeRegressor
    print("Regression model libraries imported")

    # Polynomial regression is a degree-2 feature expansion followed by an
    # ordinary linear fit, chained in a single pipeline.
    polynomial_regression = Pipeline(steps=[
        ('poly', PolynomialFeatures(degree=2)),
        ('linear', LinearRegression()),
    ])

    # Insertion order is the order in which the models are trained.
    models = {
        'Multiple Linear Regression': LinearRegression(),
        'Polynomial Regression': polynomial_regression,
        'KNN Regression': KNeighborsRegressor(n_neighbors=5),
        'Non-Linear SVR': SVR(kernel='rbf'),
        'Decision Tree': DecisionTreeRegressor(random_state=42),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    }
    print("Models initialized")

except Exception as e:
    # Report which stage failed, then let the original error propagate.
    print(f"Error in model imports: {str(e)}")
    raise

# Step 4: Test Models and Compare Results
# Fits every registered model on the training split, scores it with R2 on
# the test split and keeps the highest-scoring model as `best_model`.
try:
    results = {}
    for model_name, estimator in models.items():
        print(f"Training {model_name}...")
        estimator.fit(X_train, y_train)

        print(f"Predicting with {model_name}...")
        results[model_name] = r2_score(y_test, estimator.predict(X_test))
        print(f"{model_name} R2 Score: {results[model_name]:.4f}")

    # Highest R2 wins; on a tie the model trained first is kept.
    best_model_name, best_score = max(results.items(), key=lambda entry: entry[1])
    best_model = models[best_model_name]
    print(f"\nBest Model: {best_model_name} with R2 Score: {best_score:.4f}")

except Exception as e:
    # Report which stage failed, then let the original error propagate.
    print(f"Error in model training/evaluation: {str(e)}")
    raise

# Step 5: Estimate new House Prices
# Describes one hypothetical house, pushes it through the fitted zipcode
# encoder and feature scaler, and prices it with the best model.
try:
    print("Preparing new house data...")
    # Keys are listed in the same order as the training feature columns.
    house_attributes = {
        'bedrooms': 3,
        'bathrooms': 2,
        'sqft_living': 2000,
        'sqft_lot': 8000,
        'floors': 1,
        'waterfront': 0,
        'condition': 3,
        'grade': 7,
        'sqft_above': 2000,
        'sqft_basement': 0,
        'yr_built': 1990,
        # The raw zipcode must go through the encoder fitted on the dataset.
        'zipcode': le.transform([98028])[0],
        'sqft_living15': 2000,
    }
    # One-row frame: every attribute becomes a single-element column.
    new_house = pd.DataFrame(
        {column: [value] for column, value in house_attributes.items()}
    )

    new_house_scaled = scaler.transform(new_house)
    print("New house data scaled")

    print("Predicting house price...")
    predicted_price = best_model.predict(new_house_scaled)
    print(f"\nPredicted price for a 3-bed, 2-bath, 1-level home in Kenmore, WA (98028): ${predicted_price[0]:,.2f}")

except Exception as e:
    # Report which stage failed, then let the original error propagate.
    print(f"Error in price prediction: {str(e)}")
    raise

print("Script execution completed")

Checking and installing required packages...
pandas is already installed
Installing scikit-learn...
scikit-learn installed successfully
Package installation check complete
Reading CSV file...
Dataframe shape: (21613, 21)
Features selected
Zipcode encoded
Features scaled
Training set shape: (17290, 13), Test set shape: (4323, 13)
Regression model libraries imported
Models initialized
Training Multiple Linear Regression...
Predicting with Multiple Linear Regression...
Multiple Linear Regression R2 Score: 0.6477
Training Polynomial Regression...
Predicting with Polynomial Regression...
Polynomial Regression R2 Score: 0.7214
Training KNN Regression...
Predicting with KNN Regression...
KNN Regression R2 Score: 0.6963
Training Non-Linear SVR...
Predicting with Non-Linear SVR...
