<a href="https://colab.research.google.com/github/sona5kyyy/PRODIGY_ML_01/blob/main/HousePricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the tab-separated housing dataset from the Colab workspace.
data = pd.read_csv('/content/housepp_test.csv', sep='\t')

# Quick sanity check: show the head of the table so a bad parse is obvious.
print("First 5 rows of the dataset:")
print(data.head())

# Step 1: Separate the predictor columns (X) from the target column (y).
feature_columns = ['square_footage', 'bedrooms', 'bathrooms']
X = data[feature_columns]
y = data['price']

# Step 2: Hold out 20% of the rows for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Standardize the features.
# The scaler is fitted on the training rows only, then the same fitted
# transformation is applied to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Fit a linear regression on the scaled training features.
model = LinearRegression().fit(X_train_scaled, y_train)

# Step 5: Predict prices for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Step 6: Put predictions next to the true prices for a visual comparison.
comparison_columns = {
    'Predicted Price': y_pred,
    'Actual Price': y_test.values,
}
predicted_vs_actual = pd.DataFrame(comparison_columns)
print("\nPredicted vs Actual Prices for Test Data:")
print(predicted_vs_actual.head())

# Step 7: Score the model on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"\nMean Squared Error (MSE): {mse}")
print(f"R-squared Score: {r2}")

# Step 8: Function to predict price for new data (new house)
def predict_price(square_footage, bedrooms, bathrooms):
    """Predict the price of a single house with the trained model.

    The three values are packed into a one-row DataFrame whose column
    names match the training features, scaled with the module-level
    ``scaler`` and passed to the module-level ``model``.

    Args:
        square_footage: Value for the ``square_footage`` feature.
        bedrooms: Value for the ``bedrooms`` feature.
        bathrooms: Value for the ``bathrooms`` feature.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    # Column names and order mirror the feature frame the scaler was fitted on.
    feature_row = {
        'square_footage': [square_footage],
        'bedrooms': [bedrooms],
        'bathrooms': [bathrooms],
    }
    house_frame = pd.DataFrame(feature_row)

    # Reuse the already-fitted scaler; it must not be refitted on new data.
    scaled_features = scaler.transform(house_frame)

    # predict() returns one value per input row; only one row was supplied.
    return model.predict(scaled_features)[0]

# Step 9: Worked example - estimate the price of one hypothetical house.
new_square_footage, new_bedrooms, new_bathrooms = 2500, 4, 3

predicted_price = predict_price(
    square_footage=new_square_footage,
    bedrooms=new_bedrooms,
    bathrooms=new_bathrooms,
)
print(f"\nPredicted Price for a house with {new_square_footage} sqft, {new_bedrooms} bedrooms, and {new_bathrooms} bathrooms: ${predicted_price:.2f}")


First 5 rows of the dataset:
   square_footage  bedrooms  bathrooms   price
0            1500         3          2  250000
1            2000         4          3  350000
2            1800         3          2  300000
3            2200         4          3  400000
4            1600         3          2  270000

Predicted vs Actual Prices for Test Data:
   Predicted Price  Actual Price
0    496400.471356        490000
1    602943.255983        620000
2    257827.229877        250000

Mean Squared Error (MSE): 131054692.53265846
R-squared Score: 0.9944205665430751

Predicted Price for a house with 2500 sqft, 4 bedrooms, and 3 bathrooms: $458677.48
