In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [8]:
# Read the housing data from disk into a DataFrame and preview it.
# NOTE(review): the rendered frame includes an 'Unnamed: 0' column, presumably a
# row index that was saved into the CSV -- confirm; if so, index_col=0 would drop it.
dataset_path = 'dataset.csv'
house = pd.read_csv(dataset_path)
house

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065
...,...,...,...,...,...,...
49995,1282,5,3,Rural,1975,100080.865895
49996,2854,2,2,Suburb,1988,374507.656727
49997,2979,5,3,Suburb,1962,384110.555590
49998,2596,5,2,Rural,1984,380512.685957


In [3]:
# Preprocessing
# Convert the categorical 'Neighborhood' column to integer codes with a LabelEncoder.
#
# The column is overwritten in place, so this cell must be safe to run more than once:
# refitting the encoder on the already-encoded integers would replace `le.classes_`
# with ints, after which a text label could no longer be looked up in the encoder.
# The encoder is therefore only (re)fitted while the column still holds the raw
# labels; on a re-run the column is left alone and the existing `le` stays valid.
if not pd.api.types.is_numeric_dtype(house['Neighborhood']):
    le = LabelEncoder()
    house['Neighborhood'] = le.fit_transform(house['Neighborhood'])

In [9]:
# Separate the predictors (X) from the value being predicted (y).
feature_columns = ['SquareFeet', 'Bedrooms', 'Bathrooms', 'Neighborhood', 'YearBuilt']
X = house[feature_columns]
y = house['Price']

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Instantiate an ordinary least-squares regressor with scikit-learn's defaults.
model = LinearRegression()

In [12]:
# Fit the regressor on the training split.
# The call is left as the cell's last expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)

LinearRegression()

In [13]:
# Predict prices for the held-out rows.
predictions = model.predict(X=X_test)

In [14]:
# Score the predictions against the true held-out prices.
# MSE is expressed in squared price units, so large values are expected for house prices.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 2437053699.8864026


In [18]:
# Collect the attributes of a new house from the user so the trained model can
# price it. Numeric answers are converted immediately; a non-numeric answer raises
# ValueError from the conversion.
def _ask(prompt, convert=str):
    """Show `prompt`, read one line of input and return it passed through `convert`."""
    return convert(input(prompt))


square_feet = _ask("Enter the square footage of the new house: ", float)
bedrooms = _ask("Enter the number of bedrooms: ", int)
bathrooms = _ask("Enter the number of bathrooms: ", int)
neighborhood = _ask("Enter the neighborhood: ")
year_built = _ask("Enter the year built: ", int)

Enter the square footage of the new house: 2126
Enter the number of bedrooms: 4
Enter the number of bathrooms: 1
Enter the neighborhood: Rural
Enter the year built: 1969


In [19]:
# Translate the entered neighborhood into the integer code used during training.
#
# An exact match against the encoder's classes is tried first. If that fails, the
# entry is compared again ignoring surrounding whitespace and letter case, so that
# input such as "rural" or " Rural" is still recognised instead of silently being
# treated as an unknown neighborhood.
known_labels = list(le.classes_)
label_lookup = {str(label).strip().casefold(): label for label in known_labels}

if neighborhood in known_labels:
    matched_label = neighborhood
else:
    matched_label = label_lookup.get(neighborhood.strip().casefold())

if matched_label is not None:
    # Transform the 'Neighborhood' label to numerical
    neighborhood_numerical = le.transform([matched_label])[0]
else:
    # Unseen label: keep the original -1 fallback, but say so explicitly because -1
    # is a code the model never saw during training and the prediction built on it
    # should not be trusted.
    expected = ', '.join(str(label) for label in known_labels)
    print(f"Warning: unknown neighborhood {neighborhood!r} (expected one of: {expected}); "
          "using -1, so the predicted price will be unreliable.")
    neighborhood_numerical = -1  # Assign a default numerical value

In [23]:
# Assemble a single-row frame for the new house. The keys are listed in the same
# order as the training feature columns, which the model expects at predict time.
feature_values = {
    'SquareFeet': square_feet,
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Neighborhood': neighborhood_numerical,
    'YearBuilt': year_built,
}
new_house = pd.DataFrame([feature_values], columns=list(feature_values))

# predict() returns an array with one entry per input row; report it to 2 decimals.
predicted_price = model.predict(new_house)
formatted_price = round(predicted_price[0], 2)
print(f'Predicted Price for the new house: {formatted_price}')

Predicted Price for the new house: 235752.53
