<a href="https://colab.research.google.com/github/safaet/mlprojects/blob/main/Simple_model_Implement_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating the dataset

In [1]:
import pandas as pd
import numpy as np

# Fix the global NumPy seed so every run generates the same synthetic table.
np.random.seed(42)

# Number of synthetic houses (rows) to generate.
N_ROWS = 100

# Build the columns one at a time. The order of these draws matters: every
# call consumes the shared global random stream, so reordering them would
# change the generated values. Note that every column, including the target
# 'House Price', is drawn independently of the others.
data = {}
data['Size (sq. ft.)'] = np.random.randint(800, 5000, size=N_ROWS)
data['Bedrooms'] = np.random.randint(1, 10, size=N_ROWS)
data['Bathrooms'] = np.random.randint(1, 6, size=N_ROWS)
data['Lot Size (acres)'] = np.random.uniform(0.1, 5, size=N_ROWS)
data['Year Built'] = np.random.randint(1900, 2023, size=N_ROWS)
data['Neighborhood Safety'] = np.random.randint(1, 6, size=N_ROWS)
data['Proximity to Public Transport (miles)'] = np.random.uniform(0.1, 10, size=N_ROWS)
data['Distance to School (miles)'] = np.random.uniform(0.1, 5, size=N_ROWS)
data['House Style'] = np.random.choice(
    ['Ranch', 'Colonial', 'Victorian', 'Contemporary', 'Tudor'], size=N_ROWS
)
data['Garage Capacity'] = np.random.randint(0, 4, size=N_ROWS)
data['Swimming Pool'] = np.random.choice(['Yes', 'No'], size=N_ROWS)
data['Energy Efficiency'] = np.random.choice(
    ['A', 'B', 'C', 'D', 'E', 'F'], size=N_ROWS
)
data['Property Tax Rate'] = np.random.uniform(0.5, 3, size=N_ROWS)
data['Distance to Park (miles)'] = np.random.uniform(0.1, 5, size=N_ROWS)
data['Market Trends'] = np.random.choice(
    ['Booming', 'Stable', 'Declining'], size=N_ROWS
)
data['Home Security System'] = np.random.choice(['Yes', 'No'], size=N_ROWS)
data['Shopping Centers (miles)'] = np.random.uniform(0.1, 5, size=N_ROWS)
data['Quality of Construction'] = np.random.choice(
    ['Low', 'Medium', 'High'], size=N_ROWS
)
data['Amenities Rating'] = np.random.randint(1, 6, size=N_ROWS)
data['Distance to Hospital (miles)'] = np.random.uniform(0.1, 10, size=N_ROWS)
data['House Price'] = np.random.randint(200000, 800000, size=N_ROWS)

# Assemble the columns into a table; column order follows insertion order.
df = pd.DataFrame(data)

# Persist the table without the index so the CSV holds only the 21 data columns.
df.to_csv('house_price_100.csv', index=False)


# Main Task

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset

In [5]:
# Load the file saved by the dataset-creation cell (house_price_100.csv) so
# the notebook runs end to end on a fresh kernel without needing a separately
# supplied CSV.
df = pd.read_csv('house_price_100.csv')

## Display the first few rows of the dataset to understand its structure

In [7]:
# Preview the first five rows (the default for head) to inspect the columns.
df.head(n=5)

Unnamed: 0,Size (sq. ft.),Bedrooms,Bathrooms,Lot Size (acres),Year Built,Neighborhood Safety,Proximity to Public Transport (miles),Distance to School (miles),House Style,Garage Capacity,...,Energy Efficiency,Property Tax Rate,Distance to Park (miles),Market Trends,Home Security System,Shopping Centers (miles),Quality of Construction,Amenities Rating,Distance to Hospital (miles),House Price
0,1660,4,4,3.176683,1992,5,2.869659,1.803334,Colonial,2,...,F,2.330461,4.952672,Declining,Yes,0.832945,Low,1,7.18336,699499
1,4572,5,5,1.051942,1925,3,4.170947,0.255843,Colonial,0,...,A,1.126827,0.101164,Declining,Yes,2.376533,High,3,5.260218,693948
2,3892,7,5,0.44761,1973,3,6.067541,2.788705,Tudor,3,...,E,2.082745,1.008817,Booming,Yes,3.434237,Low,2,8.241695,507257
3,1266,7,3,2.044241,1989,2,2.782481,2.718675,Colonial,2,...,C,1.735685,1.981423,Booming,No,2.038171,High,4,4.365535,798357
4,4244,4,1,0.348766,1933,4,1.418533,1.844358,Contemporary,1,...,C,1.933236,4.669691,Declining,No,4.876029,Medium,4,8.977471,429189


## Check for missing values

In [8]:
# Count missing entries per column; a column of zeros means nothing to impute.
df.isna().sum()

Size (sq. ft.)                           0
Bedrooms                                 0
Bathrooms                                0
Lot Size (acres)                         0
Year Built                               0
Neighborhood Safety                      0
Proximity to Public Transport (miles)    0
Distance to School (miles)               0
House Style                              0
Garage Capacity                          0
Swimming Pool                            0
Energy Efficiency                        0
Property Tax Rate                        0
Distance to Park (miles)                 0
Market Trends                            0
Home Security System                     0
Shopping Centers (miles)                 0
Quality of Construction                  0
Amenities Rating                         0
Distance to Hospital (miles)             0
House Price                              0
dtype: int64

# Split the dataset into features (X) and target variable (y)

In [9]:
# Target: the price column the model is trained to predict.
y = df['House Price']
# Features: every column except the target.
X = df.drop(columns=['House Price'])

## Perform one-hot encoding for categorical variables (if needed)

In [10]:
# Expand each string-valued column into one indicator column per category;
# numeric columns pass through unchanged.
X = pd.get_dummies(data=X)

## Split the dataset into training and testing sets

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Select a Machine Learning Algorithm (Linear Regression)

In [12]:
# Instantiate a linear regression estimator with its default settings.
model = LinearRegression()

# Train the Model

In [13]:
# Fit the estimator on the training split only; the test split stays unseen
# until evaluation.
model.fit(X_train, y_train)

# Evaluate the Model

In [14]:
# Predict prices for the held-out rows.
predictions = model.predict(X_test)
# Score those predictions against the true prices. MSE is in squared price
# units, so large magnitudes are expected for prices in the hundreds of thousands.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)

In [15]:
# Report the test-set mean squared error computed in the previous cell.
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 58740081760.74007


# Make Predictions

In [16]:
# Take the first encoded row as a stand-in for "new" data. It already has the
# same columns the model was fitted on, so it can be passed straight to predict.
new_data = X.iloc[:1]
predicted_price = model.predict(new_data)

In [19]:
# Display the single-row feature frame that was passed to the model.
new_data

Unnamed: 0,Size (sq. ft.),Bedrooms,Bathrooms,Lot Size (acres),Year Built,Neighborhood Safety,Proximity to Public Transport (miles),Distance to School (miles),Garage Capacity,Property Tax Rate,...,Energy Efficiency_E,Energy Efficiency_F,Market Trends_Booming,Market Trends_Declining,Market Trends_Stable,Home Security System_No,Home Security System_Yes,Quality of Construction_High,Quality of Construction_Low,Quality of Construction_Medium
0,1660,4,4,3.176683,1992,5,2.869659,1.803334,2,2.330461,...,0,1,0,1,0,0,1,0,1,0


In [17]:
# predict returns one value per input row; index 0 is the only prediction here.
print(f'Predicted House Price: {predicted_price[0]}')

Predicted House Price: 340536.0174055396
