In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
import numpy as np

Compute `TotalHouseArea` (basement, both floors, garage, deck and all porch areas) to use as a feature when estimating rent.

In [3]:

import pandas as pd
# Load the training data from the CSV file next to the notebook.
df = pd.read_csv('train.csv')

# Square-footage columns that are added together into one size figure per house.
area_columns = [
    "TotalBsmtSF", "1stFlrSF", "2ndFlrSF",
    "GarageArea", "WoodDeckSF", "OpenPorchSF",
    "EnclosedPorch", "3SsnPorch", "ScreenPorch",
]
# skipna=False: a missing component makes the whole total NaN, exactly as
# element-wise `+` on the individual columns would.
df["TotalHouseArea"] = df[area_columns].sum(axis=1, skipna=False)

# Columns used as model inputs.
features = [
    'TotalHouseArea',
    'BedroomAbvGr',
    'KitchenAbvGr',
    'TotRmsAbvGrd',
    'FullBath',
    'HalfBath',
    'OverallQual',
    'OverallCond',
    'YearBuilt',
    'Neighborhood',
]


In [4]:
# Show every column name available in the loaded frame.
print(list(df.columns))


['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC'

Rule of thumb used here: monthly rent is estimated as 1% of the actual sale price.

In [5]:
# Name of the column the model is trained to predict.
target = 'EstimatedMonthlyRent'

# Monthly rent is taken to be this fraction of the sale price.
rent_fraction_of_price = 0.01
df[target] = df['SalePrice'] * rent_fraction_of_price

In [None]:

# Fallback values used to fill missing *feature* entries.
# Median for size / quality / year style columns, most frequent value (mode)
# for count-like columns and for the categorical 'Neighborhood'.
median_filled = [
    "TotalHouseArea", "TotRmsAbvGrd", "OverallQual", "OverallCond", "YearBuilt",
]
mode_filled = [
    "BedroomAbvGr", "KitchenAbvGr", "FullBath", "HalfBath", "Neighborhood",
]
default_values = {
    **{col: df[col].median() for col in median_filled},
    **{col: df[col].mode()[0] for col in mode_filled},
}

# The target is deliberately NOT imputed: filling a missing label with the
# median would invent training/evaluation targets. Rows without a target are
# dropped instead; only the feature columns listed above are filled.
df = df.dropna(subset=[target]).fillna(default_values)

In [7]:
# Separate the model inputs (feature columns) from the value to predict.
X, y = df[features], df[target]

In [8]:
# Columns that must be one-hot encoded before reaching the model.
categorical = ['Neighborhood']

# Every other feature, kept in the order it appears in `features`.
# A set difference would give an arbitrary order that can change between
# interpreter runs (string hashing is randomised), which hurts reproducibility.
numerical = [col for col in features if col not in categorical]

In [9]:
# One-hot encode the categorical columns; categories not seen during fitting
# are ignored instead of raising an error. All remaining columns are passed
# through to the model unchanged.
neighborhood_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[("cat", neighborhood_encoder, categorical)],
    remainder='passthrough',
)

In [10]:
# Random forest regressor (100 trees, fixed seed) chained after preprocessing.
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = make_pipeline(preprocessor, forest)


In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Fit on the training split, then predict the held-out rows.
model.fit(X_train, y_train)
preds = model.predict(X_test)

# Root-mean-squared error, expressed in the same units as the target.
mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
print(f"RMSE: ${rmse:.2f}")

RMSE: $280.18


In [13]:
# Predict rent for every row of the frame. Note this includes the rows the
# model was fitted on, so these figures are not an out-of-sample estimate.
df['PredictedRent'] = model.predict(X)

# Quick look at the first ten predictions next to a few descriptive columns.
preview_columns = ['Neighborhood', 'TotalHouseArea', 'OverallQual', 'PredictedRent']
print(df[preview_columns].head(10))

  Neighborhood  TotalHouseArea  OverallQual  PredictedRent
0      CollgCr            3175            7      2042.4900
1      Veenker            3282            6      1801.4500
2      CollgCr            3356            7      2103.2738
3      Crawfor            3422            7      1469.9700
4      NoRidge            4455            8      2592.3729
5      Mitchel            3028            5      1521.7774
6      Somerst            4328            8      2852.4093
7       NWAmes            4348            7      2126.9439
8      OldTown            3489            7      1352.9900
9      BrkSide            2277            5      1184.2000


Estimate the monthly rent of a single new house

In [14]:
import pandas as pd


# Attributes of the one house to price; the keys are the model's feature columns.
house_spec = {
    'TotalHouseArea': 2500,
    'BedroomAbvGr': 3,
    'KitchenAbvGr': 1,
    'TotRmsAbvGrd': 7,
    'FullBath': 2,
    'HalfBath': 1,
    'OverallQual': 7,
    'OverallCond': 5,
    'YearBuilt': 2010,
    'Neighborhood': 'OldTown',
}
# A one-element list of records yields a single-row frame.
new_house = pd.DataFrame([house_spec])

# The pipeline returns an array; there is exactly one prediction in it.
predicted_rent = model.predict(new_house)
print(f"Estimated Rent: ${predicted_rent[0]:.2f} per month")


Estimated Rent: $1651.52 per month


In [15]:
# Save the full frame (with the added rent and prediction columns) without the row index.
output_path = 'output.csv'
df.to_csv(output_path, index=False)

In [16]:
# Mean predicted rent per neighbourhood, rounded to two decimals and returned
# as a two-column frame (Neighborhood, PredictedRent).
rent_by_neighborhood = df.groupby('Neighborhood')['PredictedRent']
avg_rent = rent_by_neighborhood.mean().round(2).reset_index()
print(avg_rent)

   Neighborhood  PredictedRent
0       Blmngtn        1950.13
1       Blueste        1474.15
2        BrDale        1091.11
3       BrkSide        1237.71
4       ClearCr        2057.54
5       CollgCr        1978.14
6       Crawfor        2016.14
7       Edwards        1301.74
8       Gilbert        1922.23
9        IDOTRR        1082.30
10      MeadowV        1078.30
11      Mitchel        1583.12
12        NAmes        1459.47
13      NPkVill        1458.07
14       NWAmes        1902.76
15      NoRidge        3299.18
16      NridgHt        3121.53
17      OldTown        1321.65
18        SWISU        1457.84
19       Sawyer        1373.43
20      SawyerW        1859.40
21      Somerst        2248.38
22      StoneBr        3009.92
23       Timber        2429.37
24      Veenker        2297.69


Marker colours on the map (average predicted rent per neighbourhood):
- Red: above $1500
- Orange: above $1200, up to $1500
- Green: above $900, up to $1200
- Blue: $900 or less

In [18]:
import folium
import pandas as pd

# Average predicted rent per neighbourhood, taken from the `avg_rent` frame
# computed in an earlier cell rather than from numbers pasted in by hand.
# The pasted figures that used to live here (e.g. Blmngtn 1170.10) no longer
# matched the values the notebook actually computes (Blmngtn 1950.13).
# Result shape is unchanged: {'Neighborhood': [...], 'PredictedRent': [...]}.
avg_rent_data = avg_rent.to_dict(orient='list')
# Marker position for each neighbourhood code, as a (latitude, longitude)
# tuple; these are passed to folium as marker locations further down.
# NOTE(review): 'NoRidge' and 'Veenker' have identical coordinates, so their
# markers will be drawn on top of each other — confirm the intended locations.
# NOTE(review): the values look hand-entered; verify them against a real source.
neighborhood_coords = {
    'Blmngtn': (42.079, -93.620),
    'Blueste': (42.067, -93.639),
    'BrDale': (42.052, -93.619),
    'BrkSide': (42.034, -93.615),
    'ClearCr': (42.061, -93.629),
    'CollgCr': (42.019, -93.651),
    'Crawfor': (42.026, -93.631),
    'Edwards': (42.023, -93.685),
    'Gilbert': (42.105, -93.649),
    'IDOTRR': (42.018, -93.612),
    'MeadowV': (42.034, -93.655),
    'Mitchel': (42.067, -93.640),
    'NAmes': (42.042, -93.615),
    'NoRidge': (42.073, -93.660),
    'NPkVill': (42.050, -93.643),
    'NridgHt': (42.050, -93.656),
    'NWAmes': (42.056, -93.675),
    'OldTown': (42.027, -93.613),
    'SWISU': (42.018, -93.651),
    'Sawyer': (42.023, -93.656),
    'SawyerW': (42.024, -93.665),
    'Somerst': (42.065, -93.639),
    'StoneBr': (42.076, -93.635),
    'Timber': (42.030, -93.669),
    'Veenker': (42.073, -93.660)
}

# Tabulate the per-neighbourhood rents.
df_avg_rent = pd.DataFrame(avg_rent_data)

# One row per neighbourhood holding its coordinate tuple.
coords_frame = pd.DataFrame(
    list(neighborhood_coords.items()),
    columns=['Neighborhood', 'Coords'],
)

# Default (inner) join: a neighbourhood missing from either table is dropped.
rent_coords = df_avg_rent.merge(coords_frame, on='Neighborhood')

# Base map that the neighbourhood markers are added to.
map_center = [42.03, -93.62]
m = folium.Map(location=map_center, zoom_start=13, tiles='OpenStreetMap')

# Color function
def get_rent_color(rent):
    """Return the marker colour name for a rent amount.

    Bands are checked from highest to lowest using strict ``>`` comparisons:
    above 1500 -> 'red', above 1200 -> 'orange', above 900 -> 'green',
    anything else -> 'blue'.
    """
    bands = ((1500, 'red'), (1200, 'orange'), (900, 'green'))
    for lower_bound, color in bands:
        if rent > lower_bound:
            return color
    return 'blue'

# Place one colour-coded marker per neighbourhood on the map.
for row in rent_coords.itertuples(index=False):
    # Popup text: neighbourhood code in bold, then the rent with thousands
    # separators and two decimals.
    popup_html = (
        f"<b>{row.Neighborhood}</b><br>"
        f"Average Rent: ${row.PredictedRent:,.2f}"
    )
    marker_icon = folium.Icon(
        color=get_rent_color(row.PredictedRent),
        icon='home',
        prefix='fa',
    )
    marker = folium.Marker(
        location=row.Coords,
        popup=folium.Popup(popup_html, max_width=250),
        icon=marker_icon,
    )
    marker.add_to(m)

# Heading that is injected into the HTML of the map page.
title_html = '''
     <h3 align="center" style="font-size:16px"><b>Ames Neighborhood Average Rents</b></h3>
'''
title_element = folium.Element(title_html)
m.get_root().html.add_child(title_element)

# Write a standalone HTML copy, then leave the map as the cell's last
# expression so the notebook renders it.
m.save('ames_rent_map.html')
m

Predicted average rent for each neighbourhood, plotted at that neighbourhood's real-world coordinates.

In [21]:
# Average predicted rent per neighbourhood, read from the `avg_rent` frame
# computed earlier in the notebook instead of a hand-pasted list of numbers
# (the pasted figures, e.g. Blmngtn 1170.10, disagreed with the computed
# 1950.13). Same shape as before: {'Neighborhood': [...], 'PredictedRent': [...]}.
avg_rent_data = avg_rent.to_dict(orient='list')

# Neighbourhood code -> coordinate tuple, presumably (latitude, longitude).
# NOTE(review): this repeats the mapping already defined in the earlier map
# cell; consider defining it once and reusing it.
# NOTE(review): 'NoRidge' and 'Veenker' have identical coordinates — confirm
# the intended locations.
neighborhood_coords = {
    'Blmngtn': (42.079, -93.620),
    'Blueste': (42.067, -93.639),
    'BrDale': (42.052, -93.619),
    'BrkSide': (42.034, -93.615),
    'ClearCr': (42.061, -93.629),
    'CollgCr': (42.019, -93.651),
    'Crawfor': (42.026, -93.631),
    'Edwards': (42.023, -93.685),
    'Gilbert': (42.105, -93.649),
    'IDOTRR': (42.018, -93.612),
    'MeadowV': (42.034, -93.655),
    'Mitchel': (42.067, -93.640),
    'NAmes': (42.042, -93.615),
    'NoRidge': (42.073, -93.660),
    'NPkVill': (42.050, -93.643),
    'NridgHt': (42.050, -93.656),
    'NWAmes': (42.056, -93.675),
    'OldTown': (42.027, -93.613),
    'SWISU': (42.018, -93.651),
    'Sawyer': (42.023, -93.656),
    'SawyerW': (42.024, -93.665),
    'Somerst': (42.065, -93.639),
    'StoneBr': (42.076, -93.635),
    'Timber': (42.030, -93.669),
    'Veenker': (42.073, -93.660)
}