In [6]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Create a synthetic property dataset with 500 entries
def create_property_data(num_entries=500):
    np.random.seed(42)  # For reproducibility
    locations = ['Downtown', 'Suburb', 'Countryside', 'Pune']
    amenities = ['wifi', 'gym', 'pool', 'parking', 'pet_friendly']

    data = {
        'title': [f'Property {i+1}' for i in range(num_entries)],
        'location': np.random.choice(locations, num_entries),
        'size': np.random.randint(500, 3000, num_entries),  # Size in square feet
        'amenities_count': np.random.randint(1, 5, num_entries),
        'price': np.random.randint(1000, 4000, num_entries),  # Monthly rent
        'amenities': [
            np.random.choice(amenities, np.random.randint(1, len(amenities) + 1), replace=False).tolist()
            for _ in range(num_entries)
        ]
    }
    return pd.DataFrame(data)

# Load the dataset
properties_df = create_property_data(500)
print("Property Dataset (First 10 Rows):")
print(properties_df.head(10))





Property Dataset (First 10 Rows):
         title     location  size  amenities_count  price  \
0   Property 1  Countryside   690                4   3679   
1   Property 2         Pune  2800                3   3106   
2   Property 3     Downtown  1480                1   1127   
3   Property 4  Countryside  2506                3   1735   
4   Property 5  Countryside  1684                3   3768   
5   Property 6         Pune   627                3   3573   
6   Property 7     Downtown  2565                3   1520   
7   Property 8     Downtown  2316                4   3599   
8   Property 9  Countryside  2258                1   1236   
9  Property 10       Suburb  1069                4   2304   

                                  amenities  
0                                    [wifi]  
1                                    [pool]  
2           [pet_friendly, pool, gym, wifi]  
3                      [pool, pet_friendly]  
4                                    [pool]  
5  [pool, wifi, pe

In [7]:

# Price Prediction Model
def price_prediction_model(df):
    # Prepare data
    X = df[['size', 'amenities_count']]
    y = df['price']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    model = RandomForestRegressor()
    model.fit(X_train, y_train)

    # Predictions
    predictions = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, predictions)
    print(f'Mean Squared Error for Price Prediction: {mse:.2f}')

price_prediction_model(properties_df)

Mean Squared Error for Price Prediction: 974606.84


In [8]:
# Vacancy Prediction Model
def vacancy_prediction_model(df):
    # Prepare data
    X = df[['size', 'price', 'amenities_count']]
    y = df['vacant'] = np.random.choice([0, 1], len(df))  # Random vacancy status for demonstration

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    # Predictions
    predictions = model.predict(X_test)

    # Evaluate the model
    print(classification_report(y_test, predictions))

vacancy_prediction_model(properties_df)




              precision    recall  f1-score   support

           0       0.51      0.57      0.54        54
           1       0.41      0.35      0.38        46

    accuracy                           0.47       100
   macro avg       0.46      0.46      0.46       100
weighted avg       0.46      0.47      0.46       100



In [9]:

# Property Recommendation Model
def recommend_properties(user_preferences, properties_df):
    # Filter based on user preferences
    filtered_properties = properties_df[
        (properties_df['price'] <= user_preferences['budget']) &
        (properties_df['location'] == user_preferences['location'])
    ]

    # Define a scoring function based on amenities match
    user_amenities_set = set(user_preferences['amenities'])

    def score_property(row):
        property_amenities_set = set(row['amenities'])
        return len(user_amenities_set.intersection(property_amenities_set))

    # Apply scoring function
    filtered_properties['score'] = filtered_properties.apply(score_property, axis=1)

    # Sort by score (more matches = higher score)
    recommended = filtered_properties.sort_values(by='score', ascending=False)

    return recommended[['title', 'location', 'price', 'amenities', 'score']]

# Example user preferences
user_preferences = {
    'budget': 2500,
    'location': 'Pune',
    'amenities': ['wifi', 'gym']  # User's preferred amenities
}

# Get recommendations
recommended_properties = recommend_properties(user_preferences, properties_df)
print("\nRecommended Properties:")
print(recommended_properties.head(10))



Recommended Properties:
            title location  price                                 amenities  \
393  Property 394     Pune   1284        [wifi, gym, parking, pet_friendly]   
341  Property 342     Pune   1588           [gym, pet_friendly, wifi, pool]   
233  Property 234     Pune   1884  [pool, parking, pet_friendly, wifi, gym]   
232  Property 233     Pune   2333  [pool, wifi, parking, pet_friendly, gym]   
312  Property 313     Pune   1215  [gym, pool, wifi, pet_friendly, parking]   
350  Property 351     Pune   2256                      [parking, gym, wifi]   
143  Property 144     Pune   1989                [pool, wifi, gym, parking]   
149  Property 150     Pune   2296  [pool, gym, parking, pet_friendly, wifi]   
162  Property 163     Pune   2372  [gym, pet_friendly, parking, wifi, pool]   
206  Property 207     Pune   1292        [parking, gym, pet_friendly, wifi]   

     score  
393      2  
341      2  
233      2  
232      2  
312      2  
350      2  
143      2  
1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_properties['score'] = filtered_properties.apply(score_property, axis=1)


In [10]:
def clustering_model(properties_df):
    # Preparing the data for clustering
    feature_cols = ['size', 'price', 'amenities_count']
    X = properties_df[feature_cols]

    # Standardizing the data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # K-Means Clustering
    kmeans = KMeans(n_clusters=3, random_state=42)  # Adjust n_clusters as needed
    properties_df['cluster'] = kmeans.fit_predict(X_scaled)

    return properties_df, kmeans # Return kmeans object along with the dataframe

# Apply clustering
clustered_properties, kmeans = clustering_model(properties_df) # Get kmeans object
print("\nClustered Property Dataset (First 10 Rows):")
print(clustered_properties.head(10))



Clustered Property Dataset (First 10 Rows):
         title     location  size  amenities_count  price  \
0   Property 1  Countryside   690                4   3679   
1   Property 2         Pune  2800                3   3106   
2   Property 3     Downtown  1480                1   1127   
3   Property 4  Countryside  2506                3   1735   
4   Property 5  Countryside  1684                3   3768   
5   Property 6         Pune   627                3   3573   
6   Property 7     Downtown  2565                3   1520   
7   Property 8     Downtown  2316                4   3599   
8   Property 9  Countryside  2258                1   1236   
9  Property 10       Suburb  1069                4   2304   

                                  amenities  vacant  cluster  
0                                    [wifi]       1        1  
1                                    [pool]       0        2  
2           [pet_friendly, pool, gym, wifi]       0        0  
3                      [pool, p

In [11]:
# Function to recommend properties based on cluster
def cluster_based_recommendations(user_preferences, clustered_properties, kmeans): # Pass kmeans as argument
    # Filter properties in the same cluster as the user's preferences
    user_features = np.array([[user_preferences['size'], user_preferences['budget'], len(user_preferences['amenities'])]])
    user_features_scaled = StandardScaler().fit_transform(user_features)

    user_cluster = kmeans.predict(user_features_scaled)[0]

    recommended = clustered_properties[clustered_properties['cluster'] == user_cluster]

    return recommended[['title', 'location', 'price', 'amenities', 'cluster']]

# Example user preferences for clustering recommendations
user_clustering_preferences = {
    'size': 1500,
    'budget': 2500,
    'amenities': ['wifi', 'gym']  # User's preferred amenities
}

# Get cluster-based recommendations
cluster_recommendations = cluster_based_recommendations(user_clustering_preferences, clustered_properties, kmeans) # Pass kmeans object
print("\nCluster-Based Recommended Properties:")
print(cluster_recommendations.head(10))


Cluster-Based Recommended Properties:
          title     location  price                                 amenities  \
1    Property 2         Pune   3106                                    [pool]   
3    Property 4  Countryside   1735                      [pool, pet_friendly]   
6    Property 7     Downtown   1520                              [wifi, pool]   
8    Property 9  Countryside   1236                      [wifi, gym, parking]   
11  Property 12  Countryside   2301        [gym, wifi, parking, pet_friendly]   
13  Property 14  Countryside   3051                            [parking, gym]   
18  Property 19         Pune   1804                [wifi, pool, pet_friendly]   
19  Property 20  Countryside   1074                         [gym, pool, wifi]   
21  Property 22     Downtown   3175  [parking, wifi, pool, gym, pet_friendly]   
22  Property 23       Suburb   3089                                    [pool]   

    cluster  
1         2  
3         2  
6         2  
8         2  