In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [5]:
# Data Preparation

data = pd.read_csv('/Users/clark/Desktop/ironhack/final-project/files/Cleaned dataset.csv')

In [6]:
# Model Selection and Training

X = data.drop(['Property ID', 'Neighborhood', 'Annual Revenue'], axis=1)
y = data['Annual Revenue']

In [7]:
# Normalize numerical features

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# Split data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [9]:
# Train the model

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [10]:
# Model Evaluation

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 279824852.6366076


In [11]:
# Prediction

neighborhood_revenue_predictions = model.predict(X_scaled)

In [12]:
# Ranking

neighborhoods = data['Neighborhood']
predicted_revenue_df = pd.DataFrame({'Neighborhood': neighborhoods, 'Predicted Revenue': neighborhood_revenue_predictions})
ranked_neighborhoods = predicted_revenue_df.sort_values(by='Predicted Revenue', ascending=False)

print(ranked_neighborhoods.head(10))  # Display top 10 neighborhoods by predicted revenue


                       Neighborhood  Predicted Revenue
20           la Dreta de l'Eixample          538690.36
2340         la Dreta de l'Eixample          480120.80
1311                       el Raval          418288.35
1836  Vilapicina i la Torre Llobeta          417309.94
1079           la Font d'en Fargues          403111.92
160          la Dreta de l'Eixample          330295.76
1868         la Dreta de l'Eixample          311829.14
6398             la Sagrada Família          301090.03
6772             la Sagrada Família          300022.55
6722             la Sagrada Família          298150.09
