In [16]:
# Import libraries
import random
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [29]:
# Synthetic bathroom dataset: 100 randomly generated rows, written to
# "bathroom_data.csv" and kept in memory as `bathrooms_df`.

# Seed the RNG so that Restart & Run All regenerates exactly the same rows.
# Without a seed every run produced a different dataset, a different CSV and
# therefore different model results further down the notebook.
SEED = 42
N_BATHROOMS = 100
random.seed(SEED)

# Define the range of values for each feature
buildings = ["Charles Library", "Tuttleman", "Beury Hall", "Wachman Hall", "Alter Hall",
             "Mazur", "Gladfelter", "SERC", "Biology Life Sciences", "Speakman Hall"]
floors = [1, 2, 3, 4]
sections = ["Main", "East", "West", "North", "South"]
genders = ["Male", "Female", "Gender Neutral"]

# Pools of 1-5 scores; each bathroom samples one value from each pool
cleanliness_values = [random.randint(1, 5) for _ in range(N_BATHROOMS)]
amenities_values = [random.randint(1, 5) for _ in range(N_BATHROOMS)]
privacy_values = [random.randint(1, 5) for _ in range(N_BATHROOMS)]

accessibility_options = ["Wheelchair accessible", "Not accessible"]
sizes = ["Small", "Medium", "Large"]
ambiances = ["Cozy", "Bright", "Dim", "Elegant", "Neutral"]
technologies = ["Low", "Medium", "High"]
maintenances = ["Weekly", "Bi-weekly", "Monthly"]

# Column order of every generated row (and of the resulting DataFrame / CSV)
COLUMNS = ["building", "floor", "section", "gender", "cleanliness", "amenities",
           "privacy", "accessibility", "size", "ambiance", "technology",
           "maintenance", "rating"]


def _make_bathroom_row():
    """Draw one random bathroom and return its values in COLUMNS order.

    The rating is the mean of the cleanliness, amenities and privacy scores,
    rounded to two decimals.
    """
    building = random.choice(buildings)
    floor = random.choice(floors)
    section = random.choice(sections)
    gender = random.choice(genders)
    cleanliness = random.choice(cleanliness_values)
    amenities = random.choice(amenities_values)
    privacy = random.choice(privacy_values)
    accessibility = random.choice(accessibility_options)
    size = random.choice(sizes)
    ambiance = random.choice(ambiances)
    technology = random.choice(technologies)
    maintenance = random.choice(maintenances)
    rating = round((cleanliness + amenities + privacy) / 3, 2)
    return [building, floor, section, gender, cleanliness, amenities, privacy,
            accessibility, size, ambiance, technology, maintenance, rating]


# Generate N_BATHROOMS rows of bathroom data
bathrooms_data = [_make_bathroom_row() for _ in range(N_BATHROOMS)]

# Create a DataFrame
bathrooms_df = pd.DataFrame(bathrooms_data, columns=COLUMNS)

# Save the DataFrame to a CSV file
bathrooms_df.to_csv("bathroom_data.csv", index=False)

In [30]:
# Preview the first 10 rows of the generated dataset
bathrooms_df.head(10)

Unnamed: 0,building,floor,section,gender,cleanliness,amenities,privacy,accessibility,size,ambiance,technology,maintenance,rating
0,Wachman Hall,2,West,Male,3,3,2,Wheelchair accessible,Small,Neutral,High,Monthly,2.67
1,Mazur,4,East,Female,4,2,4,Not accessible,Large,Dim,Low,Bi-weekly,3.33
2,Beury Hall,2,Main,Male,3,5,2,Not accessible,Medium,Dim,Medium,Monthly,3.33
3,Beury Hall,1,West,Female,1,3,4,Not accessible,Small,Dim,High,Monthly,2.67
4,Gladfelter,2,North,Male,4,2,2,Wheelchair accessible,Large,Neutral,Low,Monthly,2.67
5,Tuttleman,4,South,Gender Neutral,2,5,3,Wheelchair accessible,Medium,Dim,High,Weekly,3.33
6,SERC,1,Main,Female,2,5,4,Not accessible,Medium,Bright,Medium,Monthly,3.67
7,Biology Life Sciences,4,East,Male,1,5,4,Not accessible,Medium,Cozy,High,Monthly,3.33
8,Biology Life Sciences,3,North,Female,5,5,3,Wheelchair accessible,Medium,Elegant,Low,Weekly,4.33
9,Biology Life Sciences,1,South,Female,4,3,5,Wheelchair accessible,Small,Dim,Medium,Monthly,4.0


In [31]:
# Combine "floor" and "section" into a single "Floor-Section" key (e.g. "2-West")
# and remove the two source columns.
#
# The guard makes this cell safe to re-run: previously the columns were dropped
# in place, so a second execution failed with a KeyError on "floor".
if {"floor", "section"}.issubset(bathrooms_df.columns):
    floor_section = bathrooms_df["floor"].astype(str) + "-" + bathrooms_df["section"]
    bathrooms_df = (
        bathrooms_df
        .drop(columns=["floor", "section"])
        .assign(**{"Floor-Section": floor_section})  # appended as the last column
    )
bathrooms_df.head(10)

Unnamed: 0,building,gender,cleanliness,amenities,privacy,accessibility,size,ambiance,technology,maintenance,rating,Floor-Section
0,Wachman Hall,Male,3,3,2,Wheelchair accessible,Small,Neutral,High,Monthly,2.67,2-West
1,Mazur,Female,4,2,4,Not accessible,Large,Dim,Low,Bi-weekly,3.33,4-East
2,Beury Hall,Male,3,5,2,Not accessible,Medium,Dim,Medium,Monthly,3.33,2-Main
3,Beury Hall,Female,1,3,4,Not accessible,Small,Dim,High,Monthly,2.67,1-West
4,Gladfelter,Male,4,2,2,Wheelchair accessible,Large,Neutral,Low,Monthly,2.67,2-North
5,Tuttleman,Gender Neutral,2,5,3,Wheelchair accessible,Medium,Dim,High,Weekly,3.33,4-South
6,SERC,Female,2,5,4,Not accessible,Medium,Bright,Medium,Monthly,3.67,1-Main
7,Biology Life Sciences,Male,1,5,4,Not accessible,Medium,Cozy,High,Monthly,3.33,4-East
8,Biology Life Sciences,Female,5,5,3,Wheelchair accessible,Medium,Elegant,Low,Weekly,4.33,3-North
9,Biology Life Sciences,Female,4,3,5,Wheelchair accessible,Small,Dim,Medium,Monthly,4.0,1-South


In [32]:
# Order the rows by the combined "Floor-Section" key only (building is not part
# of the sort key), then renumber the index from 0 to match the new row order.
bathrooms_df_sorted = (
    bathrooms_df
    .sort_values(by=["Floor-Section"])
    .reset_index(drop=True)
)
bathrooms_df_sorted.head(10)

Unnamed: 0,building,gender,cleanliness,amenities,privacy,accessibility,size,ambiance,technology,maintenance,rating,Floor-Section
0,SERC,Female,4,1,1,Not accessible,Medium,Bright,Medium,Weekly,2.0,1-East
1,Beury Hall,Female,4,1,3,Wheelchair accessible,Large,Cozy,Low,Weekly,2.67,1-East
2,Mazur,Male,3,5,1,Not accessible,Large,Cozy,Medium,Weekly,3.0,1-East
3,SERC,Female,2,5,4,Not accessible,Medium,Bright,Medium,Monthly,3.67,1-Main
4,Mazur,Female,3,2,3,Wheelchair accessible,Large,Elegant,Medium,Monthly,2.67,1-Main
5,Mazur,Gender Neutral,4,4,5,Wheelchair accessible,Medium,Elegant,Low,Weekly,4.33,1-North
6,Tuttleman,Gender Neutral,3,2,4,Wheelchair accessible,Large,Cozy,High,Bi-weekly,3.0,1-North
7,SERC,Male,4,1,4,Not accessible,Large,Cozy,High,Weekly,3.0,1-North
8,SERC,Female,2,4,5,Wheelchair accessible,Small,Neutral,Medium,Bi-weekly,3.67,1-North
9,Alter Hall,Male,1,4,5,Not accessible,Large,Elegant,Medium,Bi-weekly,3.33,1-North


In [41]:
# Train and evaluate an SVD recommender on the bathroom ratings.

# Work on the sorted frame produced above
data = bathrooms_df_sorted

# Ratings in this dataset lie on a 1-5 scale
reader = Reader(rating_scale=(1, 5))

# Surprise reads the three columns as (user id, item id, rating), in that order:
# here "building" plays the user role and "Floor-Section" the item role.
dataset = Dataset.load_from_df(data[['building', 'Floor-Section', 'rating']], reader)

# Hold out 40% of the ratings for evaluation (seeded split)
trainset, testset = train_test_split(dataset, test_size=0.4, random_state=42)

# SVD initialises its factors randomly; pin random_state so the RMSE below and
# the rankings derived from this model are reproducible across runs.
model = SVD(random_state=42)
model.fit(trainset)

# Predict the held-out ratings and report the error
predictions = model.test(testset)
accuracy.rmse(predictions)

RMSE: 0.7817


0.7817019238830035

In [42]:
# Get predictions for all bathrooms
all_bathrooms = dataset.build_full_trainset().build_anti_testset()
all_predictions = model.test(all_bathrooms)

# Debug print statements
print("Number of predictions:", len(all_predictions))
print("Sample predictions:", all_predictions[:10])

Number of predictions: 119
Sample predictions: [Prediction(uid='SERC', iid='1-South', r_ui=3.0071, est=3.397681270283722, details={'was_impossible': False}), Prediction(uid='SERC', iid='1-West', r_ui=3.0071, est=3.3132408734875685, details={'was_impossible': False}), Prediction(uid='SERC', iid='2-Main', r_ui=3.0071, est=3.018397654414674, details={'was_impossible': False}), Prediction(uid='SERC', iid='2-West', r_ui=3.0071, est=3.133337010949896, details={'was_impossible': False}), Prediction(uid='SERC', iid='3-North', r_ui=3.0071, est=3.240668226076026, details={'was_impossible': False}), Prediction(uid='SERC', iid='3-West', r_ui=3.0071, est=3.3569890651174905, details={'was_impossible': False}), Prediction(uid='SERC', iid='4-East', r_ui=3.0071, est=3.1967809744217743, details={'was_impossible': False}), Prediction(uid='SERC', iid='4-Main', r_ui=3.0071, est=3.228107050419348, details={'was_impossible': False}), Prediction(uid='SERC', iid='4-North', r_ui=3.0071, est=3.240668226076026, d

In [44]:
# Order all predictions from highest to lowest estimated rating
ranked_bathrooms = sorted(
    all_predictions,
    key=lambda prediction: prediction.est,
    reverse=True,
)

# Print the top 10 as "<rank>. <building>, Bathroom <floor-section> (<estimate>)"
for rank, pred in enumerate(ranked_bathrooms[:10], start=1):
    print(f"{rank}. {pred.uid}, Bathroom {pred.iid} ({pred.est:.2f})")

1. Wachman Hall, Bathroom 2-South (3.46)
2. Wachman Hall, Bathroom 3-West (3.42)
3. SERC, Bathroom 1-South (3.40)
4. Wachman Hall, Bathroom 1-North (3.37)
5. Charles Library, Bathroom 1-North (3.37)
6. SERC, Bathroom 3-West (3.36)
7. Biology Life Sciences, Bathroom 4-West (3.34)
8. SERC, Bathroom 4-South (3.33)
9. Wachman Hall, Bathroom 1-Main (3.32)
10. SERC, Bathroom 1-West (3.31)
