In [3]:
import pandas as pd
import numpy as np

# Pin the legacy global NumPy RNG so every run yields the same profiles.
np.random.seed(42)

# Size of the synthetic population.
num_users = 10000

# Pools that the categorical / budget columns are sampled from.
locations = ['Wadala', 'Dadar', 'Chembur', 'Chunabhatti', 'Matunga']
branches = ['CMPN', 'INFT', 'BIOMED', 'EXTC', 'EXCS']
genders = ['Male', 'Female', 'Any']
budget_values = [5000, 10000, 15000, 20000, 25000]

# Build the columns one at a time.
# NOTE: the statements below consume the global RNG stream in sequence, so
# their order determines the generated values -- do not reorder them.
user_data = {}
user_data['user_id'] = np.arange(1, num_users + 1)
user_data['gender_preference'] = np.random.choice(genders, size=num_users)

# Two independent budget draws per user; the element-wise smaller one becomes
# the minimum and the larger one the maximum, which guarantees min <= max.
budget_draw_a = np.random.choice(budget_values, size=num_users)
budget_draw_b = np.random.choice(budget_values, size=num_users)
user_data['budget_min'] = np.minimum(budget_draw_a, budget_draw_b)
user_data['budget_max'] = np.maximum(budget_draw_a, budget_draw_b)

user_data['location_preference'] = np.random.choice(locations, size=num_users)
user_data['branch_preference'] = np.random.choice(branches, size=num_users)

# Integer ratings drawn from 1..5 (the upper bound 6 is exclusive).
for rating_column in ('cleanliness_level', 'noise_tolerance'):
    user_data[rating_column] = np.random.randint(1, 6, size=num_users)

# Boolean yes/no tolerances.
for flag_column in ('smoking_tolerance', 'pets_tolerance', 'party_tolerance'):
    user_data[flag_column] = np.random.choice([True, False], size=num_users)

# Assemble the table; columns appear in the insertion order of user_data.
user_df = pd.DataFrame(data=user_data)

# Persist the profiles without the positional index column.
user_df.to_csv('user_profiles.csv', index=False)

# Show the first five rows as plain text.
print(user_df.head(5))



   user_id gender_preference  budget_min  budget_max location_preference  \
0        1               Any        5000       10000             Chembur   
1        2              Male        5000       10000               Dadar   
2        3               Any        5000       20000               Dadar   
3        4               Any        5000       20000             Matunga   
4        5              Male        5000       10000               Dadar   

  branch_preference  cleanliness_level  noise_tolerance  smoking_tolerance  \
0              INFT                  2                1               True   
1              CMPN                  1                2               True   
2              EXCS                  5                5               True   
3              INFT                  2                1               True   
4            BIOMED                  4                2              False   

   pets_tolerance  party_tolerance  
0           False             True  


In [5]:
##Explanation of the Code:
##Imports: The code imports the necessary libraries (pandas for data manipulation and numpy for generating random data).
##Seed: A random seed is set for reproducibility.
##User Count: Defines the number of users (10000).
##Attributes: Specifies the possible values for location_preference, branch_preference, gender_preference, and the budget columns.
##Data Generation: Generates random values for each user profile attribute, then reorders the two budget draws so that budget_min <= budget_max.
##DataFrame: Creates a pandas DataFrame to hold the generated user profiles.
##CSV Output: Saves the DataFrame (without the index) as a CSV file called user_profiles.csv.
##Preview: Displays the first few rows of the generated dataset.