# Generate preferences

In [1]:
import os
import random
import numpy as np
import pandas as pd

# Display the value of every top-level expression statement in a cell, not only
# the last one. The cell that builds `data_df` relies on this to show both
# `data_df.shape` and `data_df.head()`.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Define file structure constants
# ================================
# Paths are resolved relative to the directory the kernel was started in.
DATA_PATH = os.path.join(os.getcwd(), "../data/")
RAW_DATA_PATH = os.path.join(DATA_PATH, "raw")
DERIVED_DATA_PATH = os.path.join(DATA_PATH, "derived")

# Define notebook specific constants
# ================================
# Number of synthetic preference rows to generate.
NUM_PREFERENCES_GENERATE = 100_000
# Importance ratings a theme can receive and the relative sampling weight of
# each rating; the two lists are paired by position.
IMPORTANCE_VALUES = [0, 1, 2, 3]
IMPORTANCE_WEIGHTS = [1, 2, 3, 3]

# Reproducibility
# ================================
# Every generator below draws from the stdlib `random` module, so seeding it
# once here makes "Restart Kernel -> Run All" produce the same dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

## Accommodation preferences

See [Attributes.md](attributes.md#accomodation-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [3]:
# Column names of the accommodation theme. The position of each name is the
# index that generate_accomodation_preferences() writes to.
ACCOMODATION_ATTR_NAMES = [
    # rented houses, by bedroom count
    "acc_rented_house_relative",
    "acc_house_onebed",
    "acc_house_twobed",
    "acc_house_threebed",
    "acc_house_fourplusbed",
    # rented apartments, by bedroom count
    "acc_rented_apartment_relative",
    "acc_apartment_onebed",
    "acc_apartment_twobed",
    "acc_apartment_threebed",
    "acc_apartment_fourplusbed",
    # shared accommodation
    "acc_shared_relative",
    # rent bands
    "acc_rent_1_74",
    "acc_rent_75_99",
    "acc_rent_100_149",
    "acc_rent_150_199",
    "acc_rent_200_224",
    "acc_rent_225_274",
    "acc_rent_275_349",
    "acc_rent_350_449",
    "acc_rent_450_549",
    "acc_rent_550_649",
    "acc_rent_650_749",
    "acc_rent_750_849",
    "acc_rent_850_949",
    "acc_rent_950_plus",
]

In [4]:
def generate_accomodation_preferences() -> np.ndarray:
    """Simulate one user's accommodation form inputs and derive the preferences.

    Random draws stand in for the form inputs: an importance rating, the
    house/apartment style checkboxes, the four bedroom-count checkboxes, the
    shared-accommodation checkbox and a rent value. If no style box is
    checked, both styles are used; if no bedroom box is checked, all four
    bedroom counts are used.

    Returns:
        Integer array with one entry per name in ACCOMODATION_ATTR_NAMES, in
        the same order. Each entry is either the drawn importance or 0.
    """
    # get random values for user inputs
    # -----
    # NOTE: the order of the random draws below is kept stable so that a seeded
    # run keeps producing the same rows.
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    house_checked = bool(random.getrandbits(1))
    apartment_checked = bool(random.getrandbits(1))
    # at least one style box must be checked or else default to use both
    if not (house_checked or apartment_checked):
        house_checked = apartment_checked = True

    # bedroom checkboxes in the order: one, two, three, four-plus
    beds_checked = [bool(random.getrandbits(1)) for _ in range(4)]
    # at least one bed box must be selected or else default to use all
    if not any(beds_checked):
        beds_checked = [True] * 4

    shared_checked = bool(random.getrandbits(1))
    # abs() folds the (rare) negative draws back onto positive rent values
    rent_value = int(abs(random.normalvariate(500, 200)))

    # generate preferences
    # -----
    # Lower bounds of every rent band after the first one (acc_rent_1_74).
    # NOTE(review): the comparison is strict, so a rent value exactly equal to
    # a band's lower bound does not select that band - confirm this matches
    # the rule described in Attributes.md.
    rent_band_floors = (75, 100, 150, 200, 225, 275, 350, 450, 550, 650, 750, 850, 950)

    # One flag per attribute, laid out exactly like ACCOMODATION_ATTR_NAMES.
    selected = (
        [house_checked]
        + [house_checked and bed for bed in beds_checked]
        + [apartment_checked]
        + [apartment_checked and bed for bed in beds_checked]
        + [shared_checked]
        + [True]  # acc_rent_1_74 is always selected
        + [rent_value > floor for floor in rent_band_floors]
    )

    prefs = np.zeros(len(ACCOMODATION_ATTR_NAMES), dtype=int)
    # Boolean-mask assignment; raises IndexError if the flag layout and the
    # attribute-name list ever get out of sync.
    prefs[np.asarray(selected, dtype=bool)] = importance
    return prefs

## Commute attributes

See [Attributes.md](attributes.md#commute-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [5]:
# Column names of the commute theme: the maximum distance first, then one
# column per campus.
# NOTE(review): the three Swinburne campuses appear twice under two spellings
# ("com_swinbourne_*" and "com_swinburne_*") - confirm whether both sets of
# columns are intended before removing either.
COMMUTE_ATTR_NAMES = [
    "com_max_dist",
    "com_swinbourne_hawthorn",
    "com_swinbourne_croydon",
    "com_swinbourne_wantirna",
    "com_deakin_burwood",
    "com_deakin_geelong",
    "com_deakin_warrnambool",
    "com_federation_ballarat",
    "com_federation_churchill",
    "com_federation_berwick",
    "com_federation_wimmera",
    "com_latrobe_melbourne",
    "com_latrobe_bendigo",
    "com_latrobe_shepparton",
    "com_latrobe_wodonga",
    "com_latrobe_mildura",
    "com_monash_clayton",
    "com_monash_caulfield",
    "com_monash_peninsula",
    "com_monash_parkville",
    "com_rmit_melbourne",
    "com_swinburne_hawthorne",
    "com_swinburne_croydon",
    "com_swinburne_wantirna",
    "com_unimelb_parkville",
    "com_unimelb_southbank",
    "com_unimelb_burnley",
    "com_unimelb_dookie",
    "com_unimelb_creswick",
    "com_unimelb_werribee",
    "com_unimelb_shepparton",
    "com_vicuni_melbourne",
    "com_vicuni_footscray",
    "com_vicuni_stalbans",
    "com_vicuni_sunshine",
    "com_vicuni_werribee",
    "com_catholic_ballarat",
    "com_catholic_melbourne",
    "com_torrens_melbourne",
]

In [6]:
def generate_commute_preferences() -> np.ndarray:
    """Simulate one user's commute form inputs and derive the preferences.

    Random draws stand in for the form inputs: an importance rating, a single
    campus and a maximum distance between 3 and 30 (inclusive).

    Returns:
        Integer array with one entry per name in COMMUTE_ATTR_NAMES, in the
        same order. Entry 0 ("com_max_dist") holds the maximum distance, the
        entry of the drawn campus holds the importance, and every other entry
        is 0.
    """
    # get random values for user inputs
    # -----
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    # Slot 0 is the distance column, so campus slots start at index 1. Drawing
    # the index directly avoids picking a name and searching for it again.
    campus_index = random.randrange(1, len(COMMUTE_ATTR_NAMES))

    max_dist = random.randint(3, 30)

    # generate preferences
    # -----
    prefs = np.zeros(len(COMMUTE_ATTR_NAMES), dtype=int)
    prefs[0] = max_dist
    prefs[campus_index] = importance

    return prefs


## Industry and environment attributes

See [Attributes.md](attributes.md#industry-and-environment-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [7]:
# Column names of the industry and environment theme, one per checkbox.
INDUSTRY_ATTR_NAMES = [
    "env_retail",
    "env_accomodation_food",
    "env_public_admin",
    "env_healthcare_social_assist",
    "env_arts_recreation",
    "env_rental_hiring_realestate",
    "env_parks",
]

In [8]:
def generate_industry_preferences() -> np.ndarray:
    """Simulate one user's industry/environment form inputs and derive preferences.

    Random draws stand in for the form inputs: one importance rating for the
    theme and one independent checkbox per attribute.

    Returns:
        Integer array with one entry per name in INDUSTRY_ATTR_NAMES, in the
        same order. An entry holds the drawn importance when its checkbox is
        checked and 0 otherwise.
    """
    # get random values for user inputs
    # -----
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    # One coin flip per checkbox, drawn in INDUSTRY_ATTR_NAMES order, so the
    # flags can never drift out of sync with the attribute list.
    checked = [bool(random.getrandbits(1)) for _ in INDUSTRY_ATTR_NAMES]

    # generate preferences
    # -----
    prefs = np.zeros(len(INDUSTRY_ATTR_NAMES), dtype=int)
    prefs[np.asarray(checked, dtype=bool)] = importance

    return prefs


## Demographics

See [Attributes.md](attributes.md#demographics-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [9]:
# Column names of the demographics theme (currently a single checkbox).
DEMOGRAPHICS_ATTR_NAMES = [
    "dem_students_relative",
]

In [10]:
def generate_demographics_preferences() -> np.ndarray:
    """Simulate one user's demographics form inputs and derive the preferences.

    Random draws stand in for the form inputs: one importance rating for the
    theme and one independent checkbox per attribute.

    Returns:
        Integer array with one entry per name in DEMOGRAPHICS_ATTR_NAMES, in
        the same order. An entry holds the drawn importance when its checkbox
        is checked and 0 otherwise.
    """
    # get random values for user inputs
    # -----
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    # One coin flip per checkbox, drawn in DEMOGRAPHICS_ATTR_NAMES order.
    checked = [bool(random.getrandbits(1)) for _ in DEMOGRAPHICS_ATTR_NAMES]

    # generate preferences
    # -----
    prefs = np.zeros(len(DEMOGRAPHICS_ATTR_NAMES), dtype=int)
    prefs[np.asarray(checked, dtype=bool)] = importance

    return prefs


## Transport

See [Attributes.md](attributes.md#transport-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [11]:
# Column names of the transport theme, one per checkbox.
TRANSPORT_ATTR_NAMES = [
    "tra_train",
    "tra_bus",
    "tra_tram",
]

In [12]:
def generate_transport_preferences() -> np.ndarray:
    """Simulate one user's transport form inputs and derive the preferences.

    Random draws stand in for the form inputs: one importance rating for the
    theme and one independent checkbox per attribute.

    Returns:
        Integer array with one entry per name in TRANSPORT_ATTR_NAMES, in the
        same order. An entry holds the drawn importance when its checkbox is
        checked and 0 otherwise.
    """
    # get random values for user inputs
    # -----
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    # One coin flip per checkbox, drawn in TRANSPORT_ATTR_NAMES order.
    checked = [bool(random.getrandbits(1)) for _ in TRANSPORT_ATTR_NAMES]

    # generate preferences
    # -----
    prefs = np.zeros(len(TRANSPORT_ATTR_NAMES), dtype=int)
    prefs[np.asarray(checked, dtype=bool)] = importance

    return prefs


## Safety

See [Attributes.md](attributes.md#safety-theme) for a detailed description of this section in the user input form and how the preferences for each attribute are derived accordingly.

In [13]:
# Column names of the safety theme, one per checkbox.
SAFETY_ATTR_NAMES = [
    "saf_crime_person",
    "saf_crime_property",
    "saf_drug_offences",
    "saf_order_security",
    "saf_justice_procedure",
    "saf_other",
]

In [14]:
def generate_safety_preferences() -> np.ndarray:
    """Simulate one user's safety form inputs and derive the preferences.

    Random draws stand in for the form inputs: one importance rating for the
    theme and one independent checkbox per attribute.

    Returns:
        Integer array with one entry per name in SAFETY_ATTR_NAMES, in the
        same order. An entry holds the drawn importance when its checkbox is
        checked and 0 otherwise.
    """
    # get random values for user inputs
    # -----
    importance = random.choices(IMPORTANCE_VALUES, weights=IMPORTANCE_WEIGHTS)[0]

    # One coin flip per checkbox, drawn in SAFETY_ATTR_NAMES order.
    checked = [bool(random.getrandbits(1)) for _ in SAFETY_ATTR_NAMES]

    # generate preferences
    # -----
    prefs = np.zeros(len(SAFETY_ATTR_NAMES), dtype=int)
    prefs[np.asarray(checked, dtype=bool)] = importance

    return prefs


## Combining it all together!

In [15]:
# Full column order of the generated dataset. The themes are listed in the
# same order in which generate_preferences() concatenates their values.
ATTR_NAMES = [
    *ACCOMODATION_ATTR_NAMES,
    *COMMUTE_ATTR_NAMES,
    *INDUSTRY_ATTR_NAMES,
    *DEMOGRAPHICS_ATTR_NAMES,
    *TRANSPORT_ATTR_NAMES,
    *SAFETY_ATTR_NAMES,
]

In [16]:
def generate_preferences() -> list:
    """Build one full row of preferences by chaining every theme generator.

    The generators are called in the order accommodation, commute, industry,
    demographics, transport, safety, and their values are concatenated in
    that same order.

    Returns:
        A flat list holding the values of all themes.
    """
    theme_generators = (
        generate_accomodation_preferences,
        generate_commute_preferences,
        generate_industry_preferences,
        generate_demographics_preferences,
        generate_transport_preferences,
        generate_safety_preferences,
    )

    row = []
    for generate_theme in theme_generators:
        row.extend(generate_theme().tolist())
    return row
    

In [17]:
# Pre-allocate one row per synthetic respondent and one column per attribute,
# then fill the rows in place through the row views numpy hands out.
data = np.zeros((NUM_PREFERENCES_GENERATE, len(ATTR_NAMES)), dtype=int)

for row in data:
    row[:] = generate_preferences()

# Label the columns and show both the size and a preview of the result.
data_df = pd.DataFrame(data=data, columns=ATTR_NAMES)
data_df.shape
data_df.head()

(100000, 81)

Unnamed: 0,acc_rented_house_relative,acc_house_onebed,acc_house_twobed,acc_house_threebed,acc_house_fourplusbed,acc_rented_apartment_relative,acc_apartment_onebed,acc_apartment_twobed,acc_apartment_threebed,acc_apartment_fourplusbed,...,dem_students_relative,tra_train,tra_bus,tra_tram,saf_crime_person,saf_crime_property,saf_drug_offences,saf_order_security,saf_justice_procedure,saf_other
0,3,0,3,3,0,0,0,0,0,0,...,0,3,3,0,0,0,0,0,0,0
1,3,3,0,0,0,3,3,0,0,0,...,2,3,0,3,2,2,2,2,0,2
2,0,0,0,0,0,2,2,2,2,2,...,2,0,3,3,0,2,0,0,2,2
3,0,0,0,0,0,3,0,3,3,3,...,1,1,0,1,3,0,3,0,3,0
4,2,0,2,0,0,2,0,2,0,0,...,3,0,0,0,0,0,3,0,3,3


In [120]:
# Create the output folder first so the export also works when the derived
# data directory does not exist yet (e.g. on a fresh checkout).
os.makedirs(DERIVED_DATA_PATH, exist_ok=True)
data_df.to_csv(os.path.join(DERIVED_DATA_PATH, "GeneratedPreferences.csv"), index=False)