**Imports and Libraries**

In [33]:
import pandas as pd
import numpy as np
from pymcdm.methods import TOPSIS


**Safety Model - TOPSIS**

In [34]:
# Location of the combined dataset that both TOPSIS models read from.
# Input full path from own device
DATASET_PATH = '/Users/matthewvchou/ai-and-society-housing-finder/datasets/final.csv'
full_dataset = pd.read_csv(DATASET_PATH)

# Safety (Crime) model: one weight per crime-class column.
# The weights are keyed by column name so each value stays attached to its
# feature even if the column order is changed later.
# Codes --> F (Felony), M (Misdemeanor), V (Violation), I (Infraction), 9 (Misc.)
# Got these weights by asking ChatGPT
# NOTE(review): the intended severity order is F > M > V > I > 9, but '9' (0.1071)
# currently outweighs 'I' (0.0893) -- confirm whether these two values are swapped.
safety_weight_by_col = {
    '9': 0.1071,
    'F': 0.4464,
    'I': 0.0893,
    'M': 0.2232,
    'V': 0.1339,
}

# Separating out the Safety (Crime) features, in the same order as the weights
safety_cols = list(safety_weight_by_col)
X_safety = full_dataset[safety_cols].values

# The rounded values above add up to 0.9999, so rescale them to sum to exactly 1
# (same treatment as the livability weights). A uniform rescale keeps the
# relative importance of the criteria unchanged.
safety_weights = np.array([safety_weight_by_col[col] for col in safety_cols])
safety_weights = safety_weights / safety_weights.sum()

# Need to set each Column/Feature as a Net Negative (cost criterion, -1)
# We used the counts of each crime type, so technically a higher number means less safe
# We standardized the data, so no need to redo that part
safety_criteria = np.array([-1] * len(safety_cols))

# Apply TOPSIS
safety_topsis = TOPSIS()
safety_scores = safety_topsis(X_safety, safety_weights, safety_criteria)

**Livability Model - TOPSIS**

In [35]:
# Separating out the Livability (Food) features
# Each column label is a string of establishment-type codes, one letter per code
# (e.g. 'ABC' combines the A, B and C codes listed in gpt_base below)
livability_cols = ['A', 'AB', 'ABC', 'ABCD', 'ABCDK', 'ABCH', 'ABCHDK', 'ABCHK', 'ABCK', 'ABH' , 'ABHK', 'ABK', 'AC', 'ACD', 'ACDK', 'ACE', 'ACED', 'ACEDK', 'ACEWD', 'ACH', 'ACHD', 'ACHDK', 'ACHK', 'ACK', 'AD', 'ADK', 'AEHK', 'AHDK', 'AM']
X_livability = full_dataset[livability_cols].values


# Weights for Different Columns/Features
# Base Values generated by ChatGPT (based on how beneficial/comprehensive the store type is to residents)
# The weight of each column is the sum of the base values of the codes in its label

# Go to file below to see what each code means:
# /datasets/details/livability/NYSDAM_RetailFoodStoresEstablishmentTypeCodes.pdf
gpt_base = {'A': 1,     # Store
            'B': 0.5,   # Bakery
            'C': 0.3,   # Food Manufacturer
            'D': 0.4,   # Food Warehouse
            'E': 0.3,   # Beverage Plant
            'H': 0.4,   # Wholesale Manufacturer
            'K': 0.2,   # Vehicle
            'M': 0.1,   # Salvage Dealer
            'W': 0.3}   # Farm Winery
                        # TOTAL = 3.5

# Sum the base values for each letter in the label.
# A letter missing from gpt_base raises KeyError, which flags an unmapped code.
livability_weights = np.array([
    sum(gpt_base[code] for code in col)
    for col in livability_cols
])

# Normalizing weights so they sum to 1.
# Dividing every raw sum by the base total (3.5) first is unnecessary: a constant
# factor cancels out in this normalization.
livability_weights = livability_weights / livability_weights.sum()

# Need to set each Column/Feature as a Net Positive (benefit criterion, +1)
# We used the counts of each establishment type, so a higher number means more livable/more stores (intuitively)
# We standardized the data, so no need to redo that part
livability_criteria = np.array([1] * len(livability_cols))

# Apply TOPSIS
livability_topsis = TOPSIS()
livability_scores = livability_topsis(X_livability, livability_weights, livability_criteria)

**Apply Rankings**

In [None]:
# Build the results table in one chain: the zipcode labels taken from
# full_dataset['modzcta'], both TOPSIS scores, then a rank for each score.
# Ranks are descending (highest score gets rank 1); with method='min',
# tied scores all receive the lowest rank number of their group.
rankings = pd.DataFrame({
    'zipcodes': full_dataset['modzcta'],
    'safety_scores': safety_scores,
    'livability_scores': livability_scores
}).assign(
    safety_rank=lambda scored: scored['safety_scores'].rank(method='min', ascending=False).astype(int),
    livability_rank=lambda scored: scored['livability_scores'].rank(method='min', ascending=False).astype(int),
)

# Write the table to rankings.csv, leaving out the DataFrame index
rankings.to_csv('rankings.csv', index=False)

     zipcodes  safety_scores  livability_scores  safety_rank  livability_rank
0       10001       0.469868           0.032807          161              127
1       10002       0.568915           0.253458          150               11
2       10003       0.600983           0.045683          143              112
3       10004       0.968906           0.004491           24              169
4       10005       0.989290           0.003925           10              170
..        ...            ...                ...          ...              ...
172     11691       0.599550           0.034662          144              125
173     11692       0.897183           0.014967           61              157
174     11693       0.847475           0.035749           83              123
175     11694       0.954065           0.018019           32              153
176     11697       0.999535           0.000000            2              176

[177 rows x 5 columns]
