In [1]:
from pydantic import BaseModel
from openai import OpenAI
from dotenv import load_dotenv
import os
from datetime import datetime
import pandas as pd
import json

In [4]:
# Name of the column that will receive the model's predictions.
variable = "proposed_adding"
# Identifier of the fine-tuned chat model that is queried for each row.
fine_tuned_model = "ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainset100-proposed-adding-run1:BCZBYUm5"

# Location of the raw input CSV, relative to the notebook's working directory.
directory = "data"
data_file = "housing_descriptions_rawdata.csv"
data_path = "/".join((directory, data_file))

# Read the raw rows and preview the first few of them.
data_df = pd.read_csv(data_path)
data_df.head()

Unnamed: 0,caseId,short_description,entitlement
0,214839,REQUEST FOR A CERTIFICATE OF COMPLIANCE TO COR...,REQUEST FOR A CERTIFICATE OF COMPLIANCE FOR A ...
1,189626,LOT LINE ADJUSTMENT,LOT LINE ADJUSTMENT BETWEEN TWO VACANT LOTS.
2,172754,,AMENDMENTS TO ADD SUSTAINABILITY GUIDELINES TO...
3,142884,,REDRILLING OF OIL WELL.
4,141916,,ROOF-MOUNTED WIRELESS CELLULAR.


In [6]:
# Draw a fixed-size random subset of the rows; the seed pins the selection
# so the same rows are picked when sampling from the same input frame.
sample_size = 100
sample_seed = 42
data_df = data_df.sample(n=sample_size, random_state=sample_seed)

# Preview the sampled rows
data_df.head()


Unnamed: 0,caseId,short_description,entitlement
8092,236153,"THE SALE, DISPENSATION AND ON-SITE CONSUMPTION...","PURSUANT TO LAMC SECTION 12.24 W 1, A CONDITIO..."
28137,184100,"CONVERSION OF EXISTING 100,286 SF IND. BLDG. I...","TENTATIVE TRACT, ZAD, EAFA ZONE VARIANCE, FROM..."
64217,241662,ADMINISTRATIVE REVIEW FOR COMPLIANCE WITH THE ...,ADMINISTRATIVE REVIEW FOR COMPLIANCE WITH THE ...
17617,250328,INSTALL (2) FABRICATED ALLUMINUM REVERSE CHANN...,CRENSHAW CORRIDOR SPECIFIC PLAN OVERLAY REVIEW
108277,183672,CUB TO ALLOW THE SALE OF ALCOHOLIC BEVERAGES F...,"PURSUANT TO SECTION 12.24-W.1, A CUB FOR THE S..."


In [7]:
# Disabled alternative system prompt, kept for reference. It describes a binary
# classification task (1 = multifamily housing, 0 = not multifamily housing).
# instructions = """
# Instructions:
# You are a housing assistant specializing in analyzing housing projects. Your job is to read the description of a housing project and its entitlement details to extract key information.

# Task:
# Determine whether the project is a multifamily housing development.

# Output 1 if the project is multifamily housing (e.g., apartments, condominiums, townhouses, or any residential structure with multiple units).
# Output 0 if the project is not multifamily housing (e.g., single-family homes, commercial buildings, or mixed-use projects without residential units).
# Input Example:
# "The project consists of a 5-story apartment building with 100 residential units and shared amenities."

# Expected Output:
# 1

# """

# Active system prompt: asks for the number of housing units proposed to be
# added, returned as an integer, with -1 when no unit count is stated.
# NOTE: this text is sent to the model verbatim as the system message, so any
# change to its wording changes the model's input.
instructions = """
Instructions:
You are a housing assistant specializing in analyzing housing projects. Your job is to read the entitlement and description columns of a housing project and extract key information.

Task:
Extract the numerical amount of units (dwelling units, apartments, condos, housing units, residential units, etc.) proposed added from the project description.
If a number is associated with housing unit-related terms (e.g., "3 dwelling units," "73 residential condos," "2-unit apartment building"), extract that number.
Return the total number of units added proposed as an integer.
SFH or SFD or Single Family Home or something similar would indicate that 1 unit is being added. If 5 SFD are built then 5 units are added.
If the number of units is not mentioned explicitly, return -1.

Input Example:
Short Description:
"A DENSITY BONUS (3 OFF-MENU INCENTIVES) AND CONDITIONAL USE PERMIT TO EXCEED DENSITY TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT, LOW-INCOME SENIOR HOUSING PROJECT, PHP"
Entitlement:
"PURSUANT TO 12.22.A.25; A DENSITY BONUS TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT SENIOR HOUSING PROJECT WITH GREATER THAN 35% INCREASE IN DENSITY WITH OFF-MENU INCENTIVES TO ALLOW OVERALL HEIGHT OF 72-FEET IN LIEU OF THE LAMC HEIGHT OF 45-FEET, ADDITIONAL INCENTIVES TO REDUCE ACCESSIBLE PARKING SPACES; AND ALLOW THE INCREASE IN THE FAR FROM 1.5:1 TO 2.63:1 WITH 17 UNITS SET ASIDE FOR SENIOR LOW-INCOME.
PURSUANT TO 12.24.U.26, A CONDITIONAL USE PERMIT TO REQUEST A 167.5 % DENSITY BONUS AND RESERVING 64% (17) LOW INCOME UNIT 
HOUSING DATA: 53 MARKET RATE, 17 LOW INCOME UNITS FOR SENIORS"

Expected Output:
70
"""

In [8]:
# Let python-dotenv populate the process environment, then read the OpenAI
# API key from it (the lookup yields None when the variable is not set).
load_dotenv()
key = os.getenv("OPENAI_API_KEY")

In [9]:
# Build the OpenAI API client with the key read from the environment.
client = OpenAI(api_key=key)

In [10]:
def _text_or_blank(value):
    """Return ``str(value)``, or an empty string when ``value`` is missing (NaN/None)."""
    return "" if pd.isna(value) else str(value)


# One prompt per row: the short description and the entitlement text, each
# labelled and separated by a newline. Missing fields become empty strings.
test_text_ls = [
    f"Short Description: {_text_or_blank(short_desc)}\nEntitlement: {_text_or_blank(entitlement)}"
    for short_desc, entitlement in zip(
        data_df["short_description"], data_df["entitlement"]
    )
]
        

In [12]:
# Spot-check one of the assembled prompts (the second item in the list).
test_text_ls[1]

'Short Description: CONVERSION OF EXISTING 100,286 SF IND. BLDG. INTO 57 JOINT LIVE WORK QUARTERS FOR ARTISTS AND ARTISANS CONDOMINUM UNITS AND ONE COMMERCIAL CONDOMINIUM UNIT IN THE M3-1 ZONE.\nEntitlement: TENTATIVE TRACT, ZAD, EAFA ZONE VARIANCE, FROM SECTION 12.21-A,5 PURSUANT TO SECTION 12.27 OF THE LAMC, TO ALLOW DEVIATIONS FROM THE PROVISIONS OF AFOREMENTIIONED SECTION AS SPECIFIED IN THE REQUESTED ACTION ITEM (ATTACHMENT A)OF THE AMENDED MASTER LAND USE PERMIT APPLICATION.'

In [13]:
class Model(BaseModel):
    """Structured-output schema: the model must answer with a single integer."""

    # Integer label returned by the model (the prompt asks for a unit count, or -1).
    predicted_label: int


def _request_label(text):
    """Query the fine-tuned model for one prompt and return its integer label.

    Args:
        text: The user message (combined short description and entitlement).

    Returns:
        The ``predicted_label`` from the parsed response, or ``None`` when the
        response carries no parsed payload (for example because the model
        refused), so the caller can record the gap instead of crashing.
    """
    completion = client.beta.chat.completions.parse(
        model=fine_tuned_model,  # Use the fine-tuned model instead of base model
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
        ],
        response_format=Model,
    )
    message = completion.choices[0].message
    if message.parsed is None:
        # Without this guard, `.predicted_label` on None raises AttributeError
        # and every prediction gathered so far in the loop is lost.
        refusal = getattr(message, "refusal", None)
        print(f'no parsed prediction returned (refusal: {refusal!r})')
        return None
    return message.parsed.predicted_label


# Collect one label per prompt, in the same order as test_text_ls.
# A None entry marks a row whose response could not be parsed; it stays
# distinct from -1, which the prompt reserves for "unit count not mentioned".
predlabels_ls = []
for comment in test_text_ls:
    # Log before the request so a failing call can be traced to its input.
    print(f'getting prediction for: {comment}')
    label = _request_label(comment)
    print(f'predicted label: {label}')
    predlabels_ls.append(label)

predlabels_ls

getting prediction for: Short Description: THE SALE, DISPENSATION AND ON-SITE CONSUMPTION OF BEER AND WINE IN CONJUNCTION WITH A 1,346 SQUARE-FOOT RESTAURANT 
Entitlement: PURSUANT TO LAMC SECTION 12.24 W 1, A CONDITIONAL USE TO ALLOW THE SALE, DISPENSATION AND ON-SITE CONSUMPTION OF BEER AND WINE IN CONJUNCTION WITH A 1,346 SQUARE-FOOT RESTAURANT WITH INDOOR SEATING FOR 40 PATRONS WITH HOURS OF OPERATION FROM 7:00 A.M. TO 11:00 P.M. DAILY. 
predicted label: -1
getting prediction for: Short Description: CONVERSION OF EXISTING 100,286 SF IND. BLDG. INTO 57 JOINT LIVE WORK QUARTERS FOR ARTISTS AND ARTISANS CONDOMINUM UNITS AND ONE COMMERCIAL CONDOMINIUM UNIT IN THE M3-1 ZONE.
Entitlement: TENTATIVE TRACT, ZAD, EAFA ZONE VARIANCE, FROM SECTION 12.21-A,5 PURSUANT TO SECTION 12.27 OF THE LAMC, TO ALLOW DEVIATIONS FROM THE PROVISIONS OF AFOREMENTIIONED SECTION AS SPECIFIED IN THE REQUESTED ACTION ITEM (ATTACHMENT A)OF THE AMENDED MASTER LAND USE PERMIT APPLICATION.
predicted label: 57
gettin

[-1,
 57,
 2,
 -1,
 -1,
 -1,
 136,
 -1,
 -1,
 5,
 -1,
 -1,
 4,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 20,
 -1,
 -1,
 -1,
 1,
 -1,
 4,
 2,
 6,
 -1,
 2,
 44,
 -1,
 2,
 -1,
 -1,
 -1,
 153,
 8,
 -1,
 6,
 1,
 -1,
 2,
 1,
 2,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 3,
 -1,
 60,
 -1,
 -1,
 -1,
 4,
 -1,
 20,
 -1,
 -1,
 -1,
 24,
 12,
 -1,
 -1,
 11,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 -1,
 1,
 -1,
 -1,
 -1,
 -1,
 181,
 2,
 20,
 -1,
 1,
 6,
 1,
 -1]

In [14]:
# Attach the predictions as a column named after `variable`; the list must
# hold exactly one entry per row of data_df.
data_df = data_df.assign(**{variable: predlabels_ls})
data_df.head()

Unnamed: 0,caseId,short_description,entitlement,proposed_adding
8092,236153,"THE SALE, DISPENSATION AND ON-SITE CONSUMPTION...","PURSUANT TO LAMC SECTION 12.24 W 1, A CONDITIO...",-1
28137,184100,"CONVERSION OF EXISTING 100,286 SF IND. BLDG. I...","TENTATIVE TRACT, ZAD, EAFA ZONE VARIANCE, FROM...",57
64217,241662,ADMINISTRATIVE REVIEW FOR COMPLIANCE WITH THE ...,ADMINISTRATIVE REVIEW FOR COMPLIANCE WITH THE ...,2
17617,250328,INSTALL (2) FABRICATED ALLUMINUM REVERSE CHANN...,CRENSHAW CORRIDOR SPECIFIC PLAN OVERLAY REVIEW,-1
108277,183672,CUB TO ALLOW THE SALE OF ALCOHOLIC BEVERAGES F...,"PURSUANT TO SECTION 12.24-W.1, A CUB FOR THE S...",-1


In [15]:
# Write the rows together with the prediction column to a CSV named after `variable`.
output_filename = f'predictions/predictions_{variable}.csv'
# to_csv does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
data_df.to_csv(output_filename, index=False)
print(f"Predictions saved to {output_filename}")

Predictions saved to predictions/predictions_proposed_adding.csv
