In [1]:
from pydantic import BaseModel
from openai import OpenAI
from dotenv import load_dotenv
import os
from datetime import datetime
import pandas as pd
import json

In [2]:
variable = 'proposed_adding'

directory = "data"
data_file = "housing_descriptions_training.csv"
data_path = directory + "/" + data_file

data_df = pd.read_csv(data_path)
data_df.head()


Unnamed: 0,short_description,entitlement,proposed_adding,residential_add,adu_udu_add,multi_family_add,single_family_add,has_residential,has_market_rate,has_affordable_lowinc,has_livework,has_adu,has_udu,has_adu_udu,has_multi_family,has_single_family,has_non_res_sqft,has_mixeduse
0,,ADDITIONAL GRADING IN COMPLIANCE WITH AND TO A...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,,DELETE CONDITION S-3(I)(A) OF VTT 71898,49.0,49.0,0.0,49.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,,DEMOLITION OF EXISTING BUILDING TO CREATE TWO ...,75.0,75.0,0.0,75.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,,"INCREASE GRADING, 2 NEW RETAINING WALLS TO COR...",1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,,MODIFICATION FROM AN APPROVED 12 UNIT DENSITY ...,12.0,12.0,0.0,12.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [3]:
# Calculate sizes for 90% train set and 10% test set
train_size = int(len(data_df) * 0.9)
test_size = len(data_df) - train_size

# Sample 90% of rows for training set
train_df = data_df.sample(n=train_size, random_state=42)

# Use remaining 10% for test set, ensuring no overlap with train set
test_df = data_df[~data_df.index.isin(train_df.index)]

# Extract only the specified columns for both sets
train_df = train_df[['short_description', 'entitlement', variable]]
test_df = test_df[['short_description', 'entitlement', variable]]

print("Training set shape:", train_df.shape)
print("Test set shape:", test_df.shape)

# Display first few rows of each set
print("\nTraining set sample:")
train_df.head()

Training set shape: (3976, 3)
Test set shape: (442, 3)

Training set sample:


Unnamed: 0,short_description,entitlement,proposed_adding
1128,"CONSTRUCTION OF A NEW 2,838 SQFT SINGLE FAMILY...",PURSUANT TO LAMC SECTION 11.5.7(C) THE APPLICA...,1.0
4109,TRACT FOR 5 SMALL LOT SUBDIVISION CASE.,"PURSUANT TO LAMC 17.03, A REQUEST FOR A VESTIN...",5.0
3538,PROPOSED SMALL LOT SUBDIVISION TO CREATE 18 SI...,"PURSUANT TO LAMC 17.15, A VESTING TENTATIVE TR...",18.0
3613,"PURSUANT TO LAMC CODE SECTION 14.3.1, PROPOSED...","PURSUANT TO LAMC CODE SECTION 14.3.1, PROPOSED...",30.0
1861,DEMOLITION OF (E) MULTI-FAMILY STRUCTURE AND C...,PURSUANT TO LAMC SECTION 12.20 TO REQUEST A CO...,1.0


In [4]:
# instructions = """
# Instructions:
# You are a housing assistant specializing in analyzing housing projects. Your job is to read the description of a housing project and its entitlement details to extract key information.

# Task:
# Determine whether the project is a multifamily housing development.

# Output 1 if the project is multifamily housing (e.g., apartments, condominiums, townhouses, or any residential structure with multiple units).
# Output 0 if the project is not multifamily housing (e.g., single-family homes, commercial buildings, or mixed-use projects without residential units).
# Input Example:
# "The project consists of a 5-story apartment building with 100 residential units and shared amenities."

# Expected Output:
# 1

# """

instructions = """
Instructions:
You are a housing assistant specializing in analyzing housing projects. Your job is to read the entitlement and description columns of a housing project and extract key information.

Task:
Extract the numerical amount of units (dwelling units, apartments, condos, housing units, residential units, etc.) proposed added from the project description.
If a number is associated with housing unit-related terms (e.g., "3 dwelling units," "73 residential condos," "2-unit apartment building"), extract that number.
Return the total number of units added proposed as an integer.
SFH or SFD or Single Family Home or something similar would indicate that 1 unit is being added. If 5 SFD are built then 5 units are added.
If the number of units is not mentioned explicitly, return -1.

Input Example:
Short Description:
"A DENSITY BONUS (3 OFF-MENU INCENTIVES) AND CONDITIONAL USE PERMIT TO EXCEED DENSITY TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT, LOW-INCOME SENIOR HOUSING PROJECT, PHP"
Entitlement:
"PURSUANT TO 12.22.A.25; A DENSITY BONUS TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT SENIOR HOUSING PROJECT WITH GREATER THAN 35% INCREASE IN DENSITY WITH OFF-MENU INCENTIVES TO ALLOW OVERALL HEIGHT OF 72-FEET IN LIEU OF THE LAMC HEIGHT OF 45-FEET, ADDITIONAL INCENTIVES TO REDUCE ACCESSIBLE PARKING SPACES; AND ALLOW THE INCREASE IN THE FAR FROM 1.5:1 TO 2.63:1 WITH 17 UNITS SET ASIDE FOR SENIOR LOW-INCOME.
PURSUANT TO 12.24.U.26, A CONDITIONAL USE PERMIT TO REQUEST A 167.5 % DENSITY BONUS AND RESERVING 64% (17) LOW INCOME UNIT 
HOUSING DATA: 53 MARKET RATE, 17 LOW INCOME UNITS FOR SENIORS"

Expected Output:
70
"""

In [5]:

# Prepare the data for fine-tuning
def prepare_finetune_data(df):
    examples = []
    
    for _, row in df.iterrows():
        # Get the text input (short_description and entitlement)
        short_desc = str(row['short_description']) if not pd.isna(row['short_description']) else ""
        entitlement = str(row['entitlement']) if not pd.isna(row['entitlement']) else ""
        
        # Combine the text inputs
        text = f"Short Description: {short_desc}\nEntitlement: {entitlement}"
        
        # Get the label (proposed_adding)
        label = row['proposed_adding']
        
        # Create the example in the required format
        example = {
            "messages": [
                {"role": "system", "content": instructions},
                {"role": "user", "content": text},
                {"role": "assistant", "content": str(label)}
            ]
        }
        
        examples.append(example)
    
    return examples

# Prepare training and test data
train_examples = prepare_finetune_data(train_df)
test_examples = prepare_finetune_data(test_df)

# Create directory if it doesn't exist
os.makedirs('finetune_data', exist_ok=True)

# Export training data
with open(f'finetune_data/train_{variable}.jsonl', 'w') as f:
    for example in train_examples:
        f.write(json.dumps(example) + '\n')

# Export test data
with open(f'finetune_data/test_{variable}.jsonl', 'w') as f:
    for example in test_examples:
        f.write(json.dumps(example) + '\n')

print(f"Exported {len(train_examples)} training examples and {len(test_examples)} test examples to JSONL files.")



Exported 3976 training examples and 442 test examples to JSONL files.


In [6]:
load_dotenv()
key = os.environ.get("OPENAI_API_KEY")

In [7]:
client = OpenAI(api_key=key)


train_output_file_path = f"finetune_data/train_{variable}.jsonl"
validation_output_file_path = f"finetune_data/test_{variable}.jsonl"

train_file = client.files.create(
  file=open(train_output_file_path, "rb"),
  purpose="fine-tune"
)

valid_file = client.files.create(
  file=open(validation_output_file_path, "rb"),
  purpose="fine-tune"
)

print(f"Training file Info: {train_file}")
print(f"Validation file Info: {valid_file}")

Training file Info: FileObject(id='file-C5ubYxbwkHyFHBBiMWX6zH', bytes=9086996, created_at=1742347351, filename='train_proposed_adding.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)
Validation file Info: FileObject(id='file-NAgqR8WVKB335FvALurPvq', bytes=999455, created_at=1742347352, filename='test_proposed_adding.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)


In [8]:
model = client.fine_tuning.jobs.create(
  training_file=train_file.id, 
  validation_file=valid_file.id,
  model="gpt-4o-mini-2024-07-18", 
  hyperparameters={
    "n_epochs": 2,
	"batch_size": 8,
	"learning_rate_multiplier": 0.2
  },
  suffix=f"housing_desc_trainsetfull_{variable}"  # Added tag to identify this run
)
job_id = model.id
status = model.status

print(f'Fine-tuning model with jobID: {job_id}.')
print(f"Training Response: {model}")
print(f"Training Status: {status}")
print(f"Run tag: housing_desc_run_1")

Fine-tuning model with jobID: ftjob-YXfCYmJY0NUFH4jO9oWwWiQn.
Training Response: FineTuningJob(id='ftjob-YXfCYmJY0NUFH4jO9oWwWiQn', created_at=1742347354, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=[], seed=1416791431, status='validating_files', trained_tokens=None, training_file='file-C5ubYxbwkHyFHBBiMWX6zH', validation_file='file-NAgqR8WVKB335FvALurPvq', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2)), type='supervised'), user_provided_suffix='housing_desc_trainsetfull_proposed_adding', metadata=None)
Training Status: validating_files
Run tag: housing_desc_run_1


In [19]:
client.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-YXfCYmJY0NUFH4jO9oWwWiQn', created_at=1742347354, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainsetfull-proposed-adding:BCd0IIYF', finished_at=1742349565, hyperparameters=Hyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=['file-2UPwgogAL5taP2n5VerKgu'], seed=1416791431, status='succeeded', trained_tokens=4860268, training_file='file-C5ubYxbwkHyFHBBiMWX6zH', validation_file='file-NAgqR8WVKB335FvALurPvq', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2)), type='supervised'), user_provided_suffix='housing_desc_trainsetfull_proposed_adding', metadata=None)

In [20]:
result = client.fine_tuning.jobs.list()

# Retrieve the fine tuned model
fine_tuned_model = result.data[0].fine_tuned_model
print(fine_tuned_model)

None


In [21]:
# fine_tuned_model = "ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainset100-proposed-adding-run1:BCZBYUm5"
fine_tuned_model = "ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainsetfull-proposed-adding:BCd0IIYF"

In [22]:
test_truelabels_ls = []
test_text_ls = []
for _, row in test_df.iterrows():
        # Get the text input (short_description and entitlement)
        short_desc = str(row['short_description']) if not pd.isna(row['short_description']) else ""
        entitlement = str(row['entitlement']) if not pd.isna(row['entitlement']) else ""
        
        # Combine the text inputs
        text = f"Short Description: {short_desc}\nEntitlement: {entitlement}"
        
        # Get the label (proposed_adding)
        label = row['proposed_adding']
        if pd.isna(label):
                label = -1

        test_text_ls.append(text)
        test_truelabels_ls.append(label)
        

In [23]:
type(test_truelabels_ls[1])

float

In [24]:
class Model(BaseModel):
    predicted_label: int


predlabels_ls = []
for comment in test_text_ls:
  completion = client.beta.chat.completions.parse(
    model=fine_tuned_model,  # Use the fine-tuned model instead of base model
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": comment},
    ],
    response_format=Model,
)
  print(f'getting prediction for: {comment}')
  pred = completion.choices[0].message.parsed
  
  print(f'predicted label: {pred.predicted_label}')
  # predlabels_ls.append(pred.predicted_label)
  predlabels_ls.append(pred.predicted_label)

predlabels_ls

getting prediction for: Short Description: 
Entitlement: MODIFICATION FROM AN APPROVED 12 UNIT DENSITY BONUS MULTI-FAMILY STRUCTURE TO A 10 UNIT MULTI FAMILY WITH NO DENSITY BONUS OR INCENTIVES. BUILDING HEIGHT, SIZE, SF FOOTAGE AND OVERALL DESIGN SHALL ALL REMAIN THE SAME
predicted label: 10
getting prediction for: Short Description: 
Entitlement: MODIFICATION REQUEST TO CHANGE LOT AREA AND SUBMIT REVISED TREE REPORT
predicted label: 10
getting prediction for: Short Description: 
Entitlement: MODIFY LOT 2 AND LOT 5 OF APPROVED MAP VTT-70452-CN.
predicted label: 14
getting prediction for: Short Description:  A MAXIMUM 4 SMALL LOT SUBDIVISION
Entitlement: IN ACCORDANCE WITH PROVISIONS OF LOS ANGELES MUNICIPAL CODE (LAMC) SECTIONS 17.53 A REQUEST FOR A MAXIMUM 4 SMALL LOT SUBDIVISION ON A 6,500 SQUARE FOOT LOT IN THE R3-1-RIO.
predicted label: 4
getting prediction for: Short Description: (N) 2,362 SQ FT, TWO-STORY SFD W/ ATTACHED TWO-CAR GARAGE
Entitlement: PURSUANT TO LAMC SECTION 12.24

[10,
 10,
 14,
 4,
 1,
 53,
 160,
 1,
 1,
 1,
 16,
 -1,
 11,
 3,
 125,
 15,
 15,
 1,
 20,
 24,
 27,
 284,
 1,
 39,
 40,
 5,
 5,
 8,
 1,
 43,
 132,
 1,
 1,
 108,
 2,
 1,
 2,
 1,
 1,
 4,
 4,
 2,
 2,
 4,
 51,
 1,
 3,
 1,
 1,
 8,
 9,
 10,
 2,
 1,
 52,
 1,
 1,
 69,
 1,
 248,
 1,
 2,
 2,
 188,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 10,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 3,
 1,
 84,
 1,
 1,
 1,
 1,
 1,
 2,
 236,
 4,
 79,
 1,
 0,
 1,
 577,
 40,
 1,
 1,
 2,
 2,
 1,
 1,
 40,
 17,
 7,
 203,
 4,
 2,
 1,
 1,
 1,
 21,
 1,
 1,
 25,
 1,
 1,
 1,
 0,
 1,
 15,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 77,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 33,
 1,
 228,
 1,
 1,
 48,
 2,
 1,
 2,
 4,
 1,
 1,
 64,
 25,
 6,
 12,
 323,
 3,
 4,
 1,
 33,
 34,
 1,
 52,
 1,
 499,
 46,
 77,
 36,
 57,
 1,
 3,
 44,
 1,
 1,
 1,
 5,
 2,
 5,
 63,
 1,
 1,
 15,
 56,
 1,
 9,
 3,
 25,
 1,
 1,
 1,
 59,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 41,
 46,
 5,
 1,
 60,
 19,
 5,
 50,
 4,
 157,
 21,
 75,
 18,
 187,
 47,
 516,
 1444,
 3,
 48,
 2,
 1,
 21,

In [25]:
# Calculate accuracy between predicted labels and true labels
correct_predictions = sum(1 for pred, true in zip(predlabels_ls, test_truelabels_ls) if pred == true)
total_predictions = len(test_truelabels_ls)
accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

print(f"Accuracy: {accuracy:.4f} ({correct_predictions}/{total_predictions})")

# # You can also use sklearn for more metrics if needed
# from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# # Since this appears to be a regression task (predicting housing units as floats)
# # we can also calculate regression metrics
# mse = mean_squared_error(test_truelabels_ls, predlabels_ls)
# r2 = r2_score(test_truelabels_ls, predlabels_ls)

# print(f"Mean Squared Error: {mse:.4f}")
# print(f"R² Score: {r2:.4f}")


Accuracy: 0.8100 (358/442)


In [26]:
correct = 0
total = 0
for i in range(100):
  if predlabels_ls[i] == -1:
    continue
  
  if test_truelabels_ls[i] == predlabels_ls[i]:
    correct += 1

  total += 1

correct/total

0.8080808080808081

In [27]:
# Print examples where the predicted label is -1
print("Examples where predicted label is -1:")
count = 0
for i, pred_label in enumerate(predlabels_ls):
    if pred_label == -1 and count < 99:  # Limiting to 5 examples for readability
        print(f"\nExample {i}:")
        print(f"Text: {test_text_ls[i]}")
        print(f"True label: {test_truelabels_ls[i]}")
        count += 1


Examples where predicted label is -1:

Example 11:
Text: Short Description: a. An Off-Menu Incentive to increase the maximum Floor Area Ratio from 1.5 to
Entitlement: 
True label: -1

Example 323:
Text: Short Description: PRELIMINARY PARCEL MAP
Entitlement: PRELIMINARY PARCEL MAP/COASTAL DEVELOPMENT PERMIT
True label: 2.0

Example 360:
Text: Short Description: Q CLARIFICATION PER 12.32-H TO MODIFY PREVIOUSLY APPROVED SITE PLAN BY DECREASING HEIGHT FROM 32 STORIES AND 378-FEET TO 23 STORIES AND 288-FEET AND INCREASING SETBACK FOR PROPOSED SOUTH TOWER LEVELS
Entitlement: Q CLARIFICATION PER 12.32-H TO MODIFY PREVIOUSLY APPROVED SITE PLAN BY DECREASING HEIGHT FROM 32 STORIES AND 378-FEET TO 23 STORIES AND 288-FEET AND INCREASING SETBACK FOR PROPOSED SOUTH TOWER LEVELS 2-3 FROM 25-FEET TO 33-FEET 3-INCHES ALONG WILSHIRE BLVD. NO CHANGES TO THE SOUTH TOWER'S GROUND FLOOR SETBACK OF 5-FEET. FLOOR PLATES WILL CHANGE FROM CLARIFICATION PER 12.32-H TO MODIFY PREVIOUSLY APPROVED SITE PLAN
True l

In [28]:
# Print examples where the predicted label is not equal to the true label
print("Examples where predicted label != true label:")
count = 0
for i, (pred_label, true_label) in enumerate(zip(predlabels_ls, test_truelabels_ls)):
    if pred_label != -1 and pred_label != true_label and count < 99:  # Limiting to 10 examples for readability
        print(f"\nExample {i}:")
        print(f"Text: {test_text_ls[i]}")
        print(f"True label: {true_label}")
        print(f"Predicted label: {pred_label}")
        count += 1


Examples where predicted label != true label:

Example 0:
Text: Short Description: 
Entitlement: MODIFICATION FROM AN APPROVED 12 UNIT DENSITY BONUS MULTI-FAMILY STRUCTURE TO A 10 UNIT MULTI FAMILY WITH NO DENSITY BONUS OR INCENTIVES. BUILDING HEIGHT, SIZE, SF FOOTAGE AND OVERALL DESIGN SHALL ALL REMAIN THE SAME
True label: 12.0
Predicted label: 10

Example 1:
Text: Short Description: 
Entitlement: MODIFICATION REQUEST TO CHANGE LOT AREA AND SUBMIT REVISED TREE REPORT
True label: 6.0
Predicted label: 10

Example 2:
Text: Short Description: 
Entitlement: MODIFY LOT 2 AND LOT 5 OF APPROVED MAP VTT-70452-CN.
True label: 396.0
Predicted label: 14

Example 10:
Text: Short Description: 1. Density Bonus (DB) for a Density Bonus Compliance Review to permit a Housing Development Project requesting the following Off-Menu Incentives and Waiver of Development Standard:
Entitlement: 1. Density Bonus (DB) pursuant to Section 12.22 A.25 of the LAMC for a Density
True label: -1
Predicted label: 16

Ex

In [29]:
# Calculate the average difference between true and predicted labels for incorrect predictions
total_diff = 0
incorrect_count = 0

for i, (pred_label, true_label) in enumerate(zip(predlabels_ls, test_truelabels_ls)):
    if pred_label != -1 and pred_label != true_label:
        diff = abs(true_label - pred_label)
        total_diff += diff
        incorrect_count += 1

if incorrect_count > 0:
    avg_diff = total_diff / incorrect_count
    print(f"Average difference between true and predicted labels for incorrect predictions: {avg_diff}")
else:
    print("No incorrect predictions found (excluding cases where predicted label is -1).")


Average difference between true and predicted labels for incorrect predictions: 54.18518518518518


In [30]:
# Calculate how many incorrect predictions are off by only 1
off_by_one_count = 0
total_incorrect = 0

for pred_label, true_label in zip(predlabels_ls, test_truelabels_ls):
    if pred_label != -1 and pred_label != true_label:
        total_incorrect += 1
        if abs(true_label - pred_label) == 1:
            off_by_one_count += 1

if total_incorrect > 0:
    percentage_off_by_one = (off_by_one_count / total_incorrect) * 100
    print(f"Number of predictions off by exactly 1: {off_by_one_count} out of {total_incorrect} incorrect predictions")
    print(f"Percentage of incorrect predictions that are off by exactly 1: {percentage_off_by_one:.2f}%")
else:
    print("No incorrect predictions found (excluding cases where predicted label is -1).")


Number of predictions off by exactly 1: 30 out of 81 incorrect predictions
Percentage of incorrect predictions that are off by exactly 1: 37.04%
