In [1]:
from pydantic import BaseModel
from openai import OpenAI
from dotenv import load_dotenv
import os
from datetime import datetime
import pandas as pd
import json

In [2]:
directory = "data"
data_file = "housing_descriptions_training.csv"
data_path = directory + "/" + data_file

data_df = pd.read_csv(data_path)
data_df.head()


Unnamed: 0,short_description,entitlement,proposed_adding,residential_add,adu_udu_add,multi_family_add,single_family_add,has_residential,has_market_rate,has_affordable_lowinc,has_livework,has_adu,has_udu,has_adu_udu,has_multi_family,has_single_family,has_non_res_sqft,has_mixeduse
0,,ADDITIONAL GRADING IN COMPLIANCE WITH AND TO A...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,,DELETE CONDITION S-3(I)(A) OF VTT 71898,49.0,49.0,0.0,49.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,,DEMOLITION OF EXISTING BUILDING TO CREATE TWO ...,75.0,75.0,0.0,75.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,,"INCREASE GRADING, 2 NEW RETAINING WALLS TO COR...",1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,,MODIFICATION FROM AN APPROVED 12 UNIT DENSITY ...,12.0,12.0,0.0,12.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [None]:
# Sample 100 rows for training set
train_df = data_df.sample(n=100, random_state=42)

# Sample another 100 rows for test set, ensuring no overlap with train set
test_df = data_df[~data_df.index.isin(train_df.index)].sample(n=100, random_state=50)

# Extract only the specified columns for both sets
train_df = train_df[['short_description', 'entitlement', 'has_residential', 'has_market_rate', 'has_affordable_lowinc', 'has_livework', 'has_adu', 'has_mixeduse']]
test_df = test_df[['short_description', 'entitlement', 'has_residential', 'has_market_rate', 'has_affordable_lowinc', 'has_livework', 'has_adu', 'has_mixeduse']]

print("Training set shape:", train_df.shape)
print("Test set shape:", test_df.shape)

# Display first few rows of each set
print("\nTraining set sample:")
train_df.head()

KeyError: "['has_mixed'] not in index"

In [52]:
instructions = """
You are a housing project assistant. Read the **Entitlement** and **Short Description** fields. Your job is to classify whether each project has the following traits:

## VARIABLES
- **Res**: `1` if the project includes any **residential use**; `0` otherwise  
- **Mkt**: `1` if the project includes or implies any **market-rate housing**; `0` otherwise  
- **Aff**: `1` if the project includes any **LOWINC** (affordable or income-restricted) housing; `0` otherwise  
- **Liv**: `1` if the project includes any **live/work units** (residential unless explicitly commercial); `0` otherwise  
- **Adu**: `1` if the project includes any **Accessory Dwelling Units (ADUs)** or **Junior ADUs (JADUs)**; `0` otherwise  
- **Mix**: `1` if the project is **mixed-use** (includes both residential and non-residential uses); `0` otherwise


---

### Shared Definitions:


**LOWINC** refers to explicitly income-restricted housing, such as:
- Affordable housing
- Low-income, very low income, extremely low income
- Moderate income
- Supportive housing
- Income-restricted units
- Units required by a Density Bonus, Mello Act, or other affordability programs


**RES USES** (residential uses) include:
- Apartments, condos, dwelling units, single-family homes (SFD), ADUs, JADUs, duplexes, triplexes, fourplexes, live/work units (if no separate commercial), small-lot subdivisions

**Non-Residential Uses** include:
- Commercial, retail, restaurant, bar, office, administrative office, medical, industrial, warehouse
- Institutional uses: community center, school, religious facility

**Mixed-Use** means a project includes **both** residential and non-residential uses (see above)

**Live/Work Units** are considered **residential** unless explicitly stated that they include **separate or public-facing commercial space**


---

## VARIABLE: `Res`


### Task:
Determine whether the project includes **any residential use**.


### Output **1** if there is residential:
- Any of the following appear: apartments, housing units, condos, homes, duplex, triplex, fourplex, ADU, JADU, live/work units (unless explicitly non-residential)
- Even if only one residential unit is included (e.g., "1 apartment above a retail store")

### Output **0** if there is no residential:
- No residential use is mentioned
- The project only includes commercial, office, industrial, or institutional use without any housing

**Common Misleading Phrases**:
- "FAR", "Density Bonus", "Height Increase", and "Open Space Waiver" do **not** imply mixed-use by themselves.
- Garages, basements, storage, and parking are **not** considered non-residential uses.

---

## VARIABLE: `Mkt`


### Task:

### Output **0** if:
- All new or all existing **RES USES** are explicitly LOWINC, , or if the project involves **no net change** to housing quantity or type
- The only action is **legalizing** an already existing, previously unpermitted *dwelling unit*, with **no new structure or change in use**.

### Output **1** if:
- Output **1** if **any** of the following are true: new units are created or enabled, affordability is partial or missing, or units are altered in a way that increases market value.
- Any **new unit** is created: including ADUs, duplexes, or conversion of non-housing space (e.g., garage, rec room, basement) into a dwelling.
- A **unit is rebuilt**, enlarged, or significantly upgraded (e.g., demo and rebuild of SFD, or manufactured home replacement).
- An **existing unit is converted into another use** (e.g., to condos, bed & breakfast, or short-term rental).
- A **density bonus** or other incentive is used and *not all units* are LOWINC.
- A **zone change**, tract map, subdivision, or other **land entitlement** is requested and **no affordability is mentioned**.
- **Affordability is partial** (e.g., “2 of 26 units are affordable”) — remaining units are assumed market-rate.
- The language indicates **intent to redevelop, intensify, or commercialize housing** (e.g., demolition, change of use, upzoning).

### Assumptions:
- Assume **market-rate intent** unless it is clearly stated that **100% of units are LOWINC**.
- **Adding, enlarging, converting, or replacing** units implies market-rate housing unless explicitly affordable.

---

## VARIABLE: `Aff`


### Output:

- **1** if the project includes any units that are explicitly income-restricted or affordable
- **0** if there is no mention of any income-restricted, affordable, or low-income housing


### Criteria:

#### Output **1** if:
- if the project includes any units that are explicitly **LOWINC** (income-restricted or affordable)

#### Output **0** if:
- Residential units **RES USES** are included, but there is **no mention of affordability** or income restriction
- The **RES USES** are market-rate by implication (e.g., new SFDs, ADUs, apartments, condos without affordability language)
- The only activity is legalization or change-of-use (e.g., “legalize unpermitted unit”)


---

## VARIABLE: `Liv`


### Output:


### Criteria:

#### Output **1** if:
- if the project explicitly mentions live/work units
- The text contains **explicit phrases**, such as:
  - “live/work unit(s)”
  - “live-work space”
  - “live/work loft(s)”
  - “living/working quarters”

#### Output **0** if:
- otherwise
- There is **no explicit mention** of live/work units, even if the project is mixed-use, commercial-residential, or includes flexible space.


---

## VARIABLE: `Adu`

### Output **1** if:
- if the project includes an ADU
- The text explicitly mentions:
  - “ADU”
  - “Accessory Dwelling Unit”
  - “JADU” (Junior ADU)
- OR if there is strong indirect evidence:
  - “Garage conversion” (only if a unit is being added or repurposed for living space)
  - “Recreation room” or “Rec room” being added, legalized, or modified with a garage
  - “Second-story addition” to a garage or detached structure
  - “Non-conforming addition” with use or structure implying habitable space
  - Zoning language around “Accessory use”, “reduced side yard” + garage additions in residential zones
Only infer ADUs from **indirect language** when it strongly suggests a residential or second unit being added or legalized.

### Output **0** if:
- Otherwise
- There is no mention of “ADU”, “Accessory Dwelling Unit”, or “JADU”
- The project involves a garage, recreation room, or addition, but **not** as living space or housing
- The text discusses subdivision, zoning, or construction of new homes **without** referencing an ADU


---


## VARIABLE: `Mix`


### Task:
Determine whether the project is **mixed-use**, meaning it includes both **RES USE** and **Non-Residential Uses** uses.


**Mixed-Use** means a project includes **both** residential and non-residential uses (see above)
**Live/Work Units** are considered **residential** unless explicitly stated that they include **separate or public-facing commercial space**

**Common Misleading Phrases**:
- "FAR", "Density Bonus", "Height Increase", and "Open Space Waiver" do **not** imply mixed-use by themselves.
- Garages, basements, storage, and parking are **not** considered non-residential uses.

### Output **1** if:
- Output **1** if the project includes both residential **RES USE** and non-residential **Non-Residential Uses** components (or explicitly says "mixed-use")
- Residential **RES USE** and non-residential uses are both present

### Output **0** if:
- Otherwise
- The project is only residential (even if it has garages, basements, or parking)
- The project is only non-residential **Non-Residential Uses**
- The project includes live/work units, but no separate or public-facing commercial space is described



---

### Example:

Short Description:
"A DENSITY BONUS TO ALLOW A 5-STORY, 70-UNIT SENIOR HOUSING DEVELOPMENT, INCLUDING AFFORDABLE AND MARKET-RATE UNITS."

Entitlement:
"17 units set aside as low-income; remaining 53 units are market-rate."

**Expected Output Format:**
```json
{
  "Res": 1,
  "Mkt": 1,
  "Aff": 1,
  "Liv": 0,
  "Adu": 0,
  "Mix": 1
}

"""

In [53]:
load_dotenv()
key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=key)

In [54]:
class Model(BaseModel):
    Res: int
    Mkt: int
    Aff: int
    Liv: int
    Adu: int
    Mix: int

In [64]:
example = """
Short Description:
"A DENSITY BONUS (3 OFF-MENU INCENTIVES) AND CONDITIONAL USE PERMIT TO EXCEED DENSITY TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT, LOW-INCOME SENIOR HOUSING PROJECT, PHP"
Entitlement:
"PURSUANT TO 12.22.A.25; A DENSITY BONUS TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT SENIOR HOUSING PROJECT WITH GREATER THAN 35% INCREASE IN DENSITY WITH OFF-MENU INCENTIVES TO ALLOW OVERALL HEIGHT OF 72-FEET IN LIEU OF THE LAMC HEIGHT OF 45-FEET, ADDITIONAL INCENTIVES TO REDUCE ACCESSIBLE PARKING SPACES; AND ALLOW THE INCREASE IN THE FAR FROM 1.5:1 TO 2.63:1 WITH 17 UNITS SET ASIDE FOR SENIOR LOW-INCOME.
PURSUANT TO 12.24.U.26, A CONDITIONAL USE PERMIT TO REQUEST A 167.5 % DENSITY BONUS AND RESERVING 64% (17) LOW INCOME UNIT 
HOUSING DATA: 53 MARKET RATE, 17 LOW INCOME UNITS FOR SENIORS"

"""

completion = client.beta.chat.completions.parse(
    model='gpt-4o-mini',  # Use the fine-tuned model instead of base model
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": example},
    ],
    response_format=Model,
)

print(f'getting prediction for: {example}')
pred = completion.choices[0].message.parsed
pred

getting prediction for: 
Short Description:
"A DENSITY BONUS (3 OFF-MENU INCENTIVES) AND CONDITIONAL USE PERMIT TO EXCEED DENSITY TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT, LOW-INCOME SENIOR HOUSING PROJECT, PHP"
Entitlement:
"PURSUANT TO 12.22.A.25; A DENSITY BONUS TO ALLOW THE NEW CONSTRUCTION, USE AND MAINTENANCE OF A 5-STORY, 70-UNIT SENIOR HOUSING PROJECT WITH GREATER THAN 35% INCREASE IN DENSITY WITH OFF-MENU INCENTIVES TO ALLOW OVERALL HEIGHT OF 72-FEET IN LIEU OF THE LAMC HEIGHT OF 45-FEET, ADDITIONAL INCENTIVES TO REDUCE ACCESSIBLE PARKING SPACES; AND ALLOW THE INCREASE IN THE FAR FROM 1.5:1 TO 2.63:1 WITH 17 UNITS SET ASIDE FOR SENIOR LOW-INCOME.
PURSUANT TO 12.24.U.26, A CONDITIONAL USE PERMIT TO REQUEST A 167.5 % DENSITY BONUS AND RESERVING 64% (17) LOW INCOME UNIT 
HOUSING DATA: 53 MARKET RATE, 17 LOW INCOME UNITS FOR SENIORS"




Model(units=70, multi=0, single=0, lowinc=1)

In [65]:
# Prepare the data for fine-tuning
def prepare_finetune_data(df):
    examples = []
    
    for _, row in df.iterrows():
        # Get the text input (short_description and entitlement)
        short_desc = str(row['short_description']) if not pd.isna(row['short_description']) else ""
        entitlement = str(row['entitlement']) if not pd.isna(row['entitlement']) else ""
        
        # Combine the text inputs
        text = f"Short Description: {short_desc}\nEntitlement: {entitlement}"
        
        # Get the labels for the Model
        # 'has_residential', 'has_market_rate', 'has_affordable_lowinc', 'has_livework', 'has_adu', 'has_mixed'
        res = int(row['has_residential']) if not pd.isna(row['has_residential']) else 0
        mkt = int(row['has_market_rate']) if not pd.isna(row['has_market_rate']) else 0
        aff = int(row['has_affordable_lowinc']) if not pd.isna(row['has_livework']) else 0
        liv = int(row['has_livework']) if not pd.isna(row['has_livework']) else 0
        adu = int(row['has_adu']) if not pd.isna(row['has_adu']) else 0
        mix = int(row['has_mixeduse']) if not pd.isna(row['has_mixeduse']) else 0


        # Format the output as a JSON string with the required format
        # model_output = f"{{'units':{units},'multi':{multi},'single':{single},'lowinc':{lowinc}}}"
        model_output = f"{{'Ees':{res}, 'Mkt':{mkt}, 'Aff':{aff}, 'Liv':{liv}, 'Adu':{adu}, 'Mix':{mix}}}"

        
        # Create the example in the required format
        example = {
            "messages": [
                {"role": "system", "content": instructions},
                {"role": "user", "content": text},
                {"role": "assistant", "content": model_output}
            ]
        }
        
        examples.append(example)
    
    return examples

# Prepare training and test data
train_examples = prepare_finetune_data(train_df)
test_examples = prepare_finetune_data(test_df)

# Create directory if it doesn't exist
os.makedirs('finetune_data', exist_ok=True)

# Export training data
with open('finetune_data/train.jsonl', 'w') as f:
    for example in train_examples:
        f.write(json.dumps(example) + '\n')

# Export test data
with open('finetune_data/test.jsonl', 'w') as f:
    for example in test_examples:
        f.write(json.dumps(example) + '\n')

print(f"Exported {len(train_examples)} training examples and {len(test_examples)} test examples to JSONL files.")


Exported 100 training examples and 100 test examples to JSONL files.


In [66]:



train_output_file_path = "finetune_data/train.jsonl"
validation_output_file_path = "finetune_data/test.jsonl"

train_file = client.files.create(
  file=open(train_output_file_path, "rb"),
  purpose="fine-tune"
)

valid_file = client.files.create(
  file=open(validation_output_file_path, "rb"),
  purpose="fine-tune"
)

print(f"Training file Info: {train_file}")
print(f"Validation file Info: {valid_file}")

Training file Info: FileObject(id='file-2nh8JmMvctGF2E7VS2LZSc', bytes=242574, created_at=1742151002, filename='train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)
Validation file Info: FileObject(id='file-T269KqGcQX9XUuXAnqbXEu', bytes=240589, created_at=1742151003, filename='test.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)


In [67]:
model = client.fine_tuning.jobs.create(
  training_file=train_file.id, 
  validation_file=valid_file.id,
  model="gpt-4o-mini-2024-07-18", 
  hyperparameters={
    "n_epochs": 4,
	"batch_size": 8,
	"learning_rate_multiplier": 0.2
  },
  suffix="housing_desc_trainset100_multirun3"  # Added tag to identify this run
)
job_id = model.id
status = model.status

print(f'Fine-tuning model with jobID: {job_id}.')
print(f"Training Response: {model}")
print(f"Training Status: {status}")
print(f"Run tag: housing_desc_run_1")

Fine-tuning model with jobID: ftjob-iWP2X4YBent6s7a7XD9MKNsv.
Training Response: FineTuningJob(id='ftjob-iWP2X4YBent6s7a7XD9MKNsv', created_at=1742151005, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=[], seed=1804937964, status='validating_files', trained_tokens=None, training_file='file-2nh8JmMvctGF2E7VS2LZSc', validation_file='file-T269KqGcQX9XUuXAnqbXEu', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2)), type='supervised'), user_provided_suffix='housing_desc_trainset100_multirun3', metadata=None)
Training Status: validating_files
Run tag: housing_desc_run_1


In [73]:
client.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-iWP2X4YBent6s7a7XD9MKNsv', created_at=1742151005, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainset100-multirun3:BBnVBwIP', finished_at=1742151592, hyperparameters=Hyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=['file-71RBkPfUS9YgAkRYWsp3ta'], seed=1804937964, status='succeeded', trained_tokens=135246, training_file='file-2nh8JmMvctGF2E7VS2LZSc', validation_file='file-T269KqGcQX9XUuXAnqbXEu', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=8, learning_rate_multiplier=0.2, n_epochs=2)), type='supervised'), user_provided_suffix='housing_desc_trainset100_multirun3', metadata=None)

In [74]:
result = client.fine_tuning.jobs.list()

# Retrieve the fine tuned model
fine_tuned_model = result.data[0].fine_tuned_model
print(fine_tuned_model)

ft:gpt-4o-mini-2024-07-18:personal:housing-desc-trainset100-multirun3:BBnVBwIP


In [75]:
test_truelabels_ls = []
test_text_ls = []
for _, row in test_df.iterrows():
        # Get the text input (short_description and entitlement)
        short_desc = str(row['short_description']) if not pd.isna(row['short_description']) else ""
        entitlement = str(row['entitlement']) if not pd.isna(row['entitlement']) else ""
        
        # Combine the text inputs
        text = f"Short Description: {short_desc}\nEntitlement: {entitlement}"
        
        # Get the label (proposed_adding)
        label = row['proposed_adding']
        if pd.isna(label):
                label = -1

        test_text_ls.append(text)
        test_truelabels_ls.append(label)
        

In [76]:
type(test_truelabels_ls[1])

float

In [77]:

predlabels_ls = []
for comment in test_text_ls:
  completion = client.beta.chat.completions.parse(
    model=fine_tuned_model,  # Use the fine-tuned model instead of base model
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": comment},
    ],
    response_format=Model,
)
  print(f'getting prediction for: {comment}')
  pred = completion.choices[0].message.parsed
  
  print(f'predicted label: {pred}')
  # predlabels_ls.append(pred.predicted_label)
  predlabels_ls.append(pred.units)

predlabels_ls

getting prediction for: Short Description: CONSTRUCTION OF (N) TWO-STORY SFD WITH BASEMENT AND ATTACHED GARAGE AND EXTENSION OF EXISTING SEWER LINE APPROXIMATELY 100 FEET ON A VACANT LOT IN A SINGLE-JURISDICTION COASTAL ZONE.
Entitlement: PURSUANT TO SEC. 12.20.2 A COASTAL DEVELOPMENT PERMIT (CDP) FOR THE CONSTRUCTION OF A NEW 2-STORY SINGLE FAMILY DWELLING WITH BASEMENT AND ATTACHED GARAGE AND EXTENSION OF EXISTING SEWER LINE APPROXIMATELY 100 FEET ON A VACANT LOT IN A SINGLE-JURISDICTION COASTAL ZONE.
predicted label: units=1 lowinc=0
getting prediction for: Short Description: PROPOSED DEMOLITION OF AN (E) 55,398 SQ FT, TWO-STORY COMM'L BLDG & CONSTRUCTION OF A (N) 368,896 SQ FT, PHASED, MIXED-USE DEVELOPMENT
Entitlement: PURSUANT TO LAMC SECTION 11.5.7, APPLICANT REQUESTS A MAJOR PROJECT PERMIT COMPLIANCE TO ALLOW THE DEMOLITION OF AN (E) 55,398 SQ FT, TWO-STORY COMMERCIAL BLDG & CONSTRUCTION OF A (N) 368,896 SQ FT, PHASED, MIXED-USE DEVELOPMENT WITHIN THE WARNER CENTER SPECIFIC PLA

[1,
 170,
 1,
 39,
 1,
 1,
 1,
 1,
 4,
 184,
 1,
 4,
 1,
 -1,
 1,
 1,
 1,
 55,
 236,
 2,
 1,
 21,
 1,
 3,
 1,
 2,
 1,
 2,
 2,
 352,
 4,
 -1,
 1,
 6,
 1,
 2,
 1,
 1,
 1,
 10,
 410,
 18,
 1,
 1,
 -1,
 21,
 2,
 1,
 92,
 2,
 1,
 1,
 2,
 28,
 58,
 4,
 26,
 1,
 4,
 1,
 2,
 -1,
 1,
 29,
 1,
 1,
 1,
 9,
 4,
 23,
 5,
 4,
 1,
 48,
 269,
 8,
 1,
 2,
 1,
 1,
 4,
 1,
 1,
 99,
 202,
 15,
 1,
 200,
 4,
 4,
 1,
 1,
 1,
 1,
 16,
 72,
 2,
 74,
 4,
 -1]

In [78]:
# Calculate accuracy between predicted labels and true labels
correct_predictions = sum(1 for pred, true in zip(predlabels_ls, test_truelabels_ls) if pred == true)
total_predictions = len(test_truelabels_ls)
accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

print(f"Accuracy: {accuracy:.4f} ({correct_predictions}/{total_predictions})")




Accuracy: 0.7400 (74/100)


In [79]:
correct = 0
total = 0
for i in range(100):
  if predlabels_ls[i] == -1:
    continue
  
  if test_truelabels_ls[i] == predlabels_ls[i]:
    correct += 1

  total += 1

correct/total

0.7789473684210526

In [80]:
# Print examples where the predicted label is -1
print("Examples where predicted label is -1:")
count = 0
for i, pred_label in enumerate(predlabels_ls):
    if pred_label == -1 and count < 99:  # Limiting to 5 examples for readability
        print(f"\nExample {i}:")
        print(f"Text: {test_text_ls[i]}")
        print(f"True label: {test_truelabels_ls[i]}")
        count += 1


Examples where predicted label is -1:

Example 13:
Text: Short Description: PROPOSED DEMOLITION AN AN (E) SFD AND CONSTRUCTION OF (N) SFD AND ATTACHED TWO-CAR GARAGE
Entitlement: PURSUANT TO LAMC 12.20.2, APPLICANT REQUESTS A COASTAL DEVELOPMENT PERMIT AND MELLO ACT COMPLIANCE; AND PURSUANT TO LAMC 11.5.7, APPLICANT REQUESTS A MAJOR PROJECT PERMIT COMPLIANCE TO ALLOW THE DEMOLITION OF AN (E) SFD AND CONSTRUCTION OF (N) SFD  AND ATTACHED GARAGE WITHIN THE SINGLE JURISDICTION COASTAL ZONE AND THE VENICE SPECIFIC PLAN AREA.
A REQUEST FOR A ZONING ADMINSTRATOR'S ADJUSTMENT, PER LAMC 12.28, TO ALLOW A REDUCED REAR YARD SETBACK OF 11' - 6'' IN LIEU OF 15'-0 AS REQUIRED IN THE R2 ZONE PER LAMC 12.09.C-3.
True label: 1.0

Example 31:
Text: Short Description: PURSUANT TO CODE SECTION 12.24X.28 (7), ZONING ADMINISTRATORS ADJUSTMENT TO ALLOW ROADWAY OF 14 FEET IN LIEU OF THE REQUIRED 20 FOOT WIDTH OF ROADWAY.
Entitlement: PURSUANT TO CODE SECTION 12.24X.28 (7), ZONING ADMINISTRATORS ADJUSTMENT TO

In [81]:
# Print examples where the predicted label is not equal to the true label
print("Examples where predicted label != true label:")
count = 0
for i, (pred_label, true_label) in enumerate(zip(predlabels_ls, test_truelabels_ls)):
    if pred_label != -1 and pred_label != true_label and count < 99:  # Limiting to 10 examples for readability
        print(f"\nExample {i}:")
        print(f"Text: {test_text_ls[i]}")
        print(f"True label: {true_label}")
        print(f"Predicted label: {pred_label}")
        count += 1


Examples where predicted label != true label:

Example 6:
Text: Short Description: REQUEST FOR PROJECT PERMIT COMPLIANCE AND DESIGN REVIEW OF THE PROPOSED CONSTRUCTION OF A (N) TWO-STORY SFD WITH DETACHED GARAGE IN THE HOLLYWOODLAND SPECIFIC PLAN AREA
Entitlement: PURSUANT TO LAMC 11.5.7 AND 16.50, APPLICANT REQUESTS PROJECT PERMIT COMPLIANCE AND DESIGN REVIEW FOR THE PROPOSED CONSTRUCTION OF A (N) TWO-STORY SFD WITH DETACHED GARAGE IN THE HOLLYWOODLAND SPECIFIC PLAN AREA
True label: 2.0
Predicted label: 1

Example 12:
Text: Short Description: NEW SINGLE FAMILY HOUSE
Entitlement: PROJECT PERMIT FOR THE CONSTRUCTION OF A NEW SINGLE FAMILY HOUSE PER THE REQUIREMENTS OF THE VALLEY VILLAGE SPECIFIC PLAN.
True label: 2.0
Predicted label: 1

Example 17:
Text: Short Description: DEMOLITION OF 3 EXISTING RESIDENTIAL UNITS AND THE CONSTRUCTION, USE, MAINTANCE OF A 4 STORY, 55 DWELLING UNITS (INCLUDING 17 VERY LOW AND 37 LOW INCOME) FOR SENIOR HOUSING.
Entitlement: PURSUANT TO LAMC 12.22.A 25.F.

In [82]:
# Calculate the average difference between true and predicted labels for incorrect predictions
total_diff = 0
incorrect_count = 0

for i, (pred_label, true_label) in enumerate(zip(predlabels_ls, test_truelabels_ls)):
    if pred_label != -1 and pred_label != true_label:
        diff = abs(true_label - pred_label)
        total_diff += diff
        incorrect_count += 1

if incorrect_count > 0:
    avg_diff = total_diff / incorrect_count
    print(f"Average difference between true and predicted labels for incorrect predictions: {avg_diff}")
else:
    print("No incorrect predictions found (excluding cases where predicted label is -1).")


Average difference between true and predicted labels for incorrect predictions: 5.0


In [83]:
# Calculate how many incorrect predictions are off by only 1
off_by_one_count = 0
total_incorrect = 0

for pred_label, true_label in zip(predlabels_ls, test_truelabels_ls):
    if pred_label != -1 and pred_label != true_label:
        total_incorrect += 1
        if abs(true_label - pred_label) == 1:
            off_by_one_count += 1

if total_incorrect > 0:
    percentage_off_by_one = (off_by_one_count / total_incorrect) * 100
    print(f"Number of predictions off by exactly 1: {off_by_one_count} out of {total_incorrect} incorrect predictions")
    print(f"Percentage of incorrect predictions that are off by exactly 1: {percentage_off_by_one:.2f}%")
else:
    print("No incorrect predictions found (excluding cases where predicted label is -1).")


Number of predictions off by exactly 1: 13 out of 21 incorrect predictions
Percentage of incorrect predictions that are off by exactly 1: 61.90%
