<a href="https://colab.research.google.com/github/mohammadbadi/CrimeAnalytics_Clustering/blob/main/Code%20Sections/5.4.3%20Feature%20Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **5.4.3 Feature Engineering - Approach_3**

In [1]:
import warnings                                                                   # Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.decomposition import PCA
from google.colab import files
from IPython.display import display, HTML

                                                                                  # URL of the Dataset
url = "https://raw.githubusercontent.com/mohammadbadi/CrimeAnalytics_Clustering/refs/heads/main/Output_CSV/Final_Data.csv"

# Download the dataset exactly once, straight into the frame the rest of the
# notebook works from. The read that matters is the one guarded by the `try`,
# and the success banner sits in `else` so it only appears when the read worked.
try:
    Data_Preparing_df = pd.read_csv(url)
except Exception as e:
    display(HTML(f"<p style='color: red; font-size: 16px; font-weight: bold;'>Error loading data: {e}</p>"))
    # Re-raise rather than call exit(): exit() is an interactive-shell helper,
    # whereas re-raising stops this cell immediately and keeps the traceback.
    raise
else:
    display(HTML("<p style='color: green; font-size: 16px; font-weight: bold;'>Data loaded successfully.</p>"))

# -------------------- Feature Engineering --------------------
# Column count before any engineered column is added (reported in the summary table).
initial_cols_count = len(Data_Preparing_df.columns)

# LOCATION_TYPE values grouped as "Residential".
residential_types = [
    'Apartment (Rooming House, Condo)',
    'Single Home, House (Attach Garage, Cottage, Mobile)',
    'Group Homes (Non-Profit, Halfway House, Social Agency)',
    'Community Group Home', 'Retirement Home', 'Nursing Home',
    'Private Property Structure (Pool, Shed, Detached Garage)'
]
# LOCATION_TYPE values grouped as "Public".
public_types = [
    'Streets, Roads, Highways (Bicycle Path, Private Road)',
    'Open Areas (Lakes, Parks, Rivers)',
    "Other Non Commercial / Corporate Places (Non-Profit, Gov'T, Firehall)",
    'Parking Lots (Apt., Commercial Or Non-Commercial)'
]

# Engineer on a copy so the loaded frame stays untouched.
df = Data_Preparing_df.copy()

# Compute each membership mask once with vectorised isin() instead of running
# a Python lambda over every row three times.
is_residential = df['LOCATION_TYPE'].isin(residential_types)
is_public = df['LOCATION_TYPE'].isin(public_types)

# Each engineered column holds its group label where the row belongs to that
# group and None everywhere else.
df['Location_Engineered_Residential'] = np.where(is_residential, 'Residential', None)
df['Location_Engineered_Public'] = np.where(is_public, 'Public', None)
# "Other" is everything in neither list (this includes missing LOCATION_TYPE
# values, which match no list entry).
df['Location_Engineered_Other'] = np.where(~(is_residential | is_public), 'Other', None)

# Column count after the engineered columns have been added.
final_cols_count = len(df.columns)

# Rows of the report table: one per engineered column, followed by a closing
# row that reuses the three table columns to show the before/after column counts.
steps_summary = [
    {
        "Original Feature": "LOCATION_TYPE",
        "Action Taken": "Engineered new column Location_Engineered_Residential",
        "Rationale": "Captures dwelling types in residential areas.",
    },
    {
        "Original Feature": "LOCATION_TYPE",
        "Action Taken": "Engineered new column Location_Engineered_Public",
        "Rationale": "Groups public and community space locations.",
    },
    {
        "Original Feature": "LOCATION_TYPE",
        "Action Taken": "Engineered new column Location_Engineered_Other",
        "Rationale": "Identifies location types that do not fit the primary groups.",
    },
    {
        "Original Feature": "Columns affected in <br><strong>5.4.3 Feature Engineering - Approach_3</strong>",
        "Action Taken": f"Initial Columns: <strong><br>{initial_cols_count}</strong>",
        "Rationale": f"Final Columns: <strong><br>{final_cols_count}</strong>",
    },
]

# Assemble the report table from three pieces (opening markup, one <tr> per
# summary step, closing markup) and join them once at the end.
table_open = """
<table style='border-collapse: collapse; width: 100%; font-size: 18px;'>
    <thead style='background-color: #4CAF50; color: white;'>
        <tr>
            <th colspan="3" style="text-align: center; font-size: 24px; background-color: #2f4f4f; color: white;">
                5.4.3 Feature Engineering Phase - Approach_3
            </th>
        </tr>
        <tr>
            <th style='border: 1px solid #dddddd; padding: 8px;'>Original Feature</th>
            <th style='border: 1px solid #dddddd; padding: 8px;'>Action Taken</th>
            <th style='border: 1px solid #dddddd; padding: 8px;'>Rationale</th>
        </tr>
    </thead>
    <tbody>
"""

# Even-indexed rows get the grey shade, odd-indexed rows stay white.
stripe_colors = ("#f2f2f2", "white")
row_markup = [
    f"""
        <tr style='border: 1px solid #dddddd; background-color: {stripe_colors[position % 2]};'>
            <td style='border: 1px solid #dddddd; padding: 8px;'>{entry["Original Feature"]}</td>
            <td style='border: 1px solid #dddddd; padding: 8px;'>{entry["Action Taken"]}</td>
            <td style='border: 1px solid #dddddd; padding: 8px;'>{entry["Rationale"]}</td>
        </tr>
    """
    for position, entry in enumerate(steps_summary)
]

# Footer note, rendered as a final row spanning all three columns.
closing_note = "Feature Engineering completed and saved as <span style='color: green;'>FEngineered_New.csv</span> for further analysis."
table_close = f"""
        <tr style='border: 1px solid #dddddd;'>
            <td colspan="3" style='border: 1px solid #dddddd; padding: 8px; background-color: #f8f8f8;'>
                <strong>{closing_note}</strong>
            </td>
        </tr>
    </tbody>
</table>
"""

html_table = table_open + "".join(row_markup) + table_close

# Write the CSV before rendering the table: its footer announces that
# FEngineered_New.csv has been saved, so that must already be true when shown.
df.to_csv("FEngineered_New.csv", index=False)                      # Save engineered data as FEngineered_New.csv
display(HTML(html_table))
files.download("FEngineered_New.csv")                              # Offer the saved file via google.colab's files helper


5.4.3 Feature Engineering Phase - Approach_3,5.4.3 Feature Engineering Phase - Approach_3,5.4.3 Feature Engineering Phase - Approach_3
Original Feature,Action Taken,Rationale
LOCATION_TYPE,Engineered new column Location_Engineered_Residential,Captures dwelling types in residential areas.
LOCATION_TYPE,Engineered new column Location_Engineered_Public,Groups public and community space locations.
LOCATION_TYPE,Engineered new column Location_Engineered_Other,Identifies location types that do not fit the primary groups.
Columns affected in 5.4.3 Feature Engineering - Approach_3,Initial Columns: 18,Final Columns: 21
Feature Engineering completed and saved as FEngineered_New.csv for further analysis.,Feature Engineering completed and saved as FEngineered_New.csv for further analysis.,Feature Engineering completed and saved as FEngineered_New.csv for further analysis.
