In [1]:
!pip install lifelines



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from lifelines import KaplanMeierFitter, CoxPHFitter
import ipywidgets as widgets
from IPython.display import display, clear_output

# Global plotting defaults. Order matters: apply the ggplot base style first,
# then let seaborn set the viridis palette on top of it.
plt.style.use("ggplot")
sns.set_palette(palette="viridis")
print("Environment Ready: Survival Analysis Engines Loaded.")

Environment Ready: Survival Analysis Engines Loaded.


In [13]:
# --- CELL 2: DATA GENERATION (UPDATED WITH HIGHLAND PARK PH2) ---
import pandas as pd
import numpy as np

def _known_tenant_gla(tenant):
    """Return the fixed GLA (m²) for a recognised tenant brand, or None if unlisted."""
    if "Pick n Pay" in tenant or "Spar" in tenant:
        return 2500
    if "Woolworths" in tenant:
        return 1200
    if "Booties Pharmacy" in tenant:
        return 600  # Large specialized pharmacy anchor
    if "Puma" in tenant:
        return 800
    if "RocoMamas" in tenant or "Spur" in tenant or "Ocean Basket" in tenant:
        return 350  # Sit-down dining size
    if "Bank" in tenant:
        return 250
    return None


def generate_market_data(seed=2025, save_csv=True):
    """Generate synthetic tenant tables for the two portfolios.

    Terrace Africa (operational assets): each mall is populated with its named
    tenants first, then topped up with randomly sized "Line Shop" units until
    at least 95% of the mall's GLA is occupied. The final line shop is clamped
    so that occupied GLA never exceeds the mall's GLA.

    WestProp (Mall of Zimbabwe, development): a fixed list of committed major
    tenants is added, then speculative units until 60% of the 90,000 m²
    capacity is filled.

    Parameters
    ----------
    seed : int or None, default 2025
        Passed to ``np.random.seed``. Note that this reseeds NumPy's *global*
        random state, which affects any later ``np.random`` call in the kernel.
    save_csv : bool, default True
        When True, write ``terrace_africa_v2.csv`` and ``westprop_v2.csv`` to
        the current working directory.

    Returns
    -------
    (df_ta, df_wp) : tuple of pandas.DataFrame
        ``df_ta`` has one row per Terrace Africa tenant plus a boolean
        ``Risk_Flag`` (more than one late payment OR declining footfall).
        ``df_wp`` has one row per WestProp tenant plus a boolean
        ``Dropout_Risk`` (still negotiating AND no deposit paid).

    Notes
    -----
    ``Tenant_ID`` values are random 4-digit suffixes and are not guaranteed to
    be unique.
    """
    np.random.seed(seed)

    # ==========================================
    # 1. TERRACE AFRICA (Operational Assets)
    # ==========================================
    # Verified Updates:
    # - Highland Park Ph2 Added (3,050m²). Anchor: Booties Pharmacy.
    # - Village Walk (8,645m²). Anchor: Pick n Pay.

    assets = {
        "Village Walk Borrowdale": {
            "GLA": 8645,
            "Tenants": ["Pick n Pay (Anchor)", "Woolworths", "Mugg & Bean", "Ocean Basket", "Sorbet", "Solution Centre", "Cafe Nush"]
        },
        "Greenfields Retail Centre": {
            "GLA": 9729,
            "Tenants": ["Spar (Anchor)", "The Smokehouse", "KFC Drive-Thru", "Café Nush", "Rollers", "Pharmachoice"]
        },
        "Highland Park Ph1": {
            "GLA": 6450,
            "Tenants": ["Pick n Pay (Anchor)", "Puma Energy", "Chicken Inn", "Pizza Inn", "Creamy Inn", "Design Quarter"]
        },
        "Highland Park Ph2": {
            "GLA": 3050,
            "Tenants": ["Booties Pharmacy (Anchor)", "RocoMamas", "Spur", "Ocean Basket", "First Capital Bank", "Liquor Supplies", "Safari Trading Co."]
        },
        "Chinamano Corner": {
            "GLA": 1987,
            "Tenants": ["Puma Energy (Anchor)", "Simbisa Drive-Thru", "Pharmacy", "Convenience Store"]
        }
    }

    terrace_rows = []

    for mall, specs in assets.items():
        current_gla = 0
        total_capacity = specs['GLA']

        # 1. Place Known Anchors & Majors
        for tenant in specs['Tenants']:
            # Recognised brands get a fixed footprint; everything else is a
            # randomly sized mid-size unit.
            size = _known_tenant_gla(tenant)
            if size is None:
                size = np.random.randint(100, 300)

            terrace_rows.append({
                'Tenant_ID': f"TA-{np.random.randint(1000,9999)}",
                'Portfolio': 'Terrace Africa',
                'Asset_Name': mall,
                'Tenant_Name': tenant,
                'GLA_Occupied': size,
                # Large boxes get a lower rate per m² than smaller units.
                'Rent_per_Sqm': np.random.uniform(18, 25) if size > 1000 else np.random.uniform(25, 45),
                'Lease_Expiry_Months': np.random.randint(6, 60),
                'Late_Payments_Last_12M': np.random.choice([0, 1, 3], p=[0.8, 0.15, 0.05]),
                'Footfall_Trend': np.random.choice(['Stable', 'Declining'], p=[0.85, 0.15]),
            })
            current_gla += size

        # 2. Fill Remainder with Line Shops
        while current_gla < (total_capacity * 0.95):
            # Clamp to the space that is left: without this, the last unit
            # could push occupied GLA above the mall's GLA on small assets
            # (5% headroom can be smaller than one line shop). The random draw
            # itself is unchanged, so the RNG sequence is preserved.
            size = min(np.random.randint(50, 150), total_capacity - current_gla)
            terrace_rows.append({
                'Tenant_ID': f"TA-{np.random.randint(1000,9999)}",
                'Portfolio': 'Terrace Africa',
                'Asset_Name': mall,
                'Tenant_Name': f"Line Shop {np.random.randint(100,999)}",
                'GLA_Occupied': size,
                'Rent_per_Sqm': np.random.uniform(30, 50),
                'Lease_Expiry_Months': np.random.randint(6, 36),
                'Late_Payments_Last_12M': np.random.poisson(0.5),
                'Footfall_Trend': np.random.choice(['Stable', 'Declining'], p=[0.7, 0.3]),
            })
            current_gla += size

    # ==========================================
    # 2. WESTPROP HOLDINGS (Development)
    # ==========================================
    westprop_rows = []
    moz_capacity = 90000
    moz_filled = 0

    # Committed major tenants as (name, GLA in m²).
    target_mix = [
        ("Checkers Hyper", 5000), ("Game", 4500), ("Edgars", 3000), ("Truworths", 2500),
        ("Ster-Kinekor", 2000), ("Virgin Active", 2500), ("H&M", 2500), ("Zara", 2500)
    ]

    for name, size in target_mix:
        westprop_rows.append({
            'Tenant_ID': f"WP-{np.random.randint(1000,9999)}",
            'Portfolio': 'WestProp',
            'Asset_Name': 'Mall of Zimbabwe',
            'Tenant_Name': name,
            'GLA_Occupied': size,
            'Fit_Out_Budget_USD': size * 800,
            'Pre_Let_Status': 'Committed',
            'Deposit_Paid': True
        })
        moz_filled += size

    # Speculative units until 60% of capacity is pre-let.
    while moz_filled < (moz_capacity * 0.6):
        size = np.random.randint(100, 500)
        westprop_rows.append({
            'Tenant_ID': f"WP-{np.random.randint(1000,9999)}",
            'Portfolio': 'WestProp',
            'Asset_Name': 'Mall of Zimbabwe',
            'Tenant_Name': f"Speculative Retail {np.random.randint(1,999)}",
            'GLA_Occupied': size,
            'Fit_Out_Budget_USD': size * 400,
            'Pre_Let_Status': np.random.choice(['Negotiating', 'Committed'], p=[0.7, 0.3]),
            'Deposit_Paid': np.random.choice([True, False], p=[0.3, 0.7])
        })
        moz_filled += size

    # Create DataFrames
    df_ta = pd.DataFrame(terrace_rows)
    df_ta['Risk_Flag'] = (df_ta['Late_Payments_Last_12M'] > 1) | (df_ta['Footfall_Trend'] == 'Declining')

    df_wp = pd.DataFrame(westprop_rows)
    df_wp['Dropout_Risk'] = (df_wp['Pre_Let_Status'] == 'Negotiating') & ~df_wp['Deposit_Paid'].astype(bool)

    # Save to CSV
    if save_csv:
        df_ta.to_csv("terrace_africa_v2.csv", index=False)
        df_wp.to_csv("westprop_v2.csv", index=False)

    return df_ta, df_wp

# Build both synthetic portfolios once and keep them as notebook-level frames.
df_terrace, df_westprop = generate_market_data()

# Three-line summary, emitted with a single print call.
print(
    "Data Updated with Highland Park Ph2.",
    "   - Added: Booties Pharmacy, RocoMamas, Spur, Ocean Basket.",
    f"   - Terrace Africa Total Tenants: {len(df_terrace)}",
    sep="\n",
)

Data Updated with Highland Park Ph2.
   - Added: Booties Pharmacy, RocoMamas, Spur, Ocean Basket.
   - Terrace Africa Total Tenants: 155


In [15]:
# --- CELL 3: SURVIVAL LOGIC & ANALYSIS ---

def run_risk_model(df, mode, penalizer=0.0):
    """Fit a Cox proportional-hazards model for one portfolio and return it.

    Parameters
    ----------
    df : pandas.DataFrame
        Tenant table. For ``mode == 'Terrace'`` it must contain
        ``GLA_Occupied``, ``Rent_per_Sqm``, ``Late_Payments_Last_12M``,
        ``Lease_Expiry_Months`` and ``Risk_Flag``. For any other mode it must
        contain ``GLA_Occupied``, ``Fit_Out_Budget_USD``, ``Deposit_Paid`` and
        ``Dropout_Risk``.
    mode : str
        ``'Terrace'`` selects the operational model; every other value selects
        the WestProp pre-letting model.
    penalizer : float, default 0.0
        Regularisation strength forwarded to ``CoxPHFitter``. The default
        reproduces the unpenalised fit; a small positive value can stabilise
        the fit when a covariate (almost) perfectly separates the events.

    Returns
    -------
    lifelines.CoxPHFitter
        The fitted model.
    """
    cph = CoxPHFitter(penalizer=penalizer)

    if mode == 'Terrace':
        # TERRACE MODEL: Risk of Non-Renewal (Operational)
        # Covariates: GLA_Occupied, Rent_per_Sqm, Late_Payments_Last_12M.
        # Duration: Lease_Expiry_Months. Event: Risk_Flag.
        # NOTE(review): if Risk_Flag is derived from Late_Payments_Last_12M,
        # that covariate partly determines the event by construction - confirm
        # this leakage is intended.
        features = df[['GLA_Occupied', 'Rent_per_Sqm', 'Late_Payments_Last_12M', 'Lease_Expiry_Months', 'Risk_Flag']]
        cph.fit(features, duration_col='Lease_Expiry_Months', event_col='Risk_Flag')

    else: # WestProp
        # WESTPROP MODEL: Risk of Dropout (Pre-Letting Phase)
        # Covariates: GLA_Occupied, Deposit_Paid. Event: Dropout_Risk.
        # NOTE(review): Fit_Out_Budget_USD is a money amount but is passed as
        # the duration column, so it acts as the "time" axis rather than as a
        # feature - confirm this is intended or supply a real time-to-event
        # column.
        features = df[['GLA_Occupied', 'Fit_Out_Budget_USD', 'Deposit_Paid', 'Dropout_Risk']]
        cph.fit(features, duration_col='Fit_Out_Budget_USD', event_col='Dropout_Risk')

    return cph

# --- TEST RUN (EXECUTE BOTH) ---
# Fits one Cox model per portfolio and prints a compact coefficient summary.
# cph_ta / cph_wp are kept as notebook-level names.

# 1. Analyze Terrace Africa
# The header's flag emoji was stored as mis-decoded bytes; restored here.
print("🇿🇼 TERRACE AFRICA: Operational Risk Factors")
print("------------------------------------------------")
cph_ta = run_risk_model(df_terrace, 'Terrace')
cph_ta.print_summary(columns=['coef', 'exp(coef)', 'p'])

print("\n" + "="*60 + "\n")

# 2. Analyze WestProp (Mall of Zim)
print("WESTPROP: Pre-Letting Dropout Risk")
print("------------------------------------------------")
cph_wp = run_risk_model(df_westprop, 'WestProp')
cph_wp.print_summary(columns=['coef', 'exp(coef)', 'p'])

🇿🇼 TERRACE AFRICA: Operational Risk Factors
------------------------------------------------


0,1
model,lifelines.CoxPHFitter
duration col,'Lease_Expiry_Months'
event col,'Risk_Flag'
baseline estimation,breslow
number of observations,155
number of events observed,57
partial log-likelihood,-229.00
time fit was run,2025-12-26 18:34:10 UTC

Unnamed: 0,coef,exp(coef),p
GLA_Occupied,-0.0,1.0,0.09
Rent_per_Sqm,0.01,1.01,0.62
Late_Payments_Last_12M,0.33,1.39,0.02

0,1
Concordance,0.63
Partial AIC,464.01
log-likelihood ratio test,17.49 on 3 df
-log2(p) of ll-ratio test,10.80




WESTPROP: Pre-Letting Dropout Risk
------------------------------------------------



>>> events = df['Dropout_Risk'].astype(bool)
>>> print(df.loc[events, 'Deposit_Paid'].var())
>>> print(df.loc[~events, 'Deposit_Paid'].var())

A very low variance means that the column Deposit_Paid completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0,1
model,lifelines.CoxPHFitter
duration col,'Fit_Out_Budget_USD'
event col,'Dropout_Risk'
baseline estimation,breslow
number of observations,107
number of events observed,45
partial log-likelihood,-129.41
time fit was run,2025-12-26 18:34:10 UTC

Unnamed: 0,coef,exp(coef),p
GLA_Occupied,-0.01,0.99,0.01
Deposit_Paid,-2.53,0.08,0.01

0,1
Concordance,1.00
Partial AIC,262.81
log-likelihood ratio test,97.45 on 2 df
-log2(p) of ll-ratio test,70.30
