# 🏘️ Istanbul Neighborhood Soul Generator
Generate comprehensive, insightful descriptions of Istanbul neighborhoods using Llama with 4-bit quantization

**Runtime:** Make sure you're using a GPU runtime (T4, L4, or A100)
- Go to Runtime → Change runtime type → Select GPU

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required packages
!pip install -q transformers accelerate bitsandbytes pandas sentencepiece protobuf huggingface_hub autoawq

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/74.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.3/74.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m45.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for autoawq (setup.py) ... [?25l[?25hdone


In [4]:
# Login to Hugging Face (required for Llama models)
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Upload your CSV file
from google.colab import files
import io

print("📁 Upload your CSV file with neighborhood data:")
uploaded = files.upload()

# Get the filename
csv_filename = list(uploaded.keys())[0]
print(f"\n✅ Uploaded: {csv_filename}")

📁 Upload your CSV file with neighborhood data:


Saving istanbul_mahalle_complete_data.csv to istanbul_mahalle_complete_data.csv

✅ Uploaded: istanbul_mahalle_complete_data.csv


In [6]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import warnings
warnings.filterwarnings('ignore')

# Load and preview the data
df = pd.read_csv(csv_filename)
print(f"📊 Loaded {len(df)} neighborhoods\n")
print("Preview:")
df.head()

📊 Loaded 164 neighborhoods

Preview:


Unnamed: 0,Mahalle,İlçe,Enlem,Boylam,INDEX_YASAM_KALITESI,INDEX_YURUNEBILIRLIK,KULTUREL_AKTIVITE_INDEX,restaurant,library,school,...,agir_yarali_sayisi,hastanede_tedavi_sayisi,hafif_yarali_sayisi,dogalgaz_boru_hasari,icme_suyu_boru_hasari,atik_su_boru_hasari,gecici_barinma,Avg_Rent_Per_SqM,Green_Index,Society_Welfare_Index
0,Balmumcu,Beşiktaş,41.059527,29.015073,,,,0,0,13,...,,,,,,,,560,0.93,1.0
1,Bebek,Beşiktaş,41.07897,29.043979,,,,11,1,4,...,,,,,,,,560,0.93,1.0
2,Kültür,Beşiktaş,41.072961,29.032796,,,,13,0,2,...,,,,,,,,560,0.93,1.0
3,Kuruçeşme,Beşiktaş,41.06149,29.034273,,,,16,0,14,...,,,,,,,,560,0.93,1.0
4,Mecidiye,Beşiktaş,41.052509,29.019187,,,,10,0,5,...,,,,,,,,560,0.93,1.0


In [7]:
df.columns.unique()

Index(['Mahalle', 'İlçe', 'Enlem', 'Boylam', 'INDEX_YASAM_KALITESI',
       'INDEX_YURUNEBILIRLIK', 'KULTUREL_AKTIVITE_INDEX', 'restaurant',
       'library', 'school', 'park', 'atm', 'cafe', 'pharmacy', 'hospital',
       'mosque', 'bus_station', 'train_station', 'transit_station',
       'Toplam Geçerli Oy', 'Toplam Geçersiz Oy', 'CHP', 'AK PARTİ', 'SAADET',
       'VATAN PARTİSİ', 'Nüfus', 'mahalle_uavt', '1980_oncesi',
       '1980-2000_arasi', '2000_sonrasi', '1-4 kat_arasi', '5-9 kat_arasi',
       '9-19 kat_arasi', 'mahalle_koy_uavt', 'cok_agir_hasarli_bina_sayisi',
       'agir_hasarli_bina_sayisi', 'orta_hasarli_bina_sayisi',
       'hafif_hasarli_bina_sayisi', 'can_kaybi_sayisi', 'agir_yarali_sayisi',
       'hastanede_tedavi_sayisi', 'hafif_yarali_sayisi',
       'dogalgaz_boru_hasari', 'icme_suyu_boru_hasari', 'atik_su_boru_hasari',
       'gecici_barinma', 'Avg_Rent_Per_SqM', 'Green_Index',
       'Society_Welfare_Index'],
      dtype='object')

In [8]:
# Configure 4-bit quantization for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [9]:
torch.cuda.empty_cache()

print("🔧 Loading Llama 3.3 8B in full quality...")
print(f"💾 Available GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB\n")

model_name = "meta-llama/Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

🔧 Loading Llama 3.3 8B in full quality...
💾 Available GPU Memory: 39.56 GB



tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [10]:
def create_comprehensive_prompt(row):
    """Create a detailed prompt for generating neighborhood descriptions"""

    mahalle = row['Mahalle']
    ilce = row['İlçe']

    # Geographic data
    enlem = row.get('Enlem', None)
    boylam = row.get('Boylam', None)
    nufus = row.get('Nüfus', 0)

    # Quality indices
    quality_of_life = row.get('INDEX_YASAM_KALITESI', None)
    walkability = row.get('INDEX_YURUNEBILIRLIK', None)
    cultural_activity = row.get('KULTUREL_AKTIVITE_INDEX', None)
    green_index = row.get('Green_Index', None)
    society_welfare = row.get('Society_Welfare_Index', None)

    # Political data
    total_valid_votes = row.get('Toplam Geçerli Oy', 0)
    total_invalid_votes = row.get('Toplam Geçersiz Oy', 0)
    chp = row.get('CHP', 0)
    ak_parti = row.get('AK PARTİ', 0)
    saadet = row.get('SAADET', 0)
    vatan = row.get('VATAN PARTİSİ', 0)

    # Amenities
    restaurants = row.get('restaurant', 0)
    libraries = row.get('library', 0)
    schools = row.get('school', 0)
    parks = row.get('park', 0)
    atms = row.get('atm', 0)
    cafes = row.get('cafe', 0)
    pharmacies = row.get('pharmacy', 0)
    hospitals = row.get('hospital', 0)
    mosques = row.get('mosque', 0)
    bus_stations = row.get('bus_station', 0)
    train_stations = row.get('train_station', 0)
    transit_stations = row.get('transit_station', 0)

    # Building stock
    pre_1980 = row.get('1980_oncesi', 0)
    between_1980_2000 = row.get('1980-2000_arasi', 0)
    post_2000 = row.get('2000_sonrasi', 0)
    low_rise = row.get('1-4 kat_arasi', 0)
    mid_rise = row.get('5-9 kat_arasi', 0)
    high_rise = row.get('9-19 kat_arasi', 0)

    # Earthquake damage
    very_severe_damage = row.get('cok_agir_hasarli_bina_sayisi', 0)
    severe_damage = row.get('agir_hasarli_bina_sayisi', 0)
    moderate_damage = row.get('orta_hasarli_bina_sayisi', 0)
    light_damage = row.get('hafif_hasarli_bina_sayisi', 0)
    casualties = row.get('can_kaybi_sayisi', 0)
    serious_injuries = row.get('agir_yarali_sayisi', 0)
    hospitalized = row.get('hastanede_tedavi_sayisi', 0)
    light_injuries = row.get('hafif_yarali_sayisi', 0)
    gas_damage = row.get('dogalgaz_boru_hasari', 0)
    water_damage = row.get('icme_suyu_boru_hasari', 0)
    sewage_damage = row.get('atik_su_boru_hasari', 0)
    temp_shelter = row.get('gecici_barinma', 0)

    # Real estate
    avg_rent = row.get('Avg_Rent_Per_SqM', None)

    # ===== NEW LOGIC ADDITIONS =====

    # Population density narrative
    population_narrative = ""
    if pd.notna(nufus) and nufus > 0:
        if nufus > 40000:
            population_narrative = f"Densely populated with {int(nufus):,} residents - bustling urban atmosphere"
        elif nufus > 20000:
            population_narrative = f"Well-populated area with {int(nufus):,} residents - lively community"
        elif nufus > 10000:
            population_narrative = f"Moderate population of {int(nufus):,} - balanced neighborhood feel"
        else:
            population_narrative = f"Smaller community of {int(nufus):,} residents - quiet and intimate"

    # Building age narrative
    total_buildings = pre_1980 + between_1980_2000 + post_2000
    building_age_narrative = ""
    if total_buildings > 0:
        pre_1980_pct = (pre_1980 / total_buildings * 100)
        post_2000_pct = (post_2000 / total_buildings * 100)

        if pre_1980_pct > 60:
            building_age_narrative = f"Old buildings with {pre_1980_pct:.0f}% pre-1980 buildings - charm but potential earthquake vulnerability"
        elif post_2000_pct > 60:
            building_age_narrative = f"Modern neighborhood with {post_2000_pct:.0f}% post-2000 construction - contemporary standards and seismic safety"
        elif pre_1980_pct > 40:
            building_age_narrative = f"Mix of old ({pre_1980_pct:.0f}% pre-1980) and newer construction - transitioning area"
        else:
            building_age_narrative = f"Balanced building stock across eras - diverse architectural character"

    # Building height narrative
    total_height = low_rise + mid_rise + high_rise
    building_height_narrative = ""
    if total_height > 0:
        low_pct = (low_rise / total_height * 100)
        high_pct = (high_rise / total_height * 100)

        if low_pct > 70:
            building_height_narrative = f"Low-rise character ({low_pct:.0f}% are 1-4 stories) - intimate, human-scale neighborhood"
        elif high_pct > 50:
            building_height_narrative = f"High-rise dominated ({high_pct:.0f}% are 9+ stories) - vertical urban density"
        else:
            building_height_narrative = f"Mixed-height urban fabric - varied building scales"

    # Earthquake impact assessment
    earthquake_narrative = ""
    total_damage = very_severe_damage + severe_damage + moderate_damage + light_damage
    total_casualties = casualties + serious_injuries + hospitalized + light_injuries

    if total_damage > 0 or total_casualties > 0:
        if very_severe_damage > 10 or casualties > 5:
            earthquake_narrative = f"CRITICAL: Severely will be affected by earthquake - {int(very_severe_damage)} buildings with very severe damage, {int(casualties)} casualties are predicted."
        elif severe_damage > 5 or casualties > 0:
            earthquake_narrative = f"Will be significantly impacted by next earthquake - {int(total_damage)} buildings will be damaged, {int(total_casualties)} people will be affected."
        elif total_damage > 10:
            earthquake_narrative = f"Moderate earthquake impact - {int(total_damage)} buildings will be affected, mostly light to moderate damage"
        else:
            earthquake_narrative = f"Minor earthquake damage expected - {int(total_damage)} buildings, community largely resilient"

        if temp_shelter > 100:
            earthquake_narrative += f" {int(temp_shelter)} residents may require temporary shelter."

    # Infrastructure damage assessment
    infrastructure_narrative = ""
    total_infra_damage = gas_damage + water_damage + sewage_damage
    if total_infra_damage > 0:
        damage_types = []
        if gas_damage > 0:
            damage_types.append(f"{int(gas_damage)} gas pipe may break in the next earthquake")
        if water_damage > 0:
            damage_types.append(f"{int(water_damage)} water line may rupture in the next eartquake")
        if sewage_damage > 0:
            damage_types.append(f"{int(sewage_damage)} sewage system failures expected in the next earthquake")
        infrastructure_narrative = f"Expected infrastructure stress: {', '.join(damage_types)}"

    # Green space assessment
    green_narrative = ""
    if pd.notna(green_index):
        if green_index > 0.7:
            green_narrative = f"Abundant green space (index: {green_index:.2f}) - parks and nature accessible"
        elif green_index > 0.6:
            green_narrative = f"Moderate greenery (index: {green_index:.2f}) - some recreational space"
        else:
            green_narrative = f"Limited green space (index: {green_index:.2f}) - concrete-heavy environment"

    # Social welfare assessment
    welfare_narrative = ""
    if pd.notna(society_welfare):
        if society_welfare > 0.7:
            welfare_narrative = f"Strong social fabric (welfare index: {society_welfare:.2f}) - cohesive community"
        elif society_welfare > 0.5:
            welfare_narrative = f"Moderate community welfare (index: {society_welfare:.2f})"
        else:
            welfare_narrative = f"Social challenges evident (welfare index: {society_welfare:.2f}) - support systems may be limited"

    # Rent value assessment
    rent_narrative = ""
    if pd.notna(avg_rent) and avg_rent > 0:
        if avg_rent > 400:
            rent_narrative = f"Premium market (₺{avg_rent:.0f}/sqm) - expect upscale amenities"
        elif avg_rent > 300:
            rent_narrative = f"Above-average rents (₺{avg_rent:.0f}/sqm) - desirable location"
        elif avg_rent > 250:
            rent_narrative = f"Moderate rents (₺{avg_rent:.0f}/sqm) - middle-market option"
        else:
            rent_narrative = f"Affordable rents (₺{avg_rent:.0f}/sqm) - budget-friendly"

    # Voter participation analysis
    voter_participation_narrative = ""
    if pd.notna(total_valid_votes) and pd.notna(total_invalid_votes):
        total_votes = total_valid_votes + total_invalid_votes
        if total_votes > 0:
            invalid_pct = (total_invalid_votes / total_votes * 100)
            if invalid_pct > 5:
                voter_participation_narrative = f"High invalid vote rate ({invalid_pct:.1f}%) may indicate voter dissatisfaction or confusion"
            elif total_valid_votes > 10000:
                voter_participation_narrative = f"Strong civic engagement - {int(total_valid_votes):,} valid votes cast"

    # Build political narrative
    political = "No voting data available"
    if pd.notna(total_valid_votes) and total_valid_votes > 0:
        chp_pct = (chp / total_valid_votes * 100) if pd.notna(chp) else 0
        ak_pct = (ak_parti / total_valid_votes * 100) if pd.notna(ak_parti) else 0
        saadet_pct = (saadet / total_valid_votes * 100) if pd.notna(saadet) else 0
        vatan_pct = (vatan / total_valid_votes * 100) if pd.notna(vatan) else 0

        if chp_pct > ak_pct and chp_pct > 40:
            political = f"Strong CHP support ({chp_pct:.0f}%), indicating a secular, progressive-leaning community. Not best for conservatives."
        elif ak_pct > chp_pct and ak_pct > 40:
            political = f"Strong AK Party support ({ak_pct:.0f}%), reflecting traditional and conservative values. Not best for seculars."
        elif abs(chp_pct - ak_pct) < 10:
            political = f"Politically diverse area (CHP {chp_pct:.0f}%, AK Party {ak_pct:.0f}%), representing mixed demographics"
        else:
            political = f"Varied political preferences (CHP {chp_pct:.0f}%, AK Party {ak_pct:.0f}%)"

        if saadet_pct > 10:
            political += f"; notable Saadet Party presence ({saadet_pct:.0f}%) suggests conservative religious voters"
        if vatan_pct > 10:
            political += f"; Vatan Party support ({vatan_pct:.0f}%) indicates nationalist sentiment"

    # Build cultural character
    culture_indicators = []
    if cafes > 8:
        culture_indicators.append("vibrant café culture")
    if restaurants > 10:
        culture_indicators.append("diverse dining scene")
    if libraries > 0:
        culture_indicators.append("values education and reading")
    if parks > 5:
        culture_indicators.append("outdoor-oriented lifestyle")

    if pd.notna(cultural_activity) and cultural_activity > 0.6:
        culture_indicators.append("high cultural engagement")

    culture = ", ".join(culture_indicators) if culture_indicators else "traditional neighborhood character"

    # Build quality of life narrative
    quality_narrative = ""
    if pd.notna(quality_of_life):
        if quality_of_life > 0.7:
            quality_narrative = f"Excellent quality of life (index: {quality_of_life:.2f}) - one of the area's better neighborhoods"
        elif quality_of_life > 0.5:
            quality_narrative = f"Good quality of life (index: {quality_of_life:.2f}) - comfortable and well-serviced"
        else:
            quality_narrative = f"Developing area (quality index: {quality_of_life:.2f}) - still growing"
    else:
        quality_narrative = "Quality of life data not available"

    if pd.notna(walkability):
        if walkability > 0.7:
            quality_narrative += f", highly walkable (score: {walkability:.2f})"
        elif walkability > 0.5:
            quality_narrative += f", moderately walkable (score: {walkability:.2f})"
        else:
            quality_narrative += f", car-dependent area (walkability: {walkability:.2f})"

    # Build highlights
    highlights = []
    if restaurants > 17:
        highlights.append(f"Rich culinary scene with {int(restaurants)} restaurants")
    if cafes > 12:
        highlights.append(f"Thriving café culture with {int(cafes)} venues")
    if parks > 7:
        highlights.append(f"Green and spacious with {int(parks)} parks")
    if schools > 8:
        highlights.append(f"Family-friendly with {int(schools)} schools")
    if hospitals > 2:
        highlights.append(f"Excellent healthcare access with {int(hospitals)} hospitals")
    if libraries > 0:
        highlights.append(f"{int(libraries)} libraries for community learning")

    highlights_text = ", ".join(highlights) if highlights else "Essential urban amenities available"

    # Build transport quality
    total_transit = int(bus_stations + train_stations + transit_stations)
    if total_transit > 10:
        transport_quality = f"Excellent public transport ({total_transit} stations/stops)"
    elif total_transit > 5:
        transport_quality = f"Good connectivity ({total_transit} transit points)"
    elif total_transit > 0:
        transport_quality = f"Basic public transport ({total_transit} stops)"
    else:
        transport_quality = "Limited public transport - car recommended"

    # Format variables that might be NaN for the prompt
    enlem_formatted = f"{enlem:.6f}" if pd.notna(enlem) else "N/A"
    boylam_formatted = f"{boylam:.6f}" if pd.notna(boylam) else "N/A"
    cultural_activity_formatted = f"{cultural_activity:.2f}" if pd.notna(cultural_activity) else "N/A"


    # ===== COMPREHENSIVE PROMPT =====

    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a sociologist and real estate analyst specializing in Istanbul neighborhoods. Be genuinely helpful for people considering living there.

Your description should be concrete, sincere, and informative. Don't just list numbers - tell the neighborhood's story.<|eot_id|><|start_header_id|>user<|end_header_id|>

Write a comprehensive description for this Istanbul neighborhood:

**NEIGHBORHOOD IDENTITY:**
- Name: {mahalle}, {ilce} district
- {population_narrative}
- Location: Latitude {enlem_formatted}, Longitude {boylam_formatted}

**QUALITY OF LIFE INDICES:**
- {quality_narrative}
- {green_narrative}
- {welfare_narrative}

**POLITICAL & CIVIC LANDSCAPE:**
- {political}
- {voter_participation_narrative}

**CULTURAL CHARACTER:**
- Community character: {culture}
- Cultural Activity Index: {cultural_activity_formatted}

**URBAN FABRIC & BUILDING STOCK:**
- {building_age_narrative}
- {building_height_narrative}

**EARTHQUAKE RESILIENCE & DAMAGE (2023):**
- {earthquake_narrative if earthquake_narrative else 'No significant earthquake damage reported'}
- {infrastructure_narrative if infrastructure_narrative else 'Infrastructure remained intact'}

**NEIGHBORHOOD AMENITIES:**
- Food & Social: {int(restaurants)} restaurants, {int(cafes)} cafés
- Essential Services: {int(pharmacies)} pharmacies, {int(atms)} ATMs, {int(hospitals)} hospitals
- Education: {int(schools)} schools, {int(libraries)} libraries
- Recreation: {int(parks)} parks
- Religious: {int(mosques)} mosques
- Transportation: {transport_quality}

**REAL ESTATE MARKET:**
- {rent_narrative if rent_narrative else 'Rent data not available'}

**KEY HIGHLIGHTS:**
- {highlights_text}

TASK:
Write a balanced, honest neighborhood description in English (2-3 cohesive paragraphs) that helps people make informed decisions.

**STRUCTURE YOUR DESCRIPTION:**

**Paragraph 1 - Character & Community:**
Describe who lives here, the social and political atmosphere, and what daily life feels like. Consider population density, political leanings, cultural vibrancy, and community cohesion.

**Paragraph 2 - Built Environment & Livability:**
Discuss the physical neighborhood: building ages and heights, walkability, green space, amenities, and transportation. Be specific about what's abundant and what's scarce. Address earthquake resilience honestly.

**Paragraph 3 - Honest Assessment & Fit:**
Who would thrive here and why? Who might struggle? What are the trade-offs between cost, convenience, safety, and quality of life? If earthquake damage was significant, address implications directly. Balance strengths with real limitations.

**CRITICAL RULES:**
- Use ONLY the data provided above - no fabrication
- If earthquake damage was severe, this MUST be prominently mentioned
- If services are sparse (low numbers), acknowledge this directly
- If buildings are predominantly pre-1980, mention earthquake vulnerability
- Connect rent levels to what the neighborhood offers
- Be honest about both strengths AND weaknesses
- Write in natural, flowing prose - avoid bullet points in your response
- Do NOT mention features with 0 count
- If something is missing or limited, say so explicitly

Write your balanced, informative description now:<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

    return prompt

def generate_description(prompt, max_length=300):
    """Generate text using the model"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.5,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the assistant's response
    if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
        generated_text = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()

    return generated_text

print("✅ Functions defined successfully!")

✅ Functions defined successfully!


In [11]:
# Test with first neighborhood
print("🧪 Testing with first neighborhood...\n")

test_row = df.iloc[0]
test_prompt = create_comprehensive_prompt(test_row)
test_description = generate_description(test_prompt)

print(f"📍 {test_row['Mahalle']}, {test_row['İlçe']}")
print(f"\n💫 Generated Description:")
print(f"{test_description}")
print("\n" + "="*80)
print("✅ Test successful! Ready to process all neighborhoods.")

🧪 Testing with first neighborhood...

📍 Balmumcu, Beşiktaş

💫 Generated Description:
system

You are a sociologist and real estate analyst specializing in Istanbul neighborhoods. Be genuinely helpful for people considering living there.

Your description should be concrete, sincere, and informative. Don't just list numbers - tell the neighborhood's story.user

Write a comprehensive description for this Istanbul neighborhood:

**NEIGHBORHOOD IDENTITY:**
- Name: Balmumcu, Beşiktaş district
- Smaller community of 5,988 residents - quiet and intimate
- Location: Latitude 41.059527, Longitude 29.015073

**QUALITY OF LIFE INDICES:**
- Quality of life data not available
- Abundant green space (index: 0.93) - parks and nature accessible
- Strong social fabric (welfare index: 1.00) - cohesive community

**POLITICAL & CIVIC LANDSCAPE:**
- No voting data available
- 

**CULTURAL CHARACTER:**
- Community character: traditional neighborhood character
- Cultural Activity Index: N/A

**URBAN FABRIC &

In [12]:
# Generate descriptions for all neighborhoods
from IPython.display import clear_output
import time

# Add a new column for descriptions
df['Description'] = ""

start_time = time.time()

for idx, row in df.iterrows():
    mahalle = row['Mahalle']
    ilce = row['İlçe']

    try:
        prompt = create_comprehensive_prompt(row)
        description = generate_description(prompt)
        df.at[idx, 'Description'] = description

        # Display progress
        clear_output(wait=True)
        progress = (idx + 1) / len(df) * 100
        elapsed = time.time() - start_time
        avg_time = elapsed / (idx + 1)
        remaining = avg_time * (len(df) - idx - 1)

        print(f"Progress: [{idx+1}/{len(df)}] {progress:.1f}%")
        print(f"⏱️  Time: {elapsed/60:.1f}m elapsed | ~{remaining/60:.1f}m remaining")
        print("="*60)
        print(f"\n📍 Current: {mahalle}, {ilce}")
        print(f"\n💫 Description:")
        print(f"{description[:200]}..." if len(description) > 200 else f"{description}")
        print("\n" + "="*60)

    except Exception as e:
        print(f"\n❌ Error processing {mahalle}: {e}")
        df.at[idx, 'Description'] = "Description generation failed"

total_time = time.time() - start_time
print(f"\n\n✅ Complete! Processed {len(df)} neighborhoods in {total_time/60:.1f} minutes")
print(f"⚡ Average: {total_time/len(df):.1f} seconds per neighborhood")

Progress: [164/164] 100.0%
⏱️  Time: 30.2m elapsed | ~0.0m remaining

📍 Current: Koşuyolu, Kadıköy

💫 Description:
system

You are a sociologist and real estate analyst specializing in Istanbul neighborhoods. Be genuinely helpful for people considering living there.

Your description should be concrete, sincere, a...



✅ Complete! Processed 164 neighborhoods in 30.2 minutes
⚡ Average: 11.0 seconds per neighborhood


In [15]:
# Preview the results
print("📊 Sample Results:\n")
print("="*100)

for i in range(min(5, len(df))):
    row = df.iloc[i]
    print(f"\n{i+1}. {row['Mahalle']}, {row['İlçe']}")
    print(f"   {row['Soulful_Description']}")
    print("-"*100)

# Show full dataframe preview
df[['Mahalle', 'İlçe', 'Soulful_Description']].head(10)

📊 Sample Results:


1. Balmumcu, Beşiktaş


KeyError: 'Soulful_Description'

In [16]:
# Save the results
output_filename = csv_filename.replace('.csv', '_with_descriptions.csv')
df.to_csv(output_filename, index=False)

print(f"💾 Saved results to: {output_filename}")
print(f"\n📥 Download your file:")

# Download the file
files.download(output_filename)

💾 Saved results to: istanbul_mahalle_complete_data_with_descriptions.csv

📥 Download your file:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [18]:
# Optional: View statistics and analysis
print("📈 Statistics:\n")
print(f"Total neighborhoods processed: {len(df)}")
print(f"Successful descriptions: {df['Description'].notna().sum()}")
print(f"Average description length: {df['Description'].str.len().mean():.0f} characters")
print(f"Average word count: {df['Description'].str.split().str.len().mean():.0f} words")

# Show distribution by district
print("\n🏙️ Distribution by District:")
print(df['İlçe'].value_counts().head(10))

📈 Statistics:

Total neighborhoods processed: 164
Successful descriptions: 164
Average description length: 4718 characters
Average word count: 683 words

🏙️ Distribution by District:
İlçe
Fatih       57
Üsküdar     33
Beykoz      16
Bakırköy    15
Bağcılar    15
Kadıköy     12
Beşiktaş    11
Esenyurt     5
Name: count, dtype: int64
