In [51]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import re
import importlib
from functions import *
from GA import *

# 1) DATASETS and Data Cleaning

## a) Population Dataset

In [2]:
population_path = "/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/population_with_coordinates.csv"

In [3]:
raw_population_df = pd.read_csv(population_path , header=None, sep=",")

In [4]:
raw_population_df = raw_population_df.dropna(how='all', axis=1)

In [5]:
raw_population_df.columns = raw_population_df.iloc[0]

In [6]:
raw_population_df = raw_population_df.drop(index=0).reset_index(drop=True)
raw_population_df = raw_population_df.loc[:, raw_population_df.columns.notna()]

In [7]:
raw_population_df[['latitude', 'longitude']] = raw_population_df['coordinate'].str.split(',', expand=True)

In [8]:
raw_population_df['latitude'] = raw_population_df['latitude'].astype(float)
raw_population_df['longitude'] = raw_population_df['longitude'].astype(float)
raw_population_df['population'] = raw_population_df['population'].astype(int)

In [9]:
population_df = raw_population_df[['population', 'neighborhood', 'township', 'neighborhood_code', 'latitude', 'longitude']]

In [10]:
population_df = population_df.dropna()

In [11]:
population_df.head()

Unnamed: 0,population,neighborhood,township,neighborhood_code,latitude,longitude
0,4693,Adnan Menderes,Arnavutköy,40490,41.211179,28.700163
1,47828,Anadolu,Arnavutköy,99359,41.186036,28.749463
2,23116,Arnavutköy Merkez,Arnavutköy,40478,41.182546,28.737891
3,10566,Atatürk,Arnavutköy,40482,41.190149,28.760125
4,12829,Boğazköy İstiklal,Arnavutköy,40483,41.183488,28.768235


In [12]:
population_df.describe()

Unnamed: 0,population,latitude,longitude
count,708.0,708.0,708.0
mean,21397.411017,41.029966,28.972329
std,15943.770157,0.071404,0.193073
min,2025.0,40.817868,28.409171
25%,10279.25,40.998081,28.854917
50%,17970.0,41.028968,28.98112
75%,28527.75,41.063456,29.109349
max,112367.0,41.269994,29.386538


## b) Metro Station Dataset

In [13]:
stations_gdf = gpd.read_file("/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/station.geojson")

In [14]:
print(stations_gdf.head())

                   ISTASYON                                      PROJE_ADI  \
0               Mehmet Akif            T1 Kabataş - Bağcılar Tramvay Hattı   
1                  Soğanlık                   M4 Kadıköy - SGH Metro Hattı   
2                   Aksaray  M1A Yenikapı - Atatürk Havalimanı Metro Hattı   
3                 Olimpiyat            M9 Bahariye - Olimpiyat Metro Hattı   
4  Sağmalcılar-Pancar Motor      T4 Topkapı - Mescid-i Selam Tramvay Hattı   

               PROJE_ASAMA HAT_TURU                                  MUDURLUK  \
0  Mevcut Hattaki İstasyon  Tramvay   Avrupa Yakası Raylı Sistemler Müdürlüğü   
1  Mevcut Hattaki İstasyon    Metro  Anadolu Yakası Raylı Sistemler Müdürlüğü   
2  Mevcut Hattaki İstasyon    Metro   Avrupa Yakası Raylı Sistemler Müdürlüğü   
3  Mevcut Hattaki İstasyon    Metro   Avrupa Yakası Raylı Sistemler Müdürlüğü   
4  Mevcut Hattaki İstasyon  Tramvay   Avrupa Yakası Raylı Sistemler Müdürlüğü   

                    geometry  
0   POINT (28

In [15]:
stations_gdf["lat"] = stations_gdf.geometry.x
stations_gdf["lon"] = stations_gdf.geometry.y

In [16]:
stations_df = stations_gdf[["ISTASYON", "PROJE_ADI", "HAT_TURU", "lat", "lon"]]

In [17]:
stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]


In [18]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694


# Visulation

In [19]:
import folium


istanbul_coords = [41.0082, 28.9784]


map_istanbul = folium.Map(location=istanbul_coords, zoom_start=11, tiles='CartoDB positron')

for index, row in stations_df.iterrows():
    popup_text = f"{row['ISTASYON']}<br>{row['PROJE_ADI']}"
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=popup_text,
        icon=folium.Icon(color='blue' if row['HAT_TURU'] == 'Metro' else 'green', icon='train')
    ).add_to(map_istanbul)



map_istanbul

In [20]:
m = folium.Map(location=istanbul_coords, zoom_start=10)


for _, row in population_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{row['neighborhood']} ({row['township']})\nNüfus: {row['population']}",
        tooltip=row['neighborhood']
    ).add_to(m)

m

In [21]:
lat_min, lat_max = 40.977, 41.14
lon_min, lon_max = 28.70, 29.0 

In [22]:
num_lat_grids = 30
num_lon_grids = 30

In [23]:
grid_list , candidate_stations = create_grid(
    lat_min = lat_min , lon_min = lon_min , lat_max = lat_max , lon_max = lon_max)

In [24]:
grid_df = pd.DataFrame(grid_list)
grid_df.head()

Unnamed: 0,grid_id,lat_start,lat_end,lon_start,lon_end
0,"(0, 0)",40.977,40.982433,28.7,28.71
1,"(0, 1)",40.977,40.982433,28.71,28.72
2,"(0, 2)",40.977,40.982433,28.72,28.73
3,"(0, 3)",40.977,40.982433,28.73,28.74
4,"(0, 4)",40.977,40.982433,28.74,28.75


In [25]:
candidate_stations_df = pd.DataFrame(candidate_stations)
candidate_stations_df.head()

Unnamed: 0,station_id,lat,lon
0,"(0, 0)",40.979717,28.705
1,"(0, 1)",40.979717,28.715
2,"(0, 2)",40.979717,28.725
3,"(0, 3)",40.979717,28.735
4,"(0, 4)",40.979717,28.745


In [26]:
candidate_stations_df.describe()

Unnamed: 0,lat,lon
count,900.0,900.0
mean,41.0585,28.85
std,0.047054,0.086603
min,40.979717,28.705
25%,41.01775,28.775
50%,41.0585,28.85
75%,41.09925,28.925
max,41.137283,28.995


In [27]:
candidate_stations_df['TYPE'] = 'candidate'
stations_df['TYPE'] = 'existing'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df['TYPE'] = 'existing'


In [28]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon,TYPE
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681,existing
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398,existing
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809,existing
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234,existing
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694,existing


In [29]:
candidate_stations_df.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,"(0, 0)",40.979717,28.705,candidate
1,"(0, 1)",40.979717,28.715,candidate
2,"(0, 2)",40.979717,28.725,candidate
3,"(0, 3)",40.979717,28.735,candidate
4,"(0, 4)",40.979717,28.745,candidate


In [30]:
all_stations = pd.concat([
    candidate_stations_df[['station_id', 'lat', 'lon' , 'TYPE']],
    stations_df[['ISTASYON', 'lat', 'lon' , 'TYPE']].rename(columns={'ISTASYON': 'station_id'})
]).reset_index(drop=True)


In [31]:
all_stations = all_stations.reset_index(drop=True)
all_stations['station_id'] = all_stations.index + 34001

In [32]:
all_stations.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,34001,40.979717,28.705,candidate
1,34002,40.979717,28.715,candidate
2,34003,40.979717,28.725,candidate
3,34004,40.979717,28.735,candidate
4,34005,40.979717,28.745,candidate


In [33]:
all_stations_pop = calculate_population_per_station(all_stations , population_df)

In [34]:
all_stations_pop_filtered = all_stations_pop[
    ~((all_stations_pop['arrived_population'] < 1000) & (all_stations_pop['TYPE'] == 'candidate'))
]

all_stations_pop_filtered = all_stations_pop_filtered.sort_values(by='arrived_population' , ascending = False)

In [35]:
all_stations_pop_filtered.head()

Unnamed: 0,station_id,lat,lon,TYPE,arrived_population
164,34165,41.006883,28.845,candidate,302574
134,34135,41.00145,28.845,candidate,294879
104,34105,40.996017,28.845,candidate,261326
225,34226,41.01775,28.855,candidate,259554
1177,35178,41.019391,28.857845,existing,235721


In [36]:
all_stations_pop_filtered.describe()

Unnamed: 0,station_id,lat,lon,arrived_population
count,866.0,866.0,866.0,866.0
mean,34654.267898,41.034148,28.910335,71616.110855
std,383.173053,0.055695,0.126874,48453.87369
min,34001.0,40.784064,28.670765,0.0
25%,34318.25,41.006883,28.825,34711.5
50%,34605.5,41.03405,28.895,66030.5
75%,35026.75,41.06822,28.965,102849.5
max,35243.0,41.256408,29.409966,302574.0


In [37]:
from collections import defaultdict
import re

# Eşleştirme toleransı (örneğin 0.0005 derece, yani yaklaşık 50 metre)
TOLERANCE = 0.0005

# Boş dictionary (defaultdict ile list olarak başlatılıyor)
project_dict = defaultdict(list)

# existing istasyonları filtrele
existing_stations = all_stations_pop[all_stations_pop['TYPE'] == 'existing']

# Her proje için istasyonları eşle
for idx, row in stations_df.iterrows():
    proje_adi_full = row['PROJE_ADI']
    lat = row['lat']
    lon = row['lon']

    # Hat kodunu ayıkla: Örneğin "M1A", "T4", "M11"
    match_hat = re.search(r'\b(M\d+[A-Z]?|T\d+)\b', proje_adi_full)
    if not match_hat:
        continue  # Hat kodu yoksa atla
    hat_kodu = match_hat.group()

    # Bu projedeki istasyonun konumuna yakın existing istasyonu bul
    match = existing_stations[
        (existing_stations['lat'].sub(lat).abs() < TOLERANCE) &
        (existing_stations['lon'].sub(lon).abs() < TOLERANCE)
    ]

    if not match.empty:
        for station_id in match['station_id']:
            project_dict[hat_kodu].append(station_id)

# Sonuç dict olarak:
project_dict = dict(project_dict)


In [38]:
print(project_dict)

{'T1': [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35007, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124], 'M4': [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35167, 35168, 35169, 35170], 'M1A': [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35048, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114], 'M9': [34904, 34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191], 'T4': [34905, 34906, 34923, 34940, 34942, 34948, 34964, 34967, 34968, 34972, 34973, 34974, 34996, 35002, 35008, 35032, 35056, 35063, 35091, 35098, 35110, 35115], 'M5': [34912, 34914, 34979, 35011, 35015, 35018, 35041, 35044, 35047, 35067, 35071, 35073, 35075, 35076, 35077, 35100,

In [39]:
connectivity_dict = calculate_connectivity_dict(all_stations_pop)

In [47]:
print(len(connectivity_dict))

1240


In [42]:
GeneticAlgorithm = GeneticMetroPlanner(
    all_stations_df = all_stations_pop_filtered,
    connectivity_dict = connectivity_dict,
    existing_lines_dict = project_dict)

In [43]:
chromosome = GeneticAlgorithm.generate_chromosome()
for line, stations in chromosome.items():
    print(f"{line}: {len(stations)} istasyon")

T1: 33 istasyon
M4: 30 istasyon
M1A: 19 istasyon
M9: 18 istasyon
T4: 24 istasyon
M5: 29 istasyon
M7: 32 istasyon
T3: 10 istasyon
T5: 15 istasyon
M2: 19 istasyon
M3: 23 istasyon
M6: 9 istasyon
M1B: 18 istasyon
M8: 13 istasyon
M11: 16 istasyon
M10: 2 istasyon
M12: 11 istasyon


In [45]:
for line , stations in project_dict.items():
    print(f"{line}:  {len(stations)} istasyon")

T1:  32 istasyon
M4:  30 istasyon
M1A:  19 istasyon
M9:  17 istasyon
T4:  22 istasyon
M5:  24 istasyon
M7:  30 istasyon
T3:  10 istasyon
T5:  13 istasyon
M2:  19 istasyon
M3:  22 istasyon
M6:  4 istasyon
M1B:  16 istasyon
M8:  13 istasyon
M11:  14 istasyon
M10:  2 istasyon
M12:  11 istasyon


In [46]:
print(chromosome)

{'T1': [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35007, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124, 34197], 'M4': [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35167, 35168, 35169, 35170], 'M1A': [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35048, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114], 'M9': [34904, 34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34544], 'T4': [34905, 34906, 34923, 34940, 34942, 34948, 34964, 34967, 34968, 34972, 34973, 34974, 34996, 35002, 35008, 35032, 35056, 35063, 35091, 35098, 35110, 35115, 34382, 34236], 'M5': [34912, 34914, 34979, 35011, 35015, 35018, 35041, 35044, 35047, 35067, 35071, 35073,

In [49]:
m = visualize_chromosome(chromosome , all_stations)

NameError: name 'visualize_chromosome' is not defined