In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import re
import importlib
from functions import *
import random
import folium
from GA import GeneticMetroPlanner;
from shapely.geometry import Polygon
import ast
import json

# 1) DATASETS and Data Cleaning

## a) Population Dataset

In [3]:
population_path = "/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/population_with_coordinates.csv"

In [4]:
raw_population_df = pd.read_csv(population_path , header=None, sep=",")
raw_population_df = raw_population_df.dropna(how='all', axis=1)
raw_population_df.columns = raw_population_df.iloc[0]
raw_population_df = raw_population_df.drop(index=0).reset_index(drop=True)
raw_population_df = raw_population_df.loc[:, raw_population_df.columns.notna()]
raw_population_df[['latitude', 'longitude']] = raw_population_df['coordinate'].str.split(',', expand=True)
raw_population_df['latitude'] = raw_population_df['latitude'].astype(float)
raw_population_df['longitude'] = raw_population_df['longitude'].astype(float)
raw_population_df['population'] = raw_population_df['population'].astype(int)
population_df = raw_population_df[['population', 'neighborhood', 'township', 'neighborhood_code', 'latitude', 'longitude']]
population_df = population_df.dropna()

In [5]:
population_df.rename(columns={'neighborhood_code' : 'neighborhood_id'} , inplace=True)

In [6]:
population_df.head()

Unnamed: 0,population,neighborhood,township,neighborhood_id,latitude,longitude
0,4693,Adnan Menderes,Arnavutköy,40490,41.211179,28.700163
1,47828,Anadolu,Arnavutköy,99359,41.186036,28.749463
2,23116,Arnavutköy Merkez,Arnavutköy,40478,41.182546,28.737891
3,10566,Atatürk,Arnavutköy,40482,41.190149,28.760125
4,12829,Boğazköy İstiklal,Arnavutköy,40483,41.183488,28.768235


In [7]:
population_df.describe()

Unnamed: 0,population,latitude,longitude
count,708.0,708.0,708.0
mean,21397.411017,41.029966,28.972329
std,15943.770157,0.071404,0.193073
min,2025.0,40.817868,28.409171
25%,10279.25,40.998081,28.854917
50%,17970.0,41.028968,28.98112
75%,28527.75,41.063456,29.109349
max,112367.0,41.269994,29.386538


## b) Metro Station Dataset

In [8]:
stations_gdf = gpd.read_file("/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/station.geojson")

In [9]:
stations_gdf["lat"] = stations_gdf.geometry.x
stations_gdf["lon"] = stations_gdf.geometry.y
stations_df = stations_gdf[["ISTASYON", "PROJE_ADI", "HAT_TURU", "lat", "lon"]]
stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]


In [10]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694


# Visulation
Initial maps

In [11]:
istanbul_coords = [41.0082, 28.9784]


map_istanbul = folium.Map(location=istanbul_coords, zoom_start=11, tiles='CartoDB positron')

for index, row in stations_df.iterrows():
    popup_text = f"{row['ISTASYON']}<br>{row['PROJE_ADI']}"
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=popup_text,
        icon=folium.Icon(color='blue' if row['HAT_TURU'] == 'Metro' else 'green', icon='train')
    ).add_to(map_istanbul)

map_istanbul.save("maps/initial_metro_lines.html")

In [12]:
m = folium.Map(location=istanbul_coords, zoom_start=10 , tiles='CartoDB positron')


for _, row in population_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{row['neighborhood']} ({row['township']})\nNüfus: {row['population']}",
        tooltip=row['neighborhood']
    ).add_to(m)

m.save("maps/neighborhood.html")

# Calculating grids
These are going to be candidate stations

In [13]:
with open("coordinates/coordinates_europe.txt" , "r") as file:
    lines_eu = file.readlines()

In [14]:
coordinates_eu = []
for line in lines_eu:
    line = line.strip()
    if not line:
        continue  # boş satırı atla
    try:
        lon, lat = map(float, line.split(","))
        coordinates_eu.append((lon, lat))  # GeoJSON: [longitude, latitude]
    except ValueError:
        print(f"Geçersiz satır atlandı: {line}")

In [15]:
print(len(coordinates_eu))

45


In [16]:
if coordinates_eu[0] != coordinates_eu[-1]:
    coordinates_eu.append(coordinates_eu[0])


In [17]:
polygon_eu = Polygon(coordinates_eu)

In [18]:
grid_eu = create_grid_for_polygon(polygon_eu)

In [19]:
grid_df_eu = pd.DataFrame(grid_eu)

In [20]:
grid_df_eu.head()

Unnamed: 0,station_id,lat,lon
0,1,41.086525,28.911734
1,2,40.978832,28.635453
2,3,41.043488,28.756927
3,4,40.998649,28.858029
4,5,41.111508,28.806758


In [21]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df_eu.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("maps/grid_eu.html")

In [22]:
with open("coordinates/coordinates_asia.txt" , "r") as file:
    lines_as = file.readlines()

In [23]:
coordinates_as = []
for line in lines_as:
    line = line.strip()
    if not line:
        continue  # boş satırı atla
    try:
        lon, lat = map(float, line.split(","))
        coordinates_as.append((lon, lat))  # GeoJSON: [longitude, latitude]
    except ValueError:
        print(f"Geçersiz satır atlandı: {line}")

In [24]:
print(len(coordinates_as))

34


In [25]:
if coordinates_as[0] != coordinates_as[-1]:
    coordinates_as.append(coordinates_as[0])

In [26]:
polygon_as = Polygon(coordinates_as)

In [27]:
grid_as = create_grid_for_polygon(polygon_as)

In [28]:
grid_df_as = pd.DataFrame(grid_as)

In [29]:
grid_df_as['station_id'] += 1000

In [30]:
grid_df_as.head()

Unnamed: 0,station_id,lat,lon
0,1001,41.005446,29.238085
1,1002,41.028116,29.187794
2,1003,40.990808,29.228939
3,1004,40.822102,29.285153
4,1005,40.902404,29.299563


In [31]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df_as.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("maps/grid_as.html")

In [42]:
grid_df = pd.concat([grid_df_eu , grid_df_as] , ignore_index=True)

In [43]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("maps/grid_istanbul.html")

In [44]:
grid_df['TYPE'] = 'candidate'
stations_df['TYPE'] = 'existing'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df['TYPE'] = 'existing'


In [45]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon,TYPE
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681,existing
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398,existing
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809,existing
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234,existing
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694,existing


In [46]:
grid_df.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,1,41.086525,28.911734,candidate
1,2,40.978832,28.635453,candidate
2,3,41.043488,28.756927,candidate
3,4,40.998649,28.858029,candidate
4,5,41.111508,28.806758,candidate


In [46]:
print(grid_df.dtypes)

station_id      int64
lat           float64
lon           float64
TYPE           object
dtype: object


In [47]:
grid_df.describe()

Unnamed: 0,station_id,lat,lon
count,2000.0,2000.0,2000.0
mean,1000.5,41.005671,29.00701
std,577.494589,0.071553,0.220327
min,1.0,40.815976,28.514429
25%,500.75,40.964383,28.825122
50%,1000.5,41.008788,29.04336
75%,1500.25,41.052774,29.19631
max,2000.0,41.216205,29.387177


In [47]:
all_stations = pd.concat([
    grid_df[['station_id', 'lat', 'lon' , 'TYPE']],
    stations_df[['ISTASYON', 'lat', 'lon' , 'TYPE']].rename(columns={'ISTASYON': 'station_id'})
]).reset_index(drop=True)


In [48]:
all_stations = all_stations.reset_index(drop=True)
all_stations['station_id'] = all_stations.index + 34001

In [49]:
all_stations.tail()

Unnamed: 0,station_id,lat,lon,TYPE
2338,36339,40.990637,29.104066,existing
2339,36340,40.97835,29.06629,existing
2340,36341,41.042728,29.098798,existing
2341,36342,41.056077,28.813604,existing
2342,36343,41.045517,28.93796,existing


In [50]:
all_stations_neighboor = calculate_arrived_neighborhood_per_station(all_stations , population_df)

In [51]:
all_stations_neighboor.head()

Unnamed: 0,station_id,lat,lon,TYPE,arrived_neighborhoods
0,34001,41.086525,28.911734,candidate,"{40461, 40462}"
1,34002,40.978832,28.635453,candidate,{40873}
2,34003,41.043488,28.756927,candidate,{40599}
3,34004,40.998649,28.858029,candidate,"{40177, 40179, 40178}"
4,34005,41.111508,28.806758,candidate,{40357}


### Extracting Metro Lines and Stations
We have not a extract metro lines and their stations dictionary. For further operation we need this dict .

In [54]:
TOLERANCE = 0.0005

project_dict = defaultdict(list)

existing_stations = all_stations[all_stations['TYPE'] == 'existing']

for idx, row in stations_df.iterrows():
    proje_adi_full = row['PROJE_ADI']
    lat = row['lat']
    lon = row['lon']

    match_hat = re.search(r'\b(M\d+[A-Z]?|T\d+)\b', proje_adi_full)
    if not match_hat:
        continue  
    hat_kodu = match_hat.group()


    match = existing_stations[
        (existing_stations['lat'].sub(lat).abs() < TOLERANCE) &
        (existing_stations['lon'].sub(lon).abs() < TOLERANCE)
    ]

    if not match.empty:
        for station_id in match['station_id']:
            project_dict[hat_kodu].append(station_id)

project_dict = dict(project_dict)


In [55]:
print(project_dict)

{'T1': [36001, 36017, 36025, 36027, 36031, 36032, 36033, 36055, 36056, 36065, 36066, 36071, 36075, 36094, 36095, 36097, 36101, 36107, 36125, 36126, 36127, 36128, 36131, 36135, 36150, 36151, 36154, 36185, 36193, 36196, 36209, 36224], 'M4': [36002, 36009, 36010, 36038, 36091, 36099, 36110, 36112, 36129, 36134, 36139, 36153, 36157, 36124, 36158, 36162, 36165, 36181, 36186, 36192, 36261, 36262, 36263, 36264, 36265, 36266, 36267, 36268, 36269, 36270], 'M1A': [36003, 36041, 36045, 36047, 36058, 36061, 36069, 36098, 36103, 36133, 36148, 36183, 36184, 36190, 36197, 36199, 36211, 36212, 36214], 'M9': [36004, 36084, 36187, 36160, 36230, 36250, 36283, 36284, 36285, 36286, 36287, 36288, 36326, 36289, 36114, 36290, 36291], 'T4': [36005, 36006, 36023, 36040, 36042, 36048, 36064, 36067, 36068, 36072, 36073, 36074, 36096, 36102, 36108, 36132, 36156, 36163, 36191, 36198, 36210, 36215], 'M5': [36012, 36014, 36079, 36111, 36115, 36118, 36141, 36144, 36147, 36167, 36171, 36173, 36175, 36176, 36177, 36200,

In [63]:
existing_stations_dict = {
    'T1': [36001, 36017, 36025, 36027, 36031, 36032, 36033, 36055, 36056, 36065, 36066, 36071, 36075, 36094, 36095, 36097, 36101, 36125, 
           36126, 36127, 36128, 36131, 36135, 36150, 36151, 36154, 36185, 36193, 36196, 36209, 36224 , 36107], 
    'M4': [36002, 36009, 36010, 36038, 36091, 36099, 36110, 36112, 36129, 36134, 36139, 36153, 36157, 36124, 36158, 36162, 36165, 36181, 36186, 
           36192, 36261, 36262, 36263, 36264, 36265, 36266,  36268, 36269, 36270 , 36267], 
    'M1A': [36003, 36041, 36045, 36047, 36058, 36061, 36069, 36098, 36103, 36133, 36183, 36184, 36190, 36197, 36199, 36211, 36212, 36214 ,36148], 
    'M9': [36084, 36187, 36160, 36230, 36250, 36283, 36284, 36285, 36286, 36287, 36288, 36326, 36289, 36114, 36290, 36291 , 36004], 
    'T4': [36005, 36006,  36040, 36042, 36048, 36064, 36067, 36068, 36072, 36073, 36074, 36096, 36102, 36108, 36132, 36156, 36163, 36191, 
           36198, 36210, 36215 , 36023], 
    'M5': [36012, 36014, 36079, 36111, 36115, 36118, 36141, 36144, 36147, 36167, 36171, 36173, 36175, 36176, 36177, 
                                        36200, 36217, 36254, 36255, 36257, 36258, 36259, 36260 ,36256], 
    'M7': [36015, 36037, 36052, 36053, 36078, 36080, 36082, 36113, 36116, 36121, 36143, 36146, 36170, 36203, 36207, 36219, 36220, 36221,  
           36301, 36302, 36303, 36304, 36305, 36306, 36307, 36308, 36309, 36310, 36342 , 36222], 
    'T5': [36018, 36019, 36022, 36039, 36059, 36155, 36225, 36226, 36227, 36282, 36314, 36343 , 36020],
    'M2': [36024, 36216, 36030, 36044, 36060, 36063, 36089, 36090,  36105, 36109, 36149, 36164, 36166, 36182, 36194, 36024, 36216, 36223 , 36104], 
    'M3': [36026, 36057, 36028, 36043, 36070, 36106, 36137, 36160, 36230, 36189, 36274, 36277, 36278, 36280, 36281, 36311, 36312, 36327, 
           36328, 36329, 36330 , 36161],
    'M6': [36049, 36159, 36188 , 36140],
    'M1B': [36026, 36057, 36136, 36138, 36152, 36195, 36275, 36276, 36279, 36316, 36319, 36321, 36322, 36288, 36326 ,36313], 
    'M8': [36218, 36271, 36273, 36292, 36293, 36294, 36295, 36296, 36297, 36298, 36299, 36300 , 36272], 
    'M11': [36240, 36241, 36242, 36243, 36244, 36245, 36246, 36247, 36318, 36320, 36323, 36324, 36325 , 36315], 
    'M12': [36331, 36332, 36333, 36334, 36335, 36336, 36337, 36338, 36339, 36340, 36341]}

we dont know last stations of metro lines. i added last station end of the metro line array.

In [64]:
m_old = visualize_chromosome(existing_stations_dict , all_stations)
m_old.save("maps/initial_metro_with_color.html")

we should calculate the connectivitiy dictionary . this dict shows which station could be connected whichs stations 

In [65]:
connectivity_dict = calculate_connectivity_dict(all_stations)

In [66]:
print(len(connectivity_dict))

2340


# GA

In [67]:
GeneticAlgorithm = GeneticMetroPlanner(
    all_stations_df = all_stations_neighboor,
    neighborhood_df=population_df,
    connectivity_dict = connectivity_dict,
    existing_lines_dict = existing_stations_dict,
    mutation_line_rate=1)

In [68]:
for line , stations in existing_stations_dict.items():
    print(f"{line}:  {len(stations)} station")

T1:  32 station
M4:  30 station
M1A:  19 station
M9:  17 station
T4:  22 station
M5:  24 station
M7:  30 station
T5:  13 station
M2:  19 station
M3:  22 station
M6:  4 station
M1B:  16 station
M8:  13 station
M11:  14 station
M12:  11 station


In [69]:
chromosome = GeneticAlgorithm.add_metro_stations(existing_stations_dict)
for line, stations in chromosome.items():
    print(f"{line}: {len(stations)} station")

T1: 33 station
M4: 31 station
M1A: 19 station
M9: 19 station
T4: 24 station
M5: 25 station
M7: 32 station
T5: 15 station
M2: 21 station
M3: 22 station
M6: 4 station
M1B: 16 station
M8: 13 station
M11: 15 station
M12: 13 station


In [70]:
print(chromosome)

{'T1': [36001, 36017, 36025, 36027, 36031, 36032, 36033, 36055, 36056, 36065, 36066, 36071, 36075, 36094, 36095, 36097, 36101, 36125, 36126, 36127, 36128, 36131, 36135, 36150, 36151, 36154, 36185, 36193, 36196, 36209, 36224, 36107, 34213], 'M4': [36002, 36009, 36010, 36038, 36091, 36099, 36110, 36112, 36129, 36134, 36139, 36153, 36157, 36124, 36158, 36162, 36165, 36181, 36186, 36192, 36261, 36262, 36263, 36264, 36265, 36266, 36268, 36269, 36270, 36267, 35029], 'M1A': [36003, 36041, 36045, 36047, 36058, 36061, 36069, 36098, 36103, 36133, 36183, 36184, 36190, 36197, 36199, 36211, 36212, 36214, 36148], 'M9': [36084, 36187, 36160, 36230, 36250, 36283, 36284, 36285, 36286, 36287, 36288, 36326, 36289, 36114, 36290, 36291, 36004, 34425, 34979], 'T4': [36005, 36006, 36040, 36042, 36048, 36064, 36067, 36068, 36072, 36073, 36074, 36096, 36102, 36108, 36132, 36156, 36163, 36191, 36198, 36210, 36215, 36023, 34947, 34524], 'M5': [36012, 36014, 36079, 36111, 36115, 36118, 36141, 36144, 36147, 36167,

In [71]:
print(GeneticAlgorithm.calculate_transfer_number(chromosome))

32


In [72]:
m_n = visualize_chromosome(chromosome , all_stations)

In [73]:
m_n.save("maps/chromosome_map_first_iteration.html")

In [74]:
print(GeneticAlgorithm.calculate_population_for_chromosome(chromosome))

8799089.000000004


In [75]:
GeneticAlgorithm.generate_initial_population()

In [76]:
print(len(GeneticAlgorithm.population))

10


In [77]:
GeneticAlgorithm.fitness()

In [78]:
print(GeneticAlgorithm.fitness_values)

[np.float64(3.9414330337389245), np.float64(3.7290355592311655), np.float64(3.7847858343595933), np.float64(3.7529775302709236), np.float64(3.9439457545351995), np.float64(3.840069073442893), np.float64(3.8015474507217095), np.float64(4.033935383068237), np.float64(3.798202198648962), np.float64(3.9684182752392303)]


# Implementation

In [81]:
planner = GeneticMetroPlanner(
    all_stations_df=all_stations_neighboor,
    neighborhood_df=population_df,
    connectivity_dict=connectivity_dict,
    existing_lines_dict=existing_stations_dict,
    mutation_rate=0.2,
    mutation_line_rate=0.4,
    generation_number=5,
    child_number=10,
    max_per_station=1,
    w2= 1
)


In [82]:
planner.run()

Algorithm is started.
Initial generation is created.
Generation 1: Best fitness = 1.3560377236487595
Population : 8464683.0
Cost : 9
Generation 2: Best fitness = 1.3421793562886875
Population : 8684920.000000006
Cost : 15
Generation 3: Best fitness = 1.3671927345176433
Population : 9016024.000000002
Cost : 42
Generation 4: Best fitness = 1.2645554843453735
Population : 9061150.000000013
Cost : 54
Generation 5: Best fitness = 1.2166948617213058
Population : 9265251.000000015
Cost : 70


In [83]:
planner.details()

Population : 9265251.000000015
Cost : 70
The number of transfer stations : 44
The number of initial metro lines : 15 | The number of result metro lines : 23 
The number of initial metro stations : 278 | The number of result metro stations : 348


In [84]:
m_best = visualize_chromosome(planner.best_chromosome , all_stations)

In [85]:
m_best.save('maps/best_metro_lines.html')