In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import re
import importlib
from functions import *
import random
import folium
from GA import *
from shapely.geometry import Polygon

# 1) DATASETS and Data Cleaning

## a) Population Dataset

In [3]:
population_path = "/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/population_with_coordinates.csv"

In [4]:
raw_population_df = pd.read_csv(population_path , header=None, sep=",")
raw_population_df = raw_population_df.dropna(how='all', axis=1)
raw_population_df.columns = raw_population_df.iloc[0]
raw_population_df = raw_population_df.drop(index=0).reset_index(drop=True)
raw_population_df = raw_population_df.loc[:, raw_population_df.columns.notna()]
raw_population_df[['latitude', 'longitude']] = raw_population_df['coordinate'].str.split(',', expand=True)
raw_population_df['latitude'] = raw_population_df['latitude'].astype(float)
raw_population_df['longitude'] = raw_population_df['longitude'].astype(float)
raw_population_df['population'] = raw_population_df['population'].astype(int)
population_df = raw_population_df[['population', 'neighborhood', 'township', 'neighborhood_code', 'latitude', 'longitude']]
population_df = population_df.dropna()

In [5]:
population_df.head()

Unnamed: 0,population,neighborhood,township,neighborhood_code,latitude,longitude
0,4693,Adnan Menderes,Arnavutköy,40490,41.211179,28.700163
1,47828,Anadolu,Arnavutköy,99359,41.186036,28.749463
2,23116,Arnavutköy Merkez,Arnavutköy,40478,41.182546,28.737891
3,10566,Atatürk,Arnavutköy,40482,41.190149,28.760125
4,12829,Boğazköy İstiklal,Arnavutköy,40483,41.183488,28.768235


In [6]:
population_df.describe()

Unnamed: 0,population,latitude,longitude
count,708.0,708.0,708.0
mean,21397.411017,41.029966,28.972329
std,15943.770157,0.071404,0.193073
min,2025.0,40.817868,28.409171
25%,10279.25,40.998081,28.854917
50%,17970.0,41.028968,28.98112
75%,28527.75,41.063456,29.109349
max,112367.0,41.269994,29.386538


## b) Metro Station Dataset

In [7]:
stations_gdf = gpd.read_file("/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/station.geojson")

In [8]:
stations_gdf["lat"] = stations_gdf.geometry.x
stations_gdf["lon"] = stations_gdf.geometry.y
stations_df = stations_gdf[["ISTASYON", "PROJE_ADI", "HAT_TURU", "lat", "lon"]]
stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]


In [9]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694


# Visulation
Initial maps

In [10]:
istanbul_coords = [41.0082, 28.9784]


map_istanbul = folium.Map(location=istanbul_coords, zoom_start=11, tiles='CartoDB positron')

for index, row in stations_df.iterrows():
    popup_text = f"{row['ISTASYON']}<br>{row['PROJE_ADI']}"
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=popup_text,
        icon=folium.Icon(color='blue' if row['HAT_TURU'] == 'Metro' else 'green', icon='train')
    ).add_to(map_istanbul)

map_istanbul.save("initial_metro_lines.html")

In [11]:
m = folium.Map(location=istanbul_coords, zoom_start=10 , tiles='CartoDB positron')


for _, row in population_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{row['neighborhood']} ({row['township']})\nNüfus: {row['population']}",
        tooltip=row['neighborhood']
    ).add_to(m)

m.save("neighborhood.html")

# Calculating grids
These are going to be candidate stations

In [12]:
polygon = Polygon([(40.963, 28.605) , (41.000, 28.984) ,  (41.168, 29.051) , (41.098, 28.578) ])

In [15]:
grid_list = create_grid_for_polygon(polygon)

In [16]:
print(len(grid_list))

1000


In [17]:
grid_df = pd.DataFrame(grid_list)
grid_df.head()

Unnamed: 0,station_id,lat,lon
0,1,41.011234,28.847162
1,2,41.087647,28.987508
2,3,41.007238,28.66103
3,4,41.084536,28.642964
4,5,41.053466,28.934142


In [18]:
grid_df['TYPE'] = 'candidate'
stations_df['TYPE'] = 'existing'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df['TYPE'] = 'existing'


In [19]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon,TYPE
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681,existing
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398,existing
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809,existing
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234,existing
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694,existing


In [20]:
grid_df.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,1,41.011234,28.847162,candidate
1,2,41.087647,28.987508,candidate
2,3,41.007238,28.66103,candidate
3,4,41.084536,28.642964,candidate
4,5,41.053466,28.934142,candidate


In [21]:
print(grid_df.dtypes)

station_id      int64
lat           float64
lon           float64
TYPE           object
dtype: object


In [22]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("grid.html")

In [23]:
all_stations = pd.concat([
    grid_df[['station_id', 'lat', 'lon' , 'TYPE']],
    stations_df[['ISTASYON', 'lat', 'lon' , 'TYPE']].rename(columns={'ISTASYON': 'station_id'})
]).reset_index(drop=True)


In [24]:
all_stations = all_stations.reset_index(drop=True)
all_stations['station_id'] = all_stations.index + 34001

In [25]:
all_stations.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,34001,41.011234,28.847162,candidate
1,34002,41.087647,28.987508,candidate
2,34003,41.007238,28.66103,candidate
3,34004,41.084536,28.642964,candidate
4,34005,41.053466,28.934142,candidate


In [26]:
all_stations_pop = calculate_population_per_station(all_stations , population_df)

In [27]:
all_stations_pop.head()

Unnamed: 0,station_id,lat,lon,TYPE,arrived_population
0,34001,41.011234,28.847162,candidate,174564
1,34002,41.087647,28.987508,candidate,83647
2,34003,41.007238,28.66103,candidate,32255
3,34004,41.084536,28.642964,candidate,0
4,34005,41.053466,28.934142,candidate,71021


In [28]:
all_stations_pop.describe()

Unnamed: 0,station_id,lat,lon,arrived_population
count,1343.0,1343.0,1343.0,1343.0
mean,34672.0,41.048599,28.854584,47522.513775
std,387.835016,0.057993,0.151283,51093.246152
min,34001.0,40.784064,28.582889,0.0
25%,34336.5,41.011572,28.739573,0.0
50%,34672.0,41.048974,28.853868,33808.0
75%,35007.5,41.089144,28.955191,76827.5
max,35343.0,41.256408,29.409966,294879.0


### Extracting Metro Lines and Stations
We have not a extract metro lines and their stations dictionary. For further operation we need this dict .

In [29]:
TOLERANCE = 0.0005

project_dict = defaultdict(list)

existing_stations = all_stations_pop[all_stations_pop['TYPE'] == 'existing']

for idx, row in stations_df.iterrows():
    proje_adi_full = row['PROJE_ADI']
    lat = row['lat']
    lon = row['lon']

    match_hat = re.search(r'\b(M\d+[A-Z]?|T\d+)\b', proje_adi_full)
    if not match_hat:
        continue  
    hat_kodu = match_hat.group()


    match = existing_stations[
        (existing_stations['lat'].sub(lat).abs() < TOLERANCE) &
        (existing_stations['lon'].sub(lon).abs() < TOLERANCE)
    ]

    if not match.empty:
        for station_id in match['station_id']:
            project_dict[hat_kodu].append(station_id)

project_dict = dict(project_dict)


In [30]:
print(project_dict)

{'T1': [35001, 35017, 35025, 35027, 35031, 35032, 35033, 35055, 35056, 35065, 35066, 35071, 35075, 35094, 35095, 35097, 35101, 35107, 35125, 35126, 35127, 35128, 35131, 35135, 35150, 35151, 35154, 35185, 35193, 35196, 35209, 35224], 'M4': [35002, 35009, 35010, 35038, 35091, 35099, 35110, 35112, 35129, 35134, 35139, 35153, 35157, 35124, 35158, 35162, 35165, 35181, 35186, 35192, 35261, 35262, 35263, 35264, 35265, 35266, 35267, 35268, 35269, 35270], 'M1A': [35003, 35041, 35045, 35047, 35058, 35061, 35069, 35098, 35103, 35133, 35148, 35183, 35184, 35190, 35197, 35199, 35211, 35212, 35214], 'M9': [35004, 35084, 35187, 35160, 35230, 35250, 35283, 35284, 35285, 35286, 35287, 35288, 35326, 35289, 35114, 35290, 35291], 'T4': [35005, 35006, 35023, 35040, 35042, 35048, 35064, 35067, 35068, 35072, 35073, 35074, 35096, 35102, 35108, 35132, 35156, 35163, 35191, 35198, 35210, 35215], 'M5': [35012, 35014, 35079, 35111, 35115, 35118, 35141, 35144, 35147, 35167, 35171, 35173, 35175, 35176, 35177, 35200,

we dont know last stations of metro lines. i added last station end of the metro line array.

In [40]:
m_old = visualize_chromosome(existing_stations_dict , all_stations)
m_old.save("chromosome_map_old.html")

we should calculate the connectivitiy dictionary . this dict shows which station could be connected whichs stations 

In [33]:
connectivity_dict = calculate_connectivity_dict(all_stations_pop)

In [34]:
print(len(connectivity_dict))

1340


# GA

In [35]:
GeneticAlgorithm = GeneticMetroPlanner(
    all_stations_df = all_stations_pop,
    connectivity_dict = connectivity_dict,
    existing_lines_dict = existing_stations_dict)

In [36]:
for line , stations in existing_stations_dict.items():
    print(f"{line}:  {len(stations)} station")

T1:  32 station
M4:  30 station
M1A:  19 station
M9:  17 station
T4:  22 station
M5:  24 station
M7:  30 station
T5:  13 station
M2:  19 station
M3:  22 station
M6:  4 station
M1B:  16 station
M8:  13 station
M11:  14 station
M12:  11 station


In [37]:
chromosome = GeneticAlgorithm.generate_chromosome()
for line, stations in chromosome.items():
    print(f"{line}: {len(stations)} station")

T1: 33 station
M4: 31 station
M1A: 20 station
M9: 17 station
T4: 23 station
M5: 27 station
M7: 33 station
T5: 15 station
M2: 19 station
M3: 24 station
M6: 6 station
M1B: 18 station
M8: 14 station
M11: 14 station
M12: 11 station


In [38]:
print(chromosome)

{'T1': [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124, 35007, 34827], 'M4': [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35168, 35169, 35170, 35167, 35340], 'M1A': [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114, 35048, 34914], 'M9': [34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34904], 'T4': [34905, 34906, 34923, 34940, 34942, 34948, 34964, 34967, 34968, 34972, 34973, 34974, 34996, 35002, 35008, 35032, 35056, 35063, 35091, 35098, 35110, 35115, 35024], 'M5': [34912, 34914, 34979, 35011, 35015, 35018, 35041, 35044, 35047, 35067, 35071, 35073,

In [40]:
def visualize_chromosome(chromosome, stations_df):
    m = folium.Map(location=[41.015137, 28.979530], zoom_start=11 , tiles='CartoDB positron')

    color_palette = [
    '#1f77b4',  # koyu mavi
    '#ff7f0e',  # turuncu
    '#2ca02c',  # koyu yeşil
    '#d62728',  # kırmızı
    '#9467bd',  # mor
    '#8c564b',  # kahverengi
    '#e377c2',  # pembe
    '#7f7f7f',  # gri
    '#bcbd22',  # zeytin
    '#17becf',  # cam göbeği
    '#393b79',  # lacivert
    '#637939',  # zeytin yeşili
    '#8c6d31',  # koyu altın
    '#843c39',  # bordo
    '#7b4173',  # koyu pembe
    '#5254a3',  # orta mavi
    '#9c9ede'   # açık mor ama yeterince koyu
                ]
    line_colors = {}

    for i, (line, station_ids) in enumerate(chromosome.items()):
        color = color_palette[i % len(color_palette)]
        line_colors[line] = color
        line_coords = []

        for station_id in station_ids:
            row = stations_df[stations_df['station_id'] == station_id]
            if row.empty:
                continue
            lat = row.iloc[0]['lat']
            lon = row.iloc[0]['lon']
            line_coords.append((lat, lon))

            folium.CircleMarker(
                location=(lat, lon),
                radius=4,
                color=color,
                fill=True,
                fill_color=color,
                fill_opacity=0.8,
                popup=f"{line}: {station_id}"
            ).add_to(m)

        

    return m

In [41]:
m_n = visualize_chromosome(chromosome , all_stations)

In [42]:
m_n.save("chromosome_map_new.html")

In [43]:
m_old = visualize_chromosome(existing_stations_dict , all_stations)
m_old.save("chromosome_map_old.html")

In [44]:
print(GeneticAlgorithm.calculate_population_for_chromosome(chromosome))

11885583


In [45]:
GeneticAlgorithm.generate_initial_population()

In [46]:
print(len(GeneticAlgorithm.population))

10


In [47]:
GeneticAlgorithm.fitness_population()

In [48]:
print(GeneticAlgorithm.fitness_values)

[np.float64(2.420021768686403), np.float64(1.0), np.float64(3.958176104283112), np.float64(3.744449565981973), np.float64(2.9357581016052796), np.float64(4.0), np.float64(3.3204828361783694), np.float64(3.7878496901900176), np.float64(3.4642919947786144), np.float64(3.534268896213979)]


# Implementation

In [49]:
planner = GeneticMetroPlanner(
    all_stations_df=all_stations,
    connectivity_dict=connectivity_dict,
    existing_lines_dict=existing_stations_dict,
    mutation_rate=0.2,
    generation_number=10,
    child_number=20,
    new_station_number=50,
    max_per_station=5,
    w2= 2
)


In [50]:
best_solution, best_score = planner.run()
print("Best solution")
print("Arrived population", best_score)

for line, stations in best_solution.items():
    print(f"{line}: {stations}")

Best solution
Arrived population 2.355203860595778
T1: [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124, 35007]
M4: [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35168, 35169, 35170, 35167, 35342, 35264, 35304, 35478, 35344, 35449, 35415, 35306, 35348, 35531, 35571, 35535, 35426]
M1A: [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114, 35048, 34777, 34730, 34513, 34342, 34340]
M9: [34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34904, 34772, 34638, 34859, 34641, 34551, 34768, 34771, 34818, 34908, 34644, 34514, 34264, 34221, 34428, 34384]
T4: [34905,

In [51]:
m_best = visualize_chromosome(best_solution , all_stations)

In [3]:
m_best.save('best_metro_lines.html')

NameError: name 'm_best' is not defined