In [1]:
%load_ext autoreload
%autoreload 2

In [26]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import re
import importlib
from functions import *
import random
import folium
from GA import *
from shapely.geometry import Polygon

# 1) DATASETS and Data Cleaning

## a) Population Dataset

In [3]:
population_path = "/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/population_with_coordinates.csv"

In [4]:
raw_population_df = pd.read_csv(population_path , header=None, sep=",")
raw_population_df = raw_population_df.dropna(how='all', axis=1)
raw_population_df.columns = raw_population_df.iloc[0]
raw_population_df = raw_population_df.drop(index=0).reset_index(drop=True)
raw_population_df = raw_population_df.loc[:, raw_population_df.columns.notna()]
raw_population_df[['latitude', 'longitude']] = raw_population_df['coordinate'].str.split(',', expand=True)
raw_population_df['latitude'] = raw_population_df['latitude'].astype(float)
raw_population_df['longitude'] = raw_population_df['longitude'].astype(float)
raw_population_df['population'] = raw_population_df['population'].astype(int)
population_df = raw_population_df[['population', 'neighborhood', 'township', 'neighborhood_code', 'latitude', 'longitude']]
population_df = population_df.dropna()

In [5]:
population_df.head()

Unnamed: 0,population,neighborhood,township,neighborhood_code,latitude,longitude
0,4693,Adnan Menderes,Arnavutköy,40490,41.211179,28.700163
1,47828,Anadolu,Arnavutköy,99359,41.186036,28.749463
2,23116,Arnavutköy Merkez,Arnavutköy,40478,41.182546,28.737891
3,10566,Atatürk,Arnavutköy,40482,41.190149,28.760125
4,12829,Boğazköy İstiklal,Arnavutköy,40483,41.183488,28.768235


In [6]:
population_df.describe()

Unnamed: 0,population,latitude,longitude
count,708.0,708.0,708.0
mean,21397.411017,41.029966,28.972329
std,15943.770157,0.071404,0.193073
min,2025.0,40.817868,28.409171
25%,10279.25,40.998081,28.854917
50%,17970.0,41.028968,28.98112
75%,28527.75,41.063456,29.109349
max,112367.0,41.269994,29.386538


## b) Metro Station Dataset

In [7]:
stations_gdf = gpd.read_file("/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/station.geojson")

In [8]:
stations_gdf["lat"] = stations_gdf.geometry.x
stations_gdf["lon"] = stations_gdf.geometry.y
stations_df = stations_gdf[["ISTASYON", "PROJE_ADI", "HAT_TURU", "lat", "lon"]]
stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]


In [9]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694


# Visulation
Initial maps

In [10]:
istanbul_coords = [41.0082, 28.9784]


map_istanbul = folium.Map(location=istanbul_coords, zoom_start=11, tiles='CartoDB positron')

for index, row in stations_df.iterrows():
    popup_text = f"{row['ISTASYON']}<br>{row['PROJE_ADI']}"
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=popup_text,
        icon=folium.Icon(color='blue' if row['HAT_TURU'] == 'Metro' else 'green', icon='train')
    ).add_to(map_istanbul)

map_istanbul.save("maps/initial_metro_lines.html")

In [11]:
m = folium.Map(location=istanbul_coords, zoom_start=10 , tiles='CartoDB positron')


for _, row in population_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{row['neighborhood']} ({row['township']})\nNüfus: {row['population']}",
        tooltip=row['neighborhood']
    ).add_to(m)

m.save("maps/neighborhood.html")

# Calculating grids
These are going to be candidate stations

In [12]:
polygon = Polygon([(40.963, 28.605) , (41.000, 28.984) ,  (41.168, 29.051) , (41.098, 28.578) ])

In [13]:
grid_list = create_grid_for_polygon(polygon)

In [14]:
print(len(grid_list))

1000


In [15]:
grid_df = pd.DataFrame(grid_list)
grid_df.head()

Unnamed: 0,station_id,lat,lon
0,1,41.071929,28.699785
1,2,41.002279,28.751837
2,3,41.019758,28.876875
3,4,41.1296,28.987969
4,5,41.028151,28.928297


In [16]:
grid_df['TYPE'] = 'candidate'
stations_df['TYPE'] = 'existing'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df['TYPE'] = 'existing'


In [17]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon,TYPE
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681,existing
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398,existing
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809,existing
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234,existing
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694,existing


In [18]:
grid_df.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,1,41.071929,28.699785,candidate
1,2,41.002279,28.751837,candidate
2,3,41.019758,28.876875,candidate
3,4,41.1296,28.987969,candidate
4,5,41.028151,28.928297,candidate


In [19]:
print(grid_df.dtypes)

station_id      int64
lat           float64
lon           float64
TYPE           object
dtype: object


In [20]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("maps/grid.html")

In [21]:
all_stations = pd.concat([
    grid_df[['station_id', 'lat', 'lon' , 'TYPE']],
    stations_df[['ISTASYON', 'lat', 'lon' , 'TYPE']].rename(columns={'ISTASYON': 'station_id'})
]).reset_index(drop=True)


In [22]:
all_stations = all_stations.reset_index(drop=True)
all_stations['station_id'] = all_stations.index + 34001

In [23]:
all_stations.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,34001,41.071929,28.699785,candidate
1,34002,41.002279,28.751837,candidate
2,34003,41.019758,28.876875,candidate
3,34004,41.1296,28.987969,candidate
4,34005,41.028151,28.928297,candidate


In [24]:
all_stations_pop = calculate_population_per_station(all_stations , population_df)

In [27]:
all_stations_pop.head()

Unnamed: 0,station_id,lat,lon,TYPE,arrived_population
0,34001,41.071929,28.699785,candidate,0
1,34002,41.002279,28.751837,candidate,0
2,34003,41.019758,28.876875,candidate,87336
3,34004,41.1296,28.987969,candidate,0
4,34005,41.028151,28.928297,candidate,25002


In [28]:
all_stations_pop.describe()

Unnamed: 0,station_id,lat,lon,arrived_population
count,1343.0,1343.0,1343.0,1343.0
mean,34672.0,41.050239,28.85096,48026.982874
std,387.835016,0.057192,0.153585,52108.82295
min,34001.0,40.784064,28.581036,0.0
25%,34336.5,41.014542,28.732807,0.0
50%,34672.0,41.050494,28.852692,33562.0
75%,35007.5,41.090258,28.95313,77700.0
max,35343.0,41.256408,29.409966,294879.0


### Extracting Metro Lines and Stations
We have not a extract metro lines and their stations dictionary. For further operation we need this dict .

In [29]:
TOLERANCE = 0.0005

project_dict = defaultdict(list)

existing_stations = all_stations_pop[all_stations_pop['TYPE'] == 'existing']

for idx, row in stations_df.iterrows():
    proje_adi_full = row['PROJE_ADI']
    lat = row['lat']
    lon = row['lon']

    match_hat = re.search(r'\b(M\d+[A-Z]?|T\d+)\b', proje_adi_full)
    if not match_hat:
        continue  
    hat_kodu = match_hat.group()


    match = existing_stations[
        (existing_stations['lat'].sub(lat).abs() < TOLERANCE) &
        (existing_stations['lon'].sub(lon).abs() < TOLERANCE)
    ]

    if not match.empty:
        for station_id in match['station_id']:
            project_dict[hat_kodu].append(station_id)

project_dict = dict(project_dict)


In [30]:
print(project_dict)

{'T1': [35001, 35017, 35025, 35027, 35031, 35032, 35033, 35055, 35056, 35065, 35066, 35071, 35075, 35094, 35095, 35097, 35101, 35107, 35125, 35126, 35127, 35128, 35131, 35135, 35150, 35151, 35154, 35185, 35193, 35196, 35209, 35224], 'M4': [35002, 35009, 35010, 35038, 35091, 35099, 35110, 35112, 35129, 35134, 35139, 35153, 35157, 35124, 35158, 35162, 35165, 35181, 35186, 35192, 35261, 35262, 35263, 35264, 35265, 35266, 35267, 35268, 35269, 35270], 'M1A': [35003, 35041, 35045, 35047, 35058, 35061, 35069, 35098, 35103, 35133, 35148, 35183, 35184, 35190, 35197, 35199, 35211, 35212, 35214], 'M9': [35004, 35084, 35187, 35160, 35230, 35250, 35283, 35284, 35285, 35286, 35287, 35288, 35326, 35289, 35114, 35290, 35291], 'T4': [35005, 35006, 35023, 35040, 35042, 35048, 35064, 35067, 35068, 35072, 35073, 35074, 35096, 35102, 35108, 35132, 35156, 35163, 35191, 35198, 35210, 35215], 'M5': [35012, 35014, 35079, 35111, 35115, 35118, 35141, 35144, 35147, 35167, 35171, 35173, 35175, 35176, 35177, 35200,

In [31]:
existing_stations_dict = {
 'T1': [35001, 35017, 35025, 35027, 35031, 35032, 35033, 35055, 35056, 35065, 35066, 35071, 35075, 35094, 35095, 35097, 35101,  
        35125, 35126, 35127, 35128, 35131, 35135, 35150, 35151, 35154, 35185, 35193, 35196, 35209, 35224, 35107], 
 'M4': [35002, 35009, 35010, 35038, 35091, 35099, 35110, 35112, 35129, 35134, 35139, 35153, 35157, 35124, 35158, 35162, 35165, 35181, 
        35186, 35192, 35261, 35262, 35263, 35264, 35265, 35266,  35268, 35269, 35270 , 35267], 
 'M1A': [35003, 35041, 35045, 35047, 35058, 35061, 35069, 35098, 35103, 35133,  35183, 35184, 35190, 35197, 35199, 35211, 35212, 
         35214 , 35148], 
 'M9': [35004, 35084, 35187, 35160, 35230, 35250, 35283, 35284, 35285, 35286, 35287, 35288, 35326, 35289, 35114,  35291 ,35290], 
 'T4': [35005, 35006, 35023, 35040, 35048, 35064, 35067, 35068, 35072, 35073, 35074, 35096, 35102, 35108, 35132, 35156, 35163, 
        35191, 35198, 35210, 35215 , 35042], 
 'M5': [35012, 35014, 35079, 35111, 35115, 35118, 35141, 35144, 35147, 35167, 35171, 35173, 35175, 35176, 35177, 35200, 35217, 35254, 
        35255, 35257, 35258, 35259, 35260 , 35256], 
 'M7': [35015, 35037, 35052, 35053, 35078, 35080, 35082, 35113, 35116, 35121, 35143, 35146, 35170, 35203, 35207, 35219, 35220, 35221, 
        35301, 35302, 35303, 35304, 35305, 35306, 35307, 35308, 35309, 35310, 35342 , 35222], 
 'T5': [35018, 35019, 35022, 35039, 35059, 35155, 35225, 35226, 35227, 35282, 35314, 35343 , 35020], 
 'M2': [35024, 35216, 35030, 35044, 35060, 35063, 35089, 35090,  35105, 35109, 35149, 35164, 35166, 35182, 35194, 35024, 35216,
        35223 ,35104],
 'M3': [35026, 35057, 35028, 35043, 35070, 35106, 35137, 35160, 35230, 35189, 35274, 35277, 35278, 35280, 35281, 35311, 35312,
        35327, 35328, 35329, 35330 , 35161], 
 'M6': [35049, 35159, 35188 , 35140], 
 'M1B': [35026, 35057, 35136, 35138, 35152, 35195, 35275, 35276, 35279,  35316, 35319, 35321, 35322, 35288, 35326 , 35313], 
 'M8': [35218, 35271,  35273, 35292, 35293, 35294, 35295, 35296, 35297, 35298, 35299, 35300 , 35272], 
 'M11': [35240, 35241, 35242, 35243, 35244, 35245, 35246, 35247,  35318, 35320, 35323, 35324, 35325 , 35315], 
 'M12': [35331, 35332, 35333, 35334, 35335, 35336, 35337, 35338, 35339, 35340, 35341]
    }

we dont know last stations of metro lines. i added last station end of the metro line array.

In [32]:
m_old = visualize_chromosome(existing_stations_dict , all_stations)
m_old.save("maps/initial_metro_with_color")

we should calculate the connectivitiy dictionary . this dict shows which station could be connected whichs stations 

In [33]:
connectivity_dict = calculate_connectivity_dict(all_stations_pop)

In [34]:
print(len(connectivity_dict))

1340


# GA

In [35]:
GeneticAlgorithm = GeneticMetroPlanner(
    all_stations_df = all_stations_pop,
    connectivity_dict = connectivity_dict,
    existing_lines_dict = existing_stations_dict)

In [36]:
for line , stations in existing_stations_dict.items():
    print(f"{line}:  {len(stations)} station")

T1:  32 station
M4:  30 station
M1A:  19 station
M9:  17 station
T4:  22 station
M5:  24 station
M7:  30 station
T5:  13 station
M2:  19 station
M3:  22 station
M6:  4 station
M1B:  16 station
M8:  13 station
M11:  14 station
M12:  11 station


In [37]:
chromosome = GeneticAlgorithm.generate_chromosome()
for line, stations in chromosome.items():
    print(f"{line}: {len(stations)} station")

T1: 35 station
M4: 30 station
M1A: 19 station
M9: 17 station
T4: 24 station
M5: 24 station
M7: 33 station
T5: 15 station
M2: 22 station
M3: 25 station
M6: 6 station
M1B: 19 station
M8: 13 station
M11: 17 station
M12: 11 station


In [38]:
print(chromosome)

{'T1': [35001, 35017, 35025, 35027, 35031, 35032, 35033, 35055, 35056, 35065, 35066, 35071, 35075, 35094, 35095, 35097, 35101, 35125, 35126, 35127, 35128, 35131, 35135, 35150, 35151, 35154, 35185, 35193, 35196, 35209, 35224, 35107, 34003, 34533, 34451], 'M4': [35002, 35009, 35010, 35038, 35091, 35099, 35110, 35112, 35129, 35134, 35139, 35153, 35157, 35124, 35158, 35162, 35165, 35181, 35186, 35192, 35261, 35262, 35263, 35264, 35265, 35266, 35268, 35269, 35270, 35267], 'M1A': [35003, 35041, 35045, 35047, 35058, 35061, 35069, 35098, 35103, 35133, 35183, 35184, 35190, 35197, 35199, 35211, 35212, 35214, 35148], 'M9': [35004, 35084, 35187, 35160, 35230, 35250, 35283, 35284, 35285, 35286, 35287, 35288, 35326, 35289, 35114, 35291, 35290], 'T4': [35005, 35006, 35023, 35040, 35048, 35064, 35067, 35068, 35072, 35073, 35074, 35096, 35102, 35108, 35132, 35156, 35163, 35191, 35198, 35210, 35215, 35042, 34025, 34631], 'M5': [35012, 35014, 35079, 35111, 35115, 35118, 35141, 35144, 35147, 35167, 35171,

In [39]:
m_n = visualize_chromosome(chromosome , all_stations)

In [40]:
m_n.save("maps/chromosome_map_first_iteration.html")

In [41]:
print(GeneticAlgorithm.calculate_population_for_chromosome(chromosome))

18680350


In [42]:
GeneticAlgorithm.generate_initial_population()

In [43]:
print(len(GeneticAlgorithm.population))

10


In [44]:
GeneticAlgorithm.fitness_population()

In [45]:
print(GeneticAlgorithm.fitness_values)

[np.float64(4.206016044064019), np.float64(1.55768611541848), np.float64(2.18613447076957), np.float64(1.6666666666666665), np.float64(1.8674565658698736), np.float64(1.226875247061657), np.float64(0.6877009762044174), np.float64(2.666666666666667), np.float64(2.1191262590872877), np.float64(1.2946065731271963)]


# Implementation

In [46]:
planner = GeneticMetroPlanner(
    all_stations_df=all_stations,
    connectivity_dict=connectivity_dict,
    existing_lines_dict=existing_stations_dict,
    mutation_rate=0.2,
    generation_number=10,
    child_number=20,
    new_station_number=50,
    max_per_station=5,
    w2= 10
)


In [None]:
best_solution, best_score = planner.run()
print("Best solution")
print("Arrived population", best_score)

for line, stations in best_solution.items():
    print(f"{line}: {stations}")

In [None]:
m_best = visualize_chromosome(best_solution , all_stations)

In [None]:
m_best.save('maps/best_metro_lines.html')