In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import re
import importlib
from functions import *
import random
import folium
from GA import *
from shapely.geometry import Polygon

# 1) DATASETS and Data Cleaning

## a) Population Dataset

In [2]:
population_path = "/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/population_with_coordinates.csv"

In [3]:
raw_population_df = pd.read_csv(population_path , header=None, sep=",")

In [4]:
raw_population_df = raw_population_df.dropna(how='all', axis=1)

In [5]:
raw_population_df.columns = raw_population_df.iloc[0]

In [6]:
raw_population_df = raw_population_df.drop(index=0).reset_index(drop=True)
raw_population_df = raw_population_df.loc[:, raw_population_df.columns.notna()]

In [7]:
raw_population_df[['latitude', 'longitude']] = raw_population_df['coordinate'].str.split(',', expand=True)

In [8]:
raw_population_df['latitude'] = raw_population_df['latitude'].astype(float)
raw_population_df['longitude'] = raw_population_df['longitude'].astype(float)
raw_population_df['population'] = raw_population_df['population'].astype(int)

In [9]:
population_df = raw_population_df[['population', 'neighborhood', 'township', 'neighborhood_code', 'latitude', 'longitude']]

In [10]:
population_df = population_df.dropna()

In [11]:
population_df.head()

Unnamed: 0,population,neighborhood,township,neighborhood_code,latitude,longitude
0,4693,Adnan Menderes,Arnavutköy,40490,41.211179,28.700163
1,47828,Anadolu,Arnavutköy,99359,41.186036,28.749463
2,23116,Arnavutköy Merkez,Arnavutköy,40478,41.182546,28.737891
3,10566,Atatürk,Arnavutköy,40482,41.190149,28.760125
4,12829,Boğazköy İstiklal,Arnavutköy,40483,41.183488,28.768235


In [12]:
population_df.describe()

Unnamed: 0,population,latitude,longitude
count,708.0,708.0,708.0
mean,21397.411017,41.029966,28.972329
std,15943.770157,0.071404,0.193073
min,2025.0,40.817868,28.409171
25%,10279.25,40.998081,28.854917
50%,17970.0,41.028968,28.98112
75%,28527.75,41.063456,29.109349
max,112367.0,41.269994,29.386538


## b) Metro Station Dataset

In [13]:
stations_gdf = gpd.read_file("/home/saydam/Desktop/2024-2025_itu/yzv202/project/github/datasets/station.geojson")

In [14]:
print(stations_gdf.head())

                   ISTASYON                                      PROJE_ADI  \
0               Mehmet Akif            T1 Kabataş - Bağcılar Tramvay Hattı   
1                  Soğanlık                   M4 Kadıköy - SGH Metro Hattı   
2                   Aksaray  M1A Yenikapı - Atatürk Havalimanı Metro Hattı   
3                 Olimpiyat            M9 Bahariye - Olimpiyat Metro Hattı   
4  Sağmalcılar-Pancar Motor      T4 Topkapı - Mescid-i Selam Tramvay Hattı   

               PROJE_ASAMA HAT_TURU                                  MUDURLUK  \
0  Mevcut Hattaki İstasyon  Tramvay   Avrupa Yakası Raylı Sistemler Müdürlüğü   
1  Mevcut Hattaki İstasyon    Metro  Anadolu Yakası Raylı Sistemler Müdürlüğü   
2  Mevcut Hattaki İstasyon    Metro   Avrupa Yakası Raylı Sistemler Müdürlüğü   
3  Mevcut Hattaki İstasyon    Metro   Avrupa Yakası Raylı Sistemler Müdürlüğü   
4  Mevcut Hattaki İstasyon  Tramvay   Avrupa Yakası Raylı Sistemler Müdürlüğü   

                    geometry  
0   POINT (28

In [15]:
stations_gdf["lat"] = stations_gdf.geometry.x
stations_gdf["lon"] = stations_gdf.geometry.y

In [16]:
stations_df = stations_gdf[["ISTASYON", "PROJE_ADI", "HAT_TURU", "lat", "lon"]]

In [17]:
stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df[["lat", "lon"]] = stations_df[["lon", "lat"]]


In [18]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694


# Visulation
Initial maps

In [19]:
istanbul_coords = [41.0082, 28.9784]


map_istanbul = folium.Map(location=istanbul_coords, zoom_start=11, tiles='CartoDB positron')

for index, row in stations_df.iterrows():
    popup_text = f"{row['ISTASYON']}<br>{row['PROJE_ADI']}"
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=popup_text,
        icon=folium.Icon(color='blue' if row['HAT_TURU'] == 'Metro' else 'green', icon='train')
    ).add_to(map_istanbul)

map_istanbul.save("initial_metro_lines.html")

In [20]:
m = folium.Map(location=istanbul_coords, zoom_start=10 , tiles='CartoDB positron')


for _, row in population_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{row['neighborhood']} ({row['township']})\nNüfus: {row['population']}",
        tooltip=row['neighborhood']
    ).add_to(m)

m.save("neighborhood.html")

# Calculating grids
These are going to be candidate stations

In [39]:
polygon = Polygon([(40.963, 28.605) , (41.000, 28.984) ,  (41.168, 29.051) , (41.098, 28.578) ])

In [40]:
grid_list = create_grid_for_polygon(polygon , num_lat_grids = 50 , num_lon_grids = 50)

In [41]:
print(len(grid_list))

1636


In [42]:
grid_df = pd.DataFrame(grid_list)
grid_df.head()

Unnamed: 0,station_id,lat,lon
0,"(0, 3)",40.96505,28.61111
1,"(0, 4)",40.96505,28.62057
2,"(1, 3)",40.96915,28.61111
3,"(1, 4)",40.96915,28.62057
4,"(1, 5)",40.96915,28.63003


In [43]:
grid_df['TYPE'] = 'candidate'
stations_df['TYPE'] = 'existing'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stations_df['TYPE'] = 'existing'


In [44]:
stations_df.head()

Unnamed: 0,ISTASYON,PROJE_ADI,HAT_TURU,lat,lon,TYPE
0,Mehmet Akif,T1 Kabataş - Bağcılar Tramvay Hattı,Tramvay,41.0058,28.881681,existing
1,Soğanlık,M4 Kadıköy - SGH Metro Hattı,Metro,40.913288,29.192398,existing
2,Aksaray,M1A Yenikapı - Atatürk Havalimanı Metro Hattı,Metro,41.012008,28.94809,existing
3,Olimpiyat,M9 Bahariye - Olimpiyat Metro Hattı,Metro,41.079466,28.767234,existing
4,Sağmalcılar-Pancar Motor,T4 Topkapı - Mescid-i Selam Tramvay Hattı,Tramvay,41.05728,28.90694,existing


In [45]:
grid_df.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,"(0, 3)",40.96505,28.61111,candidate
1,"(0, 4)",40.96505,28.62057,candidate
2,"(1, 3)",40.96915,28.61111,candidate
3,"(1, 4)",40.96915,28.62057,candidate
4,"(1, 5)",40.96915,28.63003,candidate


In [46]:
print(grid_df.dtypes)

station_id     object
lat           float64
lon           float64
TYPE           object
dtype: object


In [47]:
m = folium.Map(location=istanbul_coords, zoom_start=10, tiles='CartoDB positron')

for _, row in grid_df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],  
        popup=f"station id: {row['station_id']}"
    ).add_to(m)

m.save("grid.html")

In [48]:
all_stations = pd.concat([
    grid_df[['station_id', 'lat', 'lon' , 'TYPE']],
    stations_df[['ISTASYON', 'lat', 'lon' , 'TYPE']].rename(columns={'ISTASYON': 'station_id'})
]).reset_index(drop=True)


In [49]:
all_stations = all_stations.reset_index(drop=True)
all_stations['station_id'] = all_stations.index + 34001

In [50]:
all_stations.head()

Unnamed: 0,station_id,lat,lon,TYPE
0,34001,40.96505,28.61111,candidate
1,34002,40.96505,28.62057,candidate
2,34003,40.96915,28.61111,candidate
3,34004,40.96915,28.62057,candidate
4,34005,40.96915,28.63003,candidate


In [51]:
all_stations_pop = calculate_population_per_station(all_stations , population_df)

In [52]:
all_stations_pop_filtered = all_stations_pop[
    ~((all_stations_pop['arrived_population'] < 1000) & (all_stations_pop['TYPE'] == 'candidate'))
]

all_stations_pop_filtered = all_stations_pop_filtered.sort_values(by='arrived_population' , ascending = False)

In [53]:
all_stations_pop_filtered.head()

Unnamed: 0,station_id,lat,lon,TYPE,arrived_population
210,34211,41.00195,28.84761,candidate,294879
251,34252,41.00605,28.84761,candidate,281277
1913,35914,41.019391,28.857845,existing,235721
417,34418,41.02245,28.84761,candidate,235447
209,34210,41.00195,28.83815,candidate,231141


In [54]:
all_stations_pop_filtered.describe()

Unnamed: 0,station_id,lat,lon,arrived_population
count,1337.0,1337.0,1337.0,1337.0
mean,34969.335079,41.039195,28.863039,68909.79282
std,614.563305,0.05213,0.148046,48074.429263
min,34002.0,40.784064,28.59219,0.0
25%,34430.0,41.010139,28.76247,31210.0
50%,34866.0,41.03885,28.86653,61758.0
75%,35645.0,41.07165,28.95167,97581.0
max,35979.0,41.256408,29.409966,294879.0


### Extracting Metro Lines and Stations
We have not a extract metro lines and their stations dictionary. For further operation we need this dict .

In [55]:
TOLERANCE = 0.0005

project_dict = defaultdict(list)

existing_stations = all_stations_pop[all_stations_pop['TYPE'] == 'existing']

for idx, row in stations_df.iterrows():
    proje_adi_full = row['PROJE_ADI']
    lat = row['lat']
    lon = row['lon']

    match_hat = re.search(r'\b(M\d+[A-Z]?|T\d+)\b', proje_adi_full)
    if not match_hat:
        continue  
    hat_kodu = match_hat.group()


    match = existing_stations[
        (existing_stations['lat'].sub(lat).abs() < TOLERANCE) &
        (existing_stations['lon'].sub(lon).abs() < TOLERANCE)
    ]

    if not match.empty:
        for station_id in match['station_id']:
            project_dict[hat_kodu].append(station_id)

project_dict = dict(project_dict)


In [56]:
print(project_dict)

{'T1': [35637, 35653, 35661, 35663, 35667, 35668, 35669, 35691, 35692, 35701, 35702, 35707, 35711, 35730, 35731, 35733, 35737, 35743, 35761, 35762, 35763, 35764, 35767, 35771, 35786, 35787, 35790, 35821, 35829, 35832, 35845, 35860], 'M4': [35638, 35645, 35646, 35674, 35727, 35735, 35746, 35748, 35765, 35770, 35775, 35789, 35793, 35760, 35794, 35798, 35801, 35817, 35822, 35828, 35897, 35898, 35899, 35900, 35901, 35902, 35903, 35904, 35905, 35906], 'M1A': [35639, 35677, 35681, 35683, 35694, 35697, 35705, 35734, 35739, 35769, 35784, 35819, 35820, 35826, 35833, 35835, 35847, 35848, 35850], 'M9': [35640, 35720, 35823, 35796, 35866, 35886, 35919, 35920, 35921, 35922, 35923, 35924, 35962, 35925, 35750, 35926, 35927], 'T4': [35641, 35642, 35659, 35676, 35678, 35684, 35700, 35703, 35704, 35708, 35709, 35710, 35732, 35738, 35744, 35768, 35792, 35799, 35827, 35834, 35846, 35851], 'M5': [35648, 35650, 35715, 35747, 35751, 35754, 35777, 35780, 35783, 35803, 35807, 35809, 35811, 35812, 35813, 35836,

we dont know last stations of metro lines. i added last station end of the metro line array.

In [57]:
existing_stations_dict = {'T1': [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 
34997, 35001, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124 , 35007], 
                            'M4': [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 
35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35168, 35169, 35170 , 35167], 
                          'M1A': [34903, 34941, 34945, 34947, 34958, 34961, 34969, 
34998, 35003, 35033, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114 , 35048], 
                          'M9': [34984, 35087, 35060, 35130, 35150, 35183,
35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34904], 
                          'T4': [34905, 34906, 34923, 34940, 34942, 34948, 34964, 34967, 34968,
34972, 34973, 34974, 34996, 35002, 35008, 35032, 35056, 35063, 35091, 35098, 35110, 35115], 
                          'M5': [34912, 34914, 34979, 35011, 35015, 35018,
35041, 35044, 35047, 35067, 35071, 35073, 35075, 35076, 35077, 35100, 35117, 35154, 35155, 35157, 35158, 35159, 35160, 35156], 
                          'M7': [34915,
34937, 34952, 34953, 34978, 34980, 34982, 35013, 35016, 35021, 35043, 35046, 35070, 35103, 35107, 35119, 35120, 35121, 35201, 35202,
35203, 35204, 35205, 35206, 35207, 35208, 35209, 35210, 35242 , 35122],
                          'T5': [34918, 34919, 34920, 34922, 34939, 34959, 35055, 35125, 35126, 35127, 
35182, 35214, 35243], 
                          'M2': [34924, 35116, 34930, 34944, 34960, 34963, 34989, 34990, 35005, 35009, 35049, 35064, 35066, 35082, 35094, 
                                 34924, 35116, 35123 , 35004], 
                           'M3': [34926, 34957, 34928, 34943, 34970, 35006, 35037, 35060, 35130,  35089, 35174, 35177, 35178, 35180, 
35181, 35211, 35212, 35227, 35228, 35229, 35230 , 35061], 
                          'M6': [34949, 35059, 35088 , 35040], 
                          'M1B': [34926, 34957, 35036, 35038, 35052, 35095, 35175, 35176, 35179, 35213, 35216,  35221, 35222, 35188, 
35226 , 35219], 
                          'M8': [35118, 35171, 35173, 35192, 35193, 35194, 35195, 35196, 35197, 35198, 35199, 35200 , 35172], 
                          'M11': [35140, 35141, 35142, 
35143, 35144, 35145, 35146, 35147, 35218, 35220, 35223, 35224, 35225 , 35215], 
                          'M12': [35231, 35232, 35233, 35234, 
35235, 35236, 35237, 35238, 35239, 35240, 35241]}


ww should calculate the connectivitiy dictionary . this dict shows which station could be connected whichs stations 

In [58]:
connectivity_dict = calculate_connectivity_dict(all_stations_pop)

In [59]:
print(len(connectivity_dict))

1976


# GA

In [60]:
GeneticAlgorithm = GeneticMetroPlanner(
    all_stations_df = all_stations_pop_filtered,
    connectivity_dict = connectivity_dict,
    existing_lines_dict = existing_stations_dict)

In [61]:
chromosome = GeneticAlgorithm.generate_chromosome()
for line, stations in chromosome.items():
    print(f"{line}: {len(stations)} station")

T1: 32 station
M4: 30 station
M1A: 22 station
M9: 20 station
T4: 25 station
M5: 25 station
M7: 31 station
T5: 16 station
M2: 22 station
M3: 23 station
M6: 6 station
M1B: 18 station
M8: 14 station
M11: 14 station
M12: 13 station


In [62]:
for line , stations in existing_stations_dict.items():
    print(f"{line}:  {len(stations)} station")

T1:  32 station
M4:  30 station
M1A:  19 station
M9:  17 station
T4:  22 station
M5:  24 station
M7:  30 station
T5:  13 station
M2:  19 station
M3:  22 station
M6:  4 station
M1B:  16 station
M8:  13 station
M11:  14 station
M12:  11 station


In [63]:
print(chromosome)

{'T1': [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124, 35007], 'M4': [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35168, 35169, 35170, 35167], 'M1A': [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114, 35048, 35188, 35047, 35095], 'M9': [34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34904, 35088, 34906, 35040], 'T4': [34905, 34906, 34923, 34940, 34942, 34948, 34964, 34967, 34968, 34972, 34973, 34974, 34996, 35002, 35008, 35032, 35056, 35063, 35091, 35098, 35110, 35115, 34933, 34796, 34888], 'M5': [34912, 34914, 34979, 35011, 35015, 35018, 35041,

In [64]:
def visualize_chromosome(chromosome, stations_df):
    m = folium.Map(location=[41.015137, 28.979530], zoom_start=11 , tiles='CartoDB positron')

    color_palette = [
    '#1f77b4',  # koyu mavi
    '#ff7f0e',  # turuncu
    '#2ca02c',  # koyu yeşil
    '#d62728',  # kırmızı
    '#9467bd',  # mor
    '#8c564b',  # kahverengi
    '#e377c2',  # pembe
    '#7f7f7f',  # gri
    '#bcbd22',  # zeytin
    '#17becf',  # cam göbeği
    '#393b79',  # lacivert
    '#637939',  # zeytin yeşili
    '#8c6d31',  # koyu altın
    '#843c39',  # bordo
    '#7b4173',  # koyu pembe
    '#5254a3',  # orta mavi
    '#9c9ede'   # açık mor ama yeterince koyu
                ]
    line_colors = {}

    for i, (line, station_ids) in enumerate(chromosome.items()):
        color = color_palette[i % len(color_palette)]
        line_colors[line] = color
        line_coords = []

        for station_id in station_ids:
            row = stations_df[stations_df['station_id'] == station_id]
            if row.empty:
                continue
            lat = row.iloc[0]['lat']
            lon = row.iloc[0]['lon']
            line_coords.append((lat, lon))

            folium.CircleMarker(
                location=(lat, lon),
                radius=4,
                color=color,
                fill=True,
                fill_color=color,
                fill_opacity=0.8,
                popup=f"{line}: {station_id}"
            ).add_to(m)

        

    return m

In [65]:
m_n = visualize_chromosome(chromosome , all_stations)

In [66]:
m_n.save("chromosome_map_new.html")

In [67]:
m_old = visualize_chromosome(existing_stations_dict , all_stations)
m_old.save("chromosome_map_old.html")

In [68]:
print(GeneticAlgorithm.calculate_population_for_chromosome(chromosome))

12377047


In [69]:
GeneticAlgorithm.generate_initial_population()

In [70]:
print(len(GeneticAlgorithm.population))

10


In [71]:
GeneticAlgorithm.fitness_population()

In [72]:
print(GeneticAlgorithm.fitness_values)

[np.float64(1.4405486023634433), np.float64(2.4825656572660537), np.float64(3.066983059378848), np.float64(1.0), np.float64(4.0), np.float64(3.065098057343611), np.float64(2.6329112623735362), np.float64(1.420301523281958), np.float64(2.503594729698669), np.float64(3.246466584229929)]


In [73]:
planner = GeneticMetroPlanner(
    all_stations_df=all_stations,
    connectivity_dict=connectivity_dict,
    existing_lines_dict=existing_stations_dict,
    mutation_rate=0.2,
    generation_number=10,
    child_number=20,
    new_station_number=50,
    max_per_station=5,
    w2= 2
)


In [74]:
best_solution, best_score = planner.run()
print("Best solution")
print("Arrived population", best_score)

for line, stations in best_solution.items():
    print(f"{line}: {stations}")

Best solution
Arrived population 2.355203860595778
T1: [34901, 34917, 34925, 34927, 34931, 34932, 34933, 34955, 34956, 34965, 34966, 34971, 34975, 34994, 34995, 34997, 35001, 35025, 35026, 35027, 35028, 35031, 35035, 35050, 35051, 35054, 35085, 35093, 35096, 35109, 35124, 35007]
M4: [34902, 34909, 34910, 34938, 34991, 34999, 35010, 35012, 35029, 35034, 35039, 35053, 35057, 35024, 35058, 35062, 35065, 35081, 35086, 35092, 35161, 35162, 35163, 35164, 35165, 35166, 35168, 35169, 35170, 35167, 35342, 35264, 35304, 35478, 35344, 35449, 35415, 35306, 35348, 35531, 35571, 35535, 35426]
M1A: [34903, 34941, 34945, 34947, 34958, 34961, 34969, 34998, 35003, 35033, 35083, 35084, 35090, 35097, 35099, 35111, 35112, 35114, 35048, 34777, 34730, 34513, 34342, 34340]
M9: [34984, 35087, 35060, 35130, 35150, 35183, 35184, 35185, 35186, 35187, 35188, 35226, 35189, 35014, 35190, 35191, 34904, 34772, 34638, 34859, 34641, 34551, 34768, 34771, 34818, 34908, 34644, 34514, 34264, 34221, 34428, 34384]
T4: [34905,

In [75]:
m_best = visualize_chromosome(best_solution , all_stations)

In [76]:
m_best.save("best_metro_lines.html")