In [1]:
import os
import geopandas as gpd

In [2]:
# Location of the raw IETT bus-stop layer, relative to the notebook.
file_path = "../original_data/iett_otobus_duraklar.geojson"

# Parse the GeoJSON into a GeoDataFrame.
gdf = gpd.read_file(file_path)

# Plain-text preview of the first five rows (head()'s default size).
print(gdf.head(n=5))

    ID                ADI DURAK_KODU DURUMU  DURAK_TIPI    YON_BILGIS  \
0  392      MERCAN YUVASI     225972      1  İETTBAYRAK     SON DURAK   
1  393   DENİZ HARP OKULU     225981      1    CCMODERN         TUZLA   
2  394        ŞEKER SOKAK     227161      1  AÇIK DURAK      TEPEÖREN   
3  398  EGEMENLİK CADDESİ     228381      1  AÇIK DURAK       KADIKÖY   
4  402      İNÖNÜ CADDESİ     228401      1   IETTCAMLI  PENDİK  133Ş   

            SON_GUNCEL           YAPILIS_TA ILCEID MAHALLEID     VERSIYON  \
0  13.07.2020 00:00:00  02.01.2016 00:00:00   2015     40719  2,02007E+14   
1  01.06.2020 00:00:00  02.01.2016 00:00:00   2015     40719  2,02006E+14   
2  17.11.2021 00:00:00  02.01.2016 00:00:00   2015     40720  2,02111E+14   
3  23.09.2021 00:00:00  02.01.2016 00:00:00   2015     40722  2,02109E+14   
4  01.01.1976 00:00:00  02.01.2016 00:00:00   2015     40722  2,01806E+14   

  CEP_VAR                   geometry  
0       0   POINT (29.27635 40.8183)  
1       1  POINT (29

In [3]:
# Structural overview of gdf: column labels, dtypes, null counts, summary stats.
print("Column names:")
print(gdf.columns)

print("\nDataset info:")
# info() writes its report to stdout itself and returns None, so it is called
# bare; wrapping it in print() would emit an extra "None" line after the report.
gdf.info()

print("\nNull values in each column:")
print(gdf.isnull().sum())

print("\nBasic statistics:")
print(gdf.describe())


Column names:
Index(['ID', 'ADI', 'DURAK_KODU', 'DURUMU', 'DURAK_TIPI', 'YON_BILGIS',
       'SON_GUNCEL', 'YAPILIS_TA', 'ILCEID', 'MAHALLEID', 'VERSIYON',
       'CEP_VAR', 'geometry'],
      dtype='object')

Dataset info:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 15316 entries, 0 to 15315
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   ID          15316 non-null  object  
 1   ADI         15316 non-null  object  
 2   DURAK_KODU  15316 non-null  object  
 3   DURUMU      15316 non-null  object  
 4   DURAK_TIPI  15316 non-null  object  
 5   YON_BILGIS  15316 non-null  object  
 6   SON_GUNCEL  14408 non-null  object  
 7   YAPILIS_TA  15316 non-null  object  
 8   ILCEID      15316 non-null  object  
 9   MAHALLEID   15316 non-null  object  
 10  VERSIYON    15316 non-null  object  
 11  CEP_VAR     15282 non-null  object  
 12  geometry    15316 non-null  geometry
dtypes: geometry(1), object(1

In [4]:
# Attribute columns removed from the frame in this step.
columns_to_drop = [
    "ID",
    "ADI",
    "DURAK_TIPI",
    "YON_BILGIS",
    "SON_GUNCEL",
    "YAPILIS_TA",
    "ILCEID",
    "MAHALLEID",
    "VERSIYON",
    "CEP_VAR",
]

# drop() returns a new frame; rebind gdf to the reduced version.
gdf = gdf.drop(columns=columns_to_drop)

In [5]:
# Occurrence count for each distinct DURAK_KODU value.
duplicate_stops = gdf["DURAK_KODU"].value_counts()

# duplicated() marks every repeat after the first occurrence of a value, so the
# number of selected rows is the number of redundant entries.
print(
    "Number of duplicate DURAK_KODU values:",
    len(gdf.loc[gdf["DURAK_KODU"].duplicated()]),
)

Number of duplicate DURAK_KODU values: 0


In [6]:
# Remove the DURAK_KODU column and rebind gdf to the resulting frame.
gdf = gdf.drop(columns="DURAK_KODU")

In [7]:
# Structural overview of gdf: column labels, dtypes, null counts, summary stats.
print("Column names:")
print(gdf.columns)

print("\nDataset info:")
# info() writes its report to stdout itself and returns None, so it is called
# bare; wrapping it in print() would emit an extra "None" line after the report.
gdf.info()

print("\nNull values in each column:")
print(gdf.isnull().sum())

print("\nBasic statistics:")
print(gdf.describe())

Column names:
Index(['DURUMU', 'geometry'], dtype='object')

Dataset info:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 15316 entries, 0 to 15315
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   DURUMU    15316 non-null  object  
 1   geometry  15316 non-null  geometry
dtypes: geometry(1), object(1)
memory usage: 239.4+ KB
None

Null values in each column:
DURUMU      0
geometry    0
dtype: int64

Basic statistics:
       DURUMU                                   geometry
count   15316                                      15316
unique      1                                      15313
top         1  POINT (29.1740670000106 41.0766529991152)
freq    15316                                          2


In [8]:
# Distribution of the DURUMU column: absolute counts, then share of all rows.
status_counts = gdf["DURUMU"].value_counts()

# Scale to percent *before* rounding so the rounded figures are not exposed to
# float error again by a later multiplication.
status_share_pct = gdf["DURUMU"].value_counts(normalize=True).mul(100).round(2)

print("DURUMU value distribution:")
print(status_counts)

# The unit goes in the heading: passing "%" as an extra print() argument would
# tack it onto the Series footer ("dtype: float64 %") instead of the values.
print("\nDURUMU share of rows (%):")
print(status_share_pct)

DURUMU value distribution:
DURUMU
1    15316
Name: count, dtype: int64
DURUMU
1    100.0
Name: proportion, dtype: float64 %


In [9]:
# Remove the DURUMU column and rebind gdf to the resulting frame.
gdf = gdf.drop(columns="DURUMU")

In [10]:
# Show which columns remain in the frame.
print("Column names:")
print(gdf.columns)

# Collect the labels of columns in which every value is null.
null_columns = [
    label
    for label, entirely_null in gdf.isnull().all().items()
    if entirely_null
]

# Report the list when it is non-empty, otherwise a fixed message.
print(null_columns if null_columns else "No null columns")


Column names:
Index(['geometry'], dtype='object')
No null columns


In [11]:
# Export gdf to CSV without the index column.
# Create the target directory first: to_csv raises OSError when the parent
# directory is missing (e.g. on a fresh checkout), which would abort the export
# at the very last step of the notebook.
os.makedirs("../data", exist_ok=True)
gdf.to_csv("../data/iett_otobus_duraklar.csv", index=False)