In [2]:
# Standard library
import os
# Regular-expression support
import re

# Third-party
import geopandas as gpd
import numpy as np
import pandas as pd

# Map rendering library (currently disabled)
# from keplergl import KeplerGl

# Project-local helper classes
from ParseLog import ParseLog
from PostgreSQL import PostgreSQL

# Database connection parameters
from db_parameters import *

In [2]:
# Instantiate the project's PostgreSQL helper.
# NOTE(review): HOST / DB / LOGIN / PASSWORD presumably come from the
# star-import of db_parameters — confirm.
pg = PostgreSQL(host=HOST, database=DB, login=LOGIN, password=PASSWORD)

In [3]:
# Collect the log files to parse. os.listdir() returns entries in arbitrary
# order and also lists sub-directories, so keep regular files only and sort
# the paths to make the processing order reproducible between runs.
list_filename = sorted(
    'log/' + file
    for file in os.listdir('log')
    if os.path.isfile(os.path.join('log', file))
)

In [4]:
len(list_filename)

347

In [7]:
# Field names of a parsed log record, in column order.
_log_columns = [
    'timestamp',
    'clid',
    'uuid',
    'route',
    'vehicle_type',
    'latitude',
    'longitude',
    'speed',
    'direction',
    'production',
    'garange_number',
    'reg_number',
    'route_descr',
]

# Empty frame holding one (empty) column per log field.
df_concat = pd.DataFrame({column: [] for column in _log_columns})

df_concat

Unnamed: 0,timestamp,clid,uuid,route,vehicle_type,latitude,longitude,speed,direction,production,garange_number,reg_number,route_descr


In [13]:
%%time 

# Parse every log file into its own DataFrame and keep the results in a list,
# so they can be concatenated in a single step afterwards.
df_array = []
for log_path in list_filename:
    df_array.append(ParseLog(log_path).transform_df())

CPU times: user 5min 22s, sys: 4.31 s, total: 5min 26s
Wall time: 5min 28s


In [14]:
len(df_array)

347

In [None]:
%%time

# Stack the per-file frames into one table. ignore_index=True rebuilds a fresh
# 0..N-1 index so rows coming from different files cannot share an index label.
df = pd.concat(df_array, ignore_index=True)

df.head()

CPU times: user 8.56 s, sys: 24 s, total: 32.6 s
Wall time: 43.8 s


Unnamed: 0,timestamp,clid,uuid,route,vehicle_type,latitude,longitude,speed,direction,production,garange_number,reg_number,route_descr
0,2022-05-26 16:27:47,cherepovets,ak1456_2058,#,bus,59.128468,37.837383,24.98,326,0,,е440хк_35 [БВ (Н)],\n
1,2022-05-26 16:27:44,cherepovets,ak1456_1946,#27,bus,59.135483,37.864201,29.78,273,0,,е387хк_35 [БВ (Н)],27 []\n
2,2022-05-26 16:27:45,cherepovets,ak1456_2057,#12,bus,59.130859,37.984455,50.51,101,1,,е397хк_35 [БВ (Н)],12 [доменная - > - ул. олимпийская]\n
3,2022-05-26 16:27:43,cherepovets,ak1456_1878,#7,bus,59.127861,37.921082,0.0,0,1,,е386хк_35 [БВ (Н)],7 [ул. наседкина - > - ул. олимпийская]\n
4,2022-05-26 16:27:45,cherepovets,ak1456_1862,#18,bus,59.088779,37.913326,14.8,243,1,,е332хк_35 [БВ (Н)],18 [ул. рыбинская (2) -> пр. победы 2]\n


Наименование колонок:

- `TIMESTAMP` - время в UTC в формате дата:время
- `CLID` - vologda или cherepovets
- `UUID` - уникальный идентификатор ТС в формате id_перевозчика + "_" + id_ТС_в_базе_перевозчика
- `ROUTE` - "#" + номер маршрута 
- `VEHICLE_TYPE` - 'bus', 'tramway', 'minibus', 'trolleybus'
- `LATITUDE` – широта
- `LONGITUDE` – долгота
- `SPEED` - текущая скорость
- `DIRECTION` - азимут
- `PRODUCTION` - признак производственного рейса. Когда "0" - скорее всего автобус без пассажиров. В Яндекс передаются только данные с флагом "1"
- `GARANGE_NUMBER` - опционально "человеческий" идентификатор ТС для перевозчика
- `REG_NUMBER` - госномер ТС, есть не всегда
- `ROUTE_DESCR` - текстовый идентификатор траектории маршрута в одном направлении. Нет стандартного формата, можно использовать только как "хэш", как ключ для группировки точек по траекториям

In [17]:
%%time

# Persist the combined frame to CSV without the index column.
# NOTE(review): at this point df has not yet been filtered by clid (that
# happens in a later cell), so despite the file name the dump may contain
# other cities — the cells that read it back filter on clid again.
df.to_csv('data_cherepovets.csv',index=False)

CPU times: user 2min 22s, sys: 6.37 s, total: 2min 29s
Wall time: 2min 34s


In [20]:
%%time 

# Keep only the rows whose clid is 'cherepovets'.
df = df.loc[df['clid'] == 'cherepovets']

CPU times: user 9.09 s, sys: 26.7 s, total: 35.8 s
Wall time: 1min 43s


______
# Tramway

In [3]:
%%time

# Load the combined dump and keep only Cherepovets tram records.
# .copy() turns the filtered slice into an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
df = pd.read_csv('data_cherepovets.csv')
df = df[(df.clid == 'cherepovets') & (df.vehicle_type == 'tramway')].copy()

# Remove newline characters from route_descr and '#' from route with
# vectorised string methods; unlike re.sub inside apply(), these leave missing
# values (NaN) untouched instead of raising TypeError.
df['route_descr'] = df['route_descr'].str.replace('\n', '', regex=False)
df['route'] = df['route'].str.replace('#', '', regex=False)

# Drop records whose route description is empty.
df = df[df.route_descr != '']

df.head()

CPU times: user 33 s, sys: 5.17 s, total: 38.2 s
Wall time: 40.7 s


Unnamed: 0,timestamp,clid,uuid,route,vehicle_type,latitude,longitude,speed,direction,production,garange_number,reg_number,route_descr
48,2022-05-26 16:28:04,cherepovets,chertram_41755825,4,tramway,59.131313,37.916977,9.0,282,1,,,4 (4)
49,2022-05-26 16:28:06,cherepovets,chertram_53940190,8,tramway,59.128433,37.94149,19.0,282,1,,,8 (159)
50,2022-05-26 16:27:54,cherepovets,chertram_42866217,4,tramway,59.122143,37.990363,0.0,114,1,,,4 (4)
51,2022-05-26 16:28:01,cherepovets,chertram_41773729,4,tramway,59.119607,38.008412,20.0,104,0,,,4 (5)
52,2022-05-26 16:28:01,cherepovets,chertram_42873569,4,tramway,59.135555,37.862617,30.0,92,1,,,4 (5)


In [4]:
# Median speed for every (latitude, longitude, route) combination.
# The aggregation is given as the string 'median' because passing np.median
# to agg() is deprecated in recent pandas releases; as_index=False keeps the
# group keys as ordinary columns, replacing the in-place reset_index().
df_agg = df.groupby(['latitude', 'longitude', 'route'], as_index=False).agg({
    'speed': 'median'
})

df_agg.shape

(930787, 4)

In [5]:
# Turn each route value into a label by appending the ' маршрут' suffix.
# Only values that do not already end with the suffix are changed, so
# re-running this cell does not append the suffix a second time.
_route_suffix = ' маршрут'
_already_labelled = df_agg['route'].str.endswith(_route_suffix, na=False)
df_agg['route'] = df_agg['route'].where(
    _already_labelled,
    df_agg['route'] + _route_suffix,
)

In [6]:
# df_agg.head(100)

In [10]:
df_agg.to_csv('tram_cherepovets_agg.csv',index=False)

In [8]:
df_agg

Unnamed: 0,latitude,longitude,route,speed
0,59.119125,38.007228,8 маршрут,0.0
1,59.119418,38.009167,4 маршрут,15.0
2,59.119427,38.007840,4 маршрут,0.0
3,59.119427,38.009087,4 маршрут,0.0
4,59.119427,38.009122,8 маршрут,22.0
...,...,...,...,...
930782,59.138630,37.921712,8 маршрут,9.0
930783,59.138630,37.921727,8 маршрут,7.0
930784,59.138648,37.921517,8 маршрут,11.0
930785,59.138657,37.921555,8 маршрут,11.0


In [9]:
df_agg.route.unique()

array(['8 маршрут', '4 маршрут', '4А маршрут', '2 маршрут'], dtype=object)

# Bus

In [107]:
%%time

# Load the combined dump and keep only Cherepovets bus records whose speed
# lies in the 0..60 range (both bounds inclusive).
# .copy() turns the filtered slice into an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
df = pd.read_csv('data_cherepovets.csv')
df = df[
    (df.clid == 'cherepovets') &
    (df.vehicle_type == 'bus') &
    (df.speed >= 0.0) &
    (df.speed <= 60.0)
].copy()

# Remove newline characters from route_descr and '#' from route with
# vectorised string methods; unlike re.sub inside apply(), these leave missing
# values (NaN) untouched instead of raising TypeError.
df['route_descr'] = df['route_descr'].str.replace('\n', '', regex=False)
df['route'] = df['route'].str.replace('#', '', regex=False)

# Drop records whose route description is empty.
df = df[df.route_descr != '']

df.head()

CPU times: user 1min, sys: 11.3 s, total: 1min 11s
Wall time: 1min 16s


Unnamed: 0,timestamp,clid,uuid,route,vehicle_type,latitude,longitude,speed,direction,production,garange_number,reg_number,route_descr
1,2022-05-26 16:27:44,cherepovets,ak1456_1946,27,bus,59.135483,37.864201,29.78,273,0,,е387хк_35 [БВ (Н)],27 []
2,2022-05-26 16:27:45,cherepovets,ak1456_2057,12,bus,59.130859,37.984455,50.51,101,1,,е397хк_35 [БВ (Н)],12 [доменная - > - ул. олимпийская]
3,2022-05-26 16:27:43,cherepovets,ak1456_1878,7,bus,59.127861,37.921082,0.0,0,1,,е386хк_35 [БВ (Н)],7 [ул. наседкина - > - ул. олимпийская]
4,2022-05-26 16:27:45,cherepovets,ak1456_1862,18,bus,59.088779,37.913326,14.8,243,1,,е332хк_35 [БВ (Н)],18 [ул. рыбинская (2) -> пр. победы 2]
5,2022-05-26 16:27:48,cherepovets,ak1456_1934,27,bus,59.135277,37.857082,0.0,270,1,,е355хк_35 [БВ (Н)],27 [ул. монт - клер - > - доменная]


In [108]:
# Select the records of bus route 31 only.
df_31 = df.loc[df['route'] == '31']

In [110]:
# Median speed for every (latitude, longitude) point of route 31.
# The aggregation is given as the string 'median' because passing np.median
# to agg() is deprecated in recent pandas releases; as_index=False keeps the
# group keys as ordinary columns, replacing the in-place reset_index().
df_agg = df_31.groupby(['latitude', 'longitude'], as_index=False).agg({
    'speed': 'median'
})

df_agg.shape

(2250816, 3)

In [111]:
df_agg

Unnamed: 0,latitude,longitude,speed
0,58.951724,38.199854,0.0
1,58.956443,38.211302,0.0
2,58.957845,38.210065,0.0
3,59.073038,37.934931,0.0
4,59.073772,37.932676,0.0
...,...,...,...
2250811,59.174946,37.946486,55.0
2250812,59.175596,37.946355,55.0
2250813,59.177141,37.946985,0.0
2250814,59.177375,37.946706,24.0


In [121]:
df_agg.to_csv('bus_31_cherepovets_agg.csv',index=False)

In [70]:
# Load the GeoJSON export with geopandas.
# NOTE(review): the tram-stop cell further down reads the same
# 'export.geojson' path; presumably the file was re-exported between the two
# runs — confirm which export this cell expects.
gdf = gpd.read_file('export.geojson')

In [74]:
gdf.bus.unique()

array(['yes', None], dtype=object)

In [76]:
gdf = gdf[['name','geometry']]

In [77]:
gdf.to_file(filename='bus_stop.geojson', driver="GeoJSON")

In [103]:
da = df_agg.speed.unique()

In [106]:
np.sort(da)

array([ 0.  ,  0.5 ,  1.  ,  1.5 ,  2.  ,  2.5 ,  3.  ,  3.5 ,  4.  ,
        4.5 ,  5.  ,  5.5 ,  6.  ,  6.5 ,  7.  ,  7.5 ,  8.  ,  8.5 ,
        9.  ,  9.5 , 10.  , 10.5 , 11.  , 11.5 , 12.  , 12.5 , 13.  ,
       13.5 , 14.  , 14.5 , 15.  , 15.5 , 16.  , 16.5 , 17.  , 17.5 ,
       18.  , 18.5 , 19.  , 19.5 , 20.  , 20.5 , 21.  , 21.5 , 22.  ,
       22.5 , 23.  , 23.5 , 24.  , 24.5 , 25.  , 25.16, 25.5 , 25.53,
       26.  , 26.5 , 27.  , 27.5 , 28.  , 28.12, 28.5 , 28.67, 29.  ,
       29.5 , 30.  , 30.34, 30.5 , 31.  , 31.5 , 32.  , 32.5 , 32.74,
       33.  , 33.5 , 34.  , 34.41, 34.5 , 35.  , 35.5 , 36.  , 36.5 ,
       37.  , 37.5 , 38.  , 38.5 , 39.  , 39.5 , 40.  , 40.5 , 41.  ,
       41.5 , 42.  , 42.5 , 43.  , 43.5 , 44.  , 44.5 , 45.  , 45.5 ,
       46.  , 46.5 , 47.  , 47.5 , 48.  , 48.5 , 49.  , 49.5 , 50.  ,
       50.5 , 51.  , 51.5 , 52.  , 52.5 , 53.  , 53.5 , 54.  , 54.5 ,
       55.  , 55.5 , 56.  , 56.5 , 57.  , 57.5 , 58.  , 58.5 , 59.  ,
       59.5 , 60.  ]

In [85]:
np.max(df_agg.speed)

188.0

In [91]:
# Discard aggregated points whose median speed falls outside 0..60
# (both bounds inclusive).
df_agg = df_agg.loc[(df_agg['speed'] <= 60.0) & (df_agg['speed'] >= 0.0)]

In [None]:
df

# Try Overpass-api

In [65]:
import overpy

In [66]:
api = overpy.Overpass()

In [67]:
# Ask the Overpass API for every OSM node, way and relation tagged
# railway=tram_stop inside one fixed bounding box (the same four coordinates
# are repeated in each of the three statements; Overpass bbox order is
# south, west, north, east). The trailing `>;` / `out skel qt;` statements
# additionally request the elements referenced by the matched ways/relations.
result = api.query("""
/*
This has been generated by the overpass-turbo wizard.
The original search was:
“railway=tram_stop”
*/
[out:json][timeout:25];
// gather results
(
  // query part for: “railway=tram_stop”
  node["railway"="tram_stop"](59.06068293840616,37.78472900390625,59.19615304670681,38.056297302246094);
  way["railway"="tram_stop"](59.06068293840616,37.78472900390625,59.19615304670681,38.056297302246094);
  relation["railway"="tram_stop"](59.06068293840616,37.78472900390625,59.19615304670681,38.056297302246094);
);
// print results
out body;
>;
out skel qt;
    """)

In [69]:
type(result)

overpy.Result

In [28]:
# Dump every way in the Overpass result: its name and highway tags
# ("n/a" when a tag is absent) followed by the coordinates of its nodes.
for osm_way in result.ways:
    way_tags = osm_way.tags
    name_tag = way_tags.get("name", "n/a")
    highway_tag = way_tags.get("highway", "n/a")
    print("Name: %s" % name_tag)
    print("  Highway: %s" % highway_tag)
    print("  Nodes:")
    for osm_node in osm_way.nodes:
        # %-formatting is kept on purpose: it renders the coordinates with
        # six decimal places regardless of their numeric type.
        coordinates = (osm_node.lat, osm_node.lon)
        print("    Lat: %f, Lon: %f" % coordinates)

In [39]:
result.nodes

[<overpy.Node id=475354155 lat=59.1315609 lon=37.9172183>,
 <overpy.Node id=475356448 lat=59.1254892 lon=37.9658755>,
 <overpy.Node id=475356454 lat=59.1221483 lon=37.9901637>,
 <overpy.Node id=475356457 lat=59.1197031 lon=38.0077884>,
 <overpy.Node id=476289463 lat=59.1321860 lon=37.9100421>,
 <overpy.Node id=476289468 lat=59.1337185 lon=37.9035187>,
 <overpy.Node id=476289473 lat=59.1354074 lon=37.8955272>,
 <overpy.Node id=476289477 lat=59.1353632 lon=37.8896466>,
 <overpy.Node id=476289480 lat=59.1354697 lon=37.8748661>,
 <overpy.Node id=590684253 lat=59.1355862 lon=37.8581460>,
 <overpy.Node id=590822341 lat=59.1356380 lon=37.8594717>,
 <overpy.Node id=590822532 lat=59.1327173 lon=37.8397573>,
 <overpy.Node id=591853174 lat=59.1313679 lon=37.9166694>,
 <overpy.Node id=592629105 lat=59.1355552 lon=37.9189806>,
 <overpy.Node id=592632960 lat=59.1285925 lon=37.9402938>,
 <overpy.Node id=592632961 lat=59.1285125 lon=37.9409823>,
 <overpy.Node id=592632966 lat=59.1264668 lon=37.9584066

# Geopandas

In [40]:
import geopandas as gpd



In [41]:
gdf = gpd.read_file('export.geojson')

In [44]:
gdf = gdf[['name','geometry']]

In [51]:
gdf.to_file(filename='tram_stop.geojson', driver="GeoJSON")

# Test examples