In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import zipfile, rarfile
from datetime import datetime
import os
from tqdm import tqdm_notebook as tqdm
import geopandas as gpd
from shapely.geometry import *
from shapely.ops import *
import plotly.express as px

In [12]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 * 1000 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [132]:
metro_stops=pd.read_csv('stops_subway.csv', sep = ';', encoding='utf-8')

In [133]:
metro_stops.head()

Unnamed: 0,ssId,Станция,ss_lat,ss_long
0,2081,Алабинская,53.209188,50.133092
1,2082,Российская,53.211449,50.149668
2,2083,Московская,53.203067,50.160567
3,2084,Гагаринская,53.200304,50.176584
4,2085,Спортивная,53.200954,50.199017


In [134]:
sites=pd.read_csv('Реестр сайтов.csv', sep = ',', encoding='utf-8')

In [135]:
sites.head()

Unnamed: 0,site_id,site_lat,long
0,0,53.381994,50.166649
1,1,53.372254,50.177387
2,2,53.370353,50.18843
3,3,53.365756,50.197235
4,4,53.356007,50.20929


In [189]:
#формируем сайты с буфером
sites_with_buff = gpd.GeoDataFrame(sites, geometry = [Point(xy) for xy in zip(sites['long'], sites['site_lat'])])

In [190]:
sites_with_buff.crs='epsg:4326'

In [191]:
sites_with_buff = sites_with_buff.to_crs('epsg:32640')

In [192]:
sites_with_buff.geometry = sites_with_buff.geometry.buffer(200)
sites_with_buff = sites_with_buff.to_crs('epsg:4326')

In [193]:
metro_stops.head()

Unnamed: 0,ssId,Станция,ss_lat,ss_long,geometry
0,2081,Алабинская,53.209188,50.133092,POINT (50.13309 53.20919)
1,2082,Российская,53.211449,50.149668,POINT (50.14967 53.21145)
2,2083,Московская,53.203067,50.160567,POINT (50.16057 53.20307)
3,2084,Гагаринская,53.200304,50.176584,POINT (50.17658 53.20030)
4,2085,Спортивная,53.200954,50.199017,POINT (50.19902 53.20095)


In [194]:
#формируем геослой из остановок метро с буфером
metro_stops = gpd.GeoDataFrame(metro_stops, geometry = [Point(xy) for xy in zip(metro_stops['ss_long'], metro_stops['ss_lat'])])
metro_stops.crs='epsg:4326'

In [195]:
len(metro_stops)

20

In [196]:
len(sites_with_buff)

784

In [197]:
metro_stops2=gpd.sjoin(metro_stops, sites_with_buff[['geometry']], how='left', op='within', lsuffix='left', rsuffix='right')

In [198]:
len(metro_stops2)

20

In [199]:
metro_stops2.head()

Unnamed: 0,ssId,Станция,ss_lat,ss_long,geometry,index_right
0,2081,Алабинская,53.209188,50.133092,POINT (50.13309 53.20919),579.0
1,2082,Российская,53.211449,50.149668,POINT (50.14967 53.21145),
2,2083,Московская,53.203067,50.160567,POINT (50.16057 53.20307),31.0
3,2084,Гагаринская,53.200304,50.176584,POINT (50.17658 53.20030),201.0
4,2085,Спортивная,53.200954,50.199017,POINT (50.19902 53.20095),351.0


In [211]:
metro__kir=metro_stops2[metro_stops2['Станция']=='Кировская']
metro__kir['index_right']=101



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [212]:
metro__kir

Unnamed: 0,ssId,Станция,ss_lat,ss_long,geometry,index_right
8,2089,Кировская,53.211237,50.26944,POINT (50.26944 53.21124),101
18,2099,Кировская,53.211237,50.26944,POINT (50.26944 53.21124),101


In [215]:
metro_stops2=metro_stops2.append(metro__kir)

In [216]:
metro_stops2.to_csv('mretro_sites.csv')

In [150]:
sites_with_buff.to_file("sites_with_buff.json", driver="GeoJSON", encoding='utf-8')

In [80]:
sites['site_id'].max()

783

In [89]:
stop_seq=pd.read_csv('stop_seq.csv', sep = ';', encoding='utf-8')

In [90]:
seq_sites=pd.read_csv('Реестр остановочных пунктов в привязке к сайтам.csv', sep = ',', encoding='utf-8')

In [91]:
seq_sites=seq_sites[['ss_id','site_id']]

In [92]:
stop_seq=stop_seq.merge(seq_sites,how='left', left_on='ssId', right_on='ss_id')

In [93]:
stop_seq=stop_seq.drop('ss_id',axis=1)

In [94]:
stop_seq_null=stop_seq[stop_seq['site_id'].isna()]

In [95]:
stop_seq_null['ssId'].max()

2046

In [96]:
stop_seq_null['site_id']=stop_seq_null['ssId']+2046



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [97]:
stop_seq=stop_seq[~stop_seq['site_id'].isna()]

In [101]:
stop_seq=stop_seq.append(stop_seq_null)

In [217]:
stop_seq.to_csv('stop_seq2.csv')

In [222]:
len(stop_seq['mr_id'].unique())

80