In [1]:
import geoplotlib
from geoplotlib.utils import read_csv, BoundingBox, DataAccessObject
import pandas as pd
import numpy as np
import datetime
import json
from shapely.geometry import shape, Point

In [2]:
# Load the trips table from CSV.
travels = pd.read_csv('../evo-plots/travels.csv')

In [2]:
# Base names (without the '.json' extension) of the geojson files whose
# polygons are compared against the trip coordinates.
json_name = [
    'grousewoods',
    'north-vancouver',
    'university',
    'vancouver_ordenado',
    'sea-island',
    'burnaby',
    'new-westminster',
]
# Filled with the parsed geojson documents by a later cell.
geojsons = list()

In [3]:
# Parse every geojson file listed in json_name, in that order.
# The list is rebuilt from scratch so re-running this cell does not append
# the same documents a second time, and the files are read as UTF-8 (the
# encoding JSON is defined in) instead of the platform default.
geojsons = []
for js in json_name:
    with open('../travels_hotmap/'+js+'.json', encoding='utf-8') as json_data:
        geojsons.append(json.load(json_data))

In [4]:
# Collect the unique district names and tag each with its region group:
# the 1-based position of the geojson document the name first appears in.
# The loop variable is deliberately not called `json`, which would shadow
# the imported json module and break any later json.load call.
seen_names = set()
district_rows = []
for group_id, geojson in enumerate(geojsons, start=1):
    for feature in geojson['features']:
        name = feature['properties']['name']

        # Keep only the first occurrence of each district name
        if name not in seen_names:
            seen_names.add(name)
            district_rows.append((name, group_id))

# One row per district: its name and the group it belongs to
names = pd.DataFrame(district_rows, columns=['name', 'group'])

In [5]:
# Enumerate every possible (source, target) pair of district indices,
# self-pairs included, each starting with a trip count of zero.
district_count = len(names)
pair_rows = [[source_idx, target_idx]
             for source_idx in range(district_count)
             for target_idx in range(district_count)]

links = pd.DataFrame(pair_rows, columns=['source', 'target'])
links['value'] = [0] * len(links)

In [7]:
# Counting how many trips were made on each (start district, end district) route

# Lists that will store the start and end district names
start_list = []
end_list = []

# Build every district polygon once, in geojson/feature order, instead of
# rebuilding it for every trip. The loop variable is not called `json` so
# the imported json module stays usable.
district_shapes = [
    (feature['properties']['name'], shape(feature['geometry']))
    for geojson in geojsons
    for feature in geojson['features']
]

# Index label of each district in `names` (first occurrence wins)
district_index = {}
for idx, district in zip(names.index, names['name']):
    district_index.setdefault(district, idx)


def _find_district(point):
    """Return the name of the first district polygon containing point, or None."""
    for district, polygon in district_shapes:
        if polygon.contains(point):
            return district
    return None


# Number of trips seen for each (source index, target index) route
trip_counts = {}

for start_lon, start_lat, end_lon, end_lat in zip(travels['Start_lon'], travels['Start_lat'],
                                                  travels['End_lon'], travels['End_lat']):

    start_name = _find_district(Point(start_lon, start_lat))
    end_name = _find_district(Point(end_lon, end_lat))

    # None is recorded when no polygon contains the point
    start_list.append(start_name)
    end_list.append(end_name)

    # A trip with an unknown start or end matches no route
    if start_name is None or end_name is None:
        continue

    route = (district_index[start_name], district_index[end_name])
    trip_counts[route] = trip_counts.get(route, 0) + 1

# Add the counts to the routes table with a single .loc write per route;
# chained indexing (links['value'].loc[...] += 1) may update a temporary copy.
for (source, target), n_trips in trip_counts.items():
    links.loc[(links['source'] == source) & (links['target'] == target), 'value'] += n_trips

In [8]:
# Attach the start and end district name collected for each trip.
travels['start_district'] = start_list
travels['end_district'] = end_list

In [9]:
# Preview the first rows of the trips table.
travels.head()

Unnamed: 0,Start_time,End_time,Id,Start_lat,Start_lon,End_lat,End_lon,Maps_duration,Real_duration,Fuel_start,Fuel_end,start_district,end_district
0,2018-03-01 08:26:55.380077-08:00,2018-03-01 08:33:09.325978-08:00,JTDKDTB38H1597538,49.259346,-123.12207,49.262367,-123.113274,4.416667,6.232432,52,58,Fairview,Mount Pleasant
1,2018-03-01 08:26:55.380077-08:00,2018-03-01 08:33:09.325978-08:00,JTDKDTB35J1606797,49.249508,-123.137482,49.249508,-123.137482,0.0,6.232432,85,85,Shaughnessy,Shaughnessy
2,2018-03-01 08:26:55.380077-08:00,2018-03-01 08:36:16.682113-08:00,JTDKDTB33H1598015,49.27078,-123.07148,49.27078,-123.07148,0.0,9.355034,85,85,Grandview-Woodland,Grandview-Woodland
3,2018-03-01 08:30:02.405985-08:00,2018-03-01 08:36:16.682113-08:00,JTDKDTB32F1111738,49.25653,-123.186462,49.25653,-123.186462,0.0,6.237935,64,64,Dunbar Southlands,Dunbar Southlands
4,2018-03-01 08:26:55.380077-08:00,2018-03-01 08:39:23.591149-08:00,JTDKDTB34H1598038,49.25476,-123.09726,49.25476,-123.09726,0.0,12.470185,77,77,Riley Park,Riley Park


In [10]:
# Save the trips table to CSV without the row index.
travels.to_csv('../travels_hotmap/travels_district.csv', index=False, encoding='utf-8')

In [6]:
# Reload the trips table from travels_district.csv.
travels = pd.read_csv('../travels_hotmap/travels_district.csv')

In [11]:
# Save the (source, target, value) routes table to CSV without the row index.
links.to_csv('../travels_hotmap/links.csv', index=False, encoding='utf-8')

In [7]:
# Reload the routes table from links.csv.
links = pd.read_csv('../travels_hotmap/links.csv')

## Organizando os dados de viagens como um fluxo de índice da linha para o da coluna

### Fluxo de todas as interações

In [8]:
# Square matrix of zeros with one row and one column per district,
# labelled 0..n-1 until the district names are attached.
district_total = len(names)
flux = pd.DataFrame({col: [0] * district_total for col in range(district_total)})

In [9]:
# Add each route's trip count to its matrix cell
# (row = source district, column = target district).
# A single positional .iat write replaces the chained
# flux[target].iloc[source] += ..., which may update a temporary copy.
for source, target, value in zip(links['source'], links['target'], links['value']):
    flux.iat[source, target] += value

In [10]:
# Label rows and columns with the district names, in the order of `names`.
flux.index = list(names['name'])
flux.columns = list(names['name'])

In [11]:
# Save the district-to-district matrix as a space-separated file.
flux.to_csv('../travels_hotmap/flux.csv', sep=' ', encoding='utf-8')

### Fluxo de viagens fora de Vancouver

In [12]:
# Square matrix of zeros with one row and one column per region group
# (groups are numbered from 1, so the largest group id is the group count).
group_total = names['group'].max()
flux = pd.DataFrame({col: [0] * group_total for col in range(group_total)})

In [13]:
# Zero-based matrix position of the group each district belongs to
# (group ids start at 1), looked up once instead of per route.
group_position = names['group'].to_numpy() - 1

# Add each route's trip count to the cell of its (source group, target group).
# A single positional .iat write replaces the chained
# flux[target].iloc[source] += ..., which may update a temporary copy.
for source_district, target_district, value in zip(links['source'], links['target'], links['value']):
    source = group_position[source_district]
    target = group_position[target_district]

    flux.iat[source, target] += value

In [14]:
# Label rows and columns with the geojson base names, one per region group.
flux.index = list(json_name)
flux.columns = list(json_name)

In [15]:
# Save the group-to-group matrix as a space-separated file.
flux.to_csv('../travels_hotmap/fluxOutVancouver.csv', sep=' ', encoding='utf-8')

### Fluxo de viagens dentro de Vancouver

In [16]:
# Selecting Vancouver districts.
# Group ids are the 1-based position of each geojson in json_name, so the
# Vancouver group is derived from the file name instead of hard-coding 4.
vancouver_group = json_name.index('vancouver_ordenado') + 1
districts = names[names['group'] == vancouver_group].index

In [17]:
# Square matrix of zeros with one row and one column per Vancouver district.
vancouver_total = len(districts)
flux = pd.DataFrame({col: [0] * vancouver_total for col in range(vancouver_total)})

In [18]:
# `districts` holds the index labels of the Vancouver districts inside `names`.
# Those labels do not start at 0 (other groups come first), so routes are
# selected by membership and each label is translated to its row/column
# position in the Vancouver-only matrix.
district_position = {district: pos for pos, district in enumerate(districts)}

# Keep only the routes that both start and end in a Vancouver district
links_districts = links[links['source'].isin(districts) & links['target'].isin(districts)]

for source_district, target_district, value in zip(links_districts['source'],
                                                   links_districts['target'],
                                                   links_districts['value']):
    source = district_position[source_district]
    target = district_position[target_district]

    # Single positional write; the chained flux[target].iloc[source] += ...
    # may update a temporary copy instead of the matrix.
    flux.iat[source, target] += value

In [19]:
# Label rows and columns with the names of the group-4 districts.
flux.index = list(names[names['group'] == 4]['name'])
flux.columns = list(names[names['group'] == 4]['name'])

In [20]:
# Save the Vancouver-only matrix as a space-separated file.
flux.to_csv('../travels_hotmap/fluxInVancouver.csv', sep=' ', encoding='utf-8')

## Fluxo de viagens por horários

In [8]:
def str_to_datetime(df_time):
    """
    Convert timestamp strings to datetime values.

    Each value is matched against the accepted formats in order. The UTC
    offset ('-08:00' or '-07:00') is matched as literal text and discarded,
    so the returned values are naive datetimes.

    Parameters
    ----------
    df_time : iterable of str (e.g. a pandas.Series)
        Timestamp strings to convert.

    Returns
    ----------
    date_list : pandas.DataFrame
        Single-column dataframe with one row per input value, in input order.
        A value matching none of the formats is reported on stdout and stored
        as NaT, so the remaining rows stay aligned with the input.

    """
    date_list = []

    # Timestamps with the -08:00 and the -07:00 offset,
    # each with and without fractional seconds
    format_string = ['%Y-%m-%d %H:%M:%S.%f-08:00', '%Y-%m-%d %H:%M:%S.%f-07:00',
                     '%Y-%m-%d %H:%M:%S-08:00', '%Y-%m-%d %H:%M:%S-07:00']

    for date in df_time:
        parsed = pd.NaT
        for fmt in format_string:
            try:
                parsed = datetime.datetime.strptime(str(date), fmt)
                break
            except ValueError:
                # This format does not match; try the next one
                continue

        if parsed is pd.NaT:
            print('Erro in: '+str(date))

        # Always append, so row i of the result belongs to input i
        date_list.append(parsed)

    return pd.DataFrame(date_list)

In [9]:
# Distance between two coordinates
def haversine(lon1, lat1, lon2, lat2):
    """Haversine distance between (lon1, lat1) and (lon2, lat2), given in degrees.

    The central angle is scaled by 6367 * 1000 (presumably an Earth radius of
    6367 km expressed in metres). Inputs go through NumPy functions only.
    """
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = (
        np.radians(angle) for angle in (lon1, lat1, lon2, lat2)
    )

    delta_lon = lon2_rad - lon1_rad
    delta_lat = lat2_rad - lat1_rad

    # Haversine term of the central angle
    hav = np.sin(delta_lat/2.0)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(delta_lon/2.0)**2

    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 6367 * central_angle * 1000

In [10]:
# Replace both timestamp columns with their parsed datetime values
for time_column in ('Start_time', 'End_time'):
    travels[time_column] = str_to_datetime(travels[time_column])

# Drop every trip that has a missing value in any column
travels.dropna(axis=0, inplace=True)

In [12]:
# Adding the hour of day at which each trip started
travels['hour'] = travels['Start_time'].dt.hour

In [None]:
# Adding the start-to-end haversine distance of every trip.
# haversine only applies NumPy functions to its arguments, so whole columns
# are passed at once instead of calling it row by row in a Python loop.
travels['distance'] = haversine(travels['Start_lon'], travels['Start_lat'],
                                travels['End_lon'], travels['End_lat'])

In [None]:
# Removing travels that finish in the same place or near where started
# (distance of 150 or less). The filtered rows are copied so that columns
# added to travels_greatDistance later are written to an independent frame
# rather than to a slice of `travels`.
travels_greatDistance = travels[travels['distance'] > 150].copy()

In [1]:
# Summary statistics of the trips kept by the distance filter.
travels_greatDistance.describe()

NameError: name 'travels_greatDistance' is not defined

In [35]:
# Creating flux csvs for each hour of day
def hour_flux(df, names, file_path):
    """Write one start-district by end-district trip-count matrix per hour.

    Parameters
    ----------
    df : pandas.DataFrame
        Trips with 'hour', 'start_district' and 'end_district' columns.
    names : pandas.DataFrame
        District table; its 'name' column fixes the row and column order.
    file_path : str
        Prefix the output files are written under; file 'hour<H>.csv' is
        appended to it for every hour H in 0..23.

    Notes
    -----
    Rows are start districts and columns are end districts. A trip whose
    start or end district is not listed in `names` is skipped.
    """
    district_names = list(names['name'])

    # Matrix position of each district (first occurrence wins)
    position = {}
    for pos, district in enumerate(district_names):
        position.setdefault(district, pos)

    size = len(district_names)

    for hour in range(24):
        hour_data = df[df['hour'] == hour]

        # Counting in a plain array avoids chained DataFrame assignment,
        # which may update a temporary copy instead of the matrix.
        counts = np.zeros((size, size), dtype=np.int64)
        for start, end in zip(hour_data['start_district'], hour_data['end_district']):
            if start in position and end in position:
                counts[position[start], position[end]] += 1

        flux = pd.DataFrame(counts, index=district_names, columns=district_names)

        if hour % 3 == 0 or hour == 23: print('Hour processed: '+str(hour))

        flux.to_csv(file_path+'hour'+str(hour)+'.csv', encoding='utf-8')

In [32]:
# Write the hourly matrices for all distance-filtered trips.
hour_flux(travels_greatDistance, names, '../travels_hotmap/hour_analysis/')

Hour processed: 0
Hour processed: 3
Hour processed: 6
Hour processed: 9
Hour processed: 12
Hour processed: 15
Hour processed: 18
Hour processed: 21
Hour processed: 23


### Filtrando por dias de semana

In [None]:
# Column of weekdays, monday = 0, sunday = 6.
# assign() returns a new frame, so the column is never written into a
# filtered slice of another dataframe (SettingWithCopy).
travels_greatDistance = travels_greatDistance.assign(
    wday=travels_greatDistance['Start_time'].dt.dayofweek
)

In [36]:
# Hourly matrices split by day type: weekdays (wday 0-4) first, then
# weekends (wday 5-6), each announced with its own banner.
is_weekday = travels_greatDistance['wday'] <= 4
is_weekend = travels_greatDistance['wday'] >= 5

day_type_runs = (
    ('Weekdays: \n', is_weekday, '../travels_hotmap/hour_analysis/weekdays/'),
    ('\n Weekends: \n', is_weekend, '../travels_hotmap/hour_analysis/weekends/'),
)
for banner, day_mask, output_dir in day_type_runs:
    print(banner)
    hour_flux(travels_greatDistance[day_mask], names, output_dir)

Weekdays: 

Hour processed: 0
Hour processed: 3
Hour processed: 6
Hour processed: 9
Hour processed: 12
Hour processed: 15
Hour processed: 18
Hour processed: 21
Hour processed: 23

 Weekends: 

Hour processed: 0
Hour processed: 3
Hour processed: 6
Hour processed: 9
Hour processed: 12
Hour processed: 15
Hour processed: 18
Hour processed: 21
Hour processed: 23
