# Pràctica LP

## Baixar i tractar les dades

In [1]:
import pandas as pd

In [2]:
import networkx as nx

In [3]:
from haversine import haversine

In [226]:
from staticmap import StaticMap, Line, CircleMarker

In [41]:
from fuzzywuzzy import fuzz



In [279]:
import telegram
from telegram.ext import Updater
from telegram.ext import CommandHandler

In [6]:
URL = 'https://github.com/jordi-petit/lp-graphbot-2019/blob/master/dades/worldcitiespop.csv.gz?raw=true'

In [7]:
COLUMNS = {
    'Country': 0,
    'AccentCity': 1,
    'Region': 2,
    'Population': 3,
    'Latitude': 4,
    'Longitude': 5
}

In [8]:
MIN_POPULATION = 100000
MAX_DISTANCE = 300
CIRCLE_SCALE = 0.000005

## Pandas

In [15]:
import gzip

In [9]:
import time
# Load only the columns named in COLUMNS from the gzipped city CSV.
start_time = time.time()
dataframe = pd.read_csv(
    'data/citydata.csv.gz',
    usecols=list(COLUMNS),
    compression='gzip'
)

  interactivity=interactivity, compiler=compiler, result=result)


In [44]:
dataframe.head()

Unnamed: 0,Country,AccentCity,Region,Population,Latitude,Longitude
0,ad,Aixàs,6,,42.483333,1.466667
1,ad,Aixirivali,6,,42.466667,1.5
2,ad,Aixirivall,6,,42.466667,1.5
3,ad,Aixirvall,6,,42.466667,1.5
4,ad,Aixovall,6,,42.466667,1.483333


In [65]:
dataframe[dataframe['AccentCity']=='Madrid']

Unnamed: 0,Country,AccentCity,Region,Population,Latitude,Longitude
873329,es,Madrid,29,3102644.0,40.408566,-3.69222


In [66]:
0.000005*3102644

15.51322

In [10]:
# Keep only the cities whose population exceeds the threshold declared in the
# constants cell, instead of repeating the number here.
dataframe = dataframe[dataframe['Population'] > MIN_POPULATION]

In [11]:
len(dataframe)

3527

In [None]:
dataframe

In [12]:
# Build the lookup tables used by the graph and plotting cells:
#   coordinates:        'City, cc; region' -> (latitude, longitude)
#   coordinate_to_city: (latitude, longitude) -> 'City, cc'
#   populations:        'City, cc; region' -> population
start_time = time.time()
coordinates = dict()
coordinate_to_city = dict()
populations = dict()
# Columns are read by label (through itertuples) rather than by the integer
# positions stored in COLUMNS: integer keys on a label-indexed row only work
# through pandas' deprecated positional fallback.
for row in dataframe.itertuples(index=False):
    key = '{city}, {country}; {region}'.format(
        city=row.AccentCity,
        country=row.Country,
        region=row.Region
    )
    position = (row.Latitude, row.Longitude)
    coordinates[key] = position
    # Cities that share the exact same coordinates overwrite each other here
    # (the last one read wins).
    coordinate_to_city[position] = key.split(';')[0]
    populations[key] = row.Population
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.6108977794647217 seconds ---


In [10]:
len(coordinates)

3527

## CSV 

In [25]:
import csv
import gzip

In [53]:
import requests

In [4]:
r = requests.get(URL)

In [5]:
open('data/citydata.csv.gz', 'wb').write(r.content)

43873807

In [26]:
def process_row(row):
    '''
    Turns one raw CSV row into the values stored for a city.

    row must unpack into exactly seven string fields: country, an unused
    field, city, region, population, latitude and longitude.

    Returns a tuple (code, (latitude, longitude), population) where code is
    'city, country; region', the coordinates are floats and the population
    is an int (0 when the field is empty).
    '''
    country, _, city, region, raw_pop, raw_lat, raw_lon = row
    code = '{}, {}; {}'.format(city, country, region)
    # An empty population counts as 0; a decimal one keeps only the digits
    # before the dot.
    whole_part = raw_pop.partition('.')[0] if raw_pop != '' else 0
    return code, (float(raw_lat), float(raw_lon)), int(whole_part)

In [55]:
# Rebuild the coordinates/populations tables straight from the gzipped CSV,
# keeping only cities with at least MIN_POPULATION inhabitants.
start_time = time.time()
coordinates = dict()
populations = dict()

with gzip.open('data/citydata.csv.gz', 'rt') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first line is the header; discard it (the default keeps an empty
    # file from raising StopIteration).
    next(csv_reader, None)
    for row in csv_reader:
        city, coords, pop = process_row(row)
        if pop >= MIN_POPULATION:
            coordinates[city] = coords
            populations[city] = pop
print("--- %s seconds ---" % (time.time() - start_time))

--- 10.696715116500854 seconds ---


In [31]:
populations['Barcelona, es; ']

KeyError: 'Barcelona, es'

In [21]:
with gzip.open('citydata.csv.gz', 'rt') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        

## Crear el graf

In [243]:
G = nx.Graph()

In [244]:
# Every city key becomes a node of the graph (iterating a dict yields its keys).
G.add_nodes_from(coordinates)

In [228]:
def get_distance(coordsa, coordsb):
    '''
    Distance between two points as computed by haversine.

    Both arguments are passed through unchanged; the rest of the notebook
    supplies (latitude, longitude) tuples. The unit is whatever haversine
    returns by default (presumably kilometres -- confirm against the
    installed version).
    '''
    distance = haversine(coordsa, coordsb)
    return distance

In [179]:
def get_city_distance(citya, cityb):
    '''
    Distance between two cities given by their keys in coordinates.

    Raises KeyError if either key is not present in coordinates.
    '''
    origin = coordinates[citya]
    target = coordinates[cityb]
    return get_distance(origin, target)

In [250]:
# Connect every pair of cities whose distance is at most MAX_DISTANCE, storing
# that distance as the edge weight.
# G is undirected, so each unordered pair is visited exactly once: pairing a
# city only with the ones after it in the list avoids computing every distance
# twice and avoids measuring a city against itself.
city_names = list(coordinates.keys())
for index, city_a in enumerate(city_names):
    for city_b in city_names[index + 1:]:
        distance = get_city_distance(city_a, city_b)
        if distance <= MAX_DISTANCE:
            G.add_edge(city_a, city_b, weight=distance)

In [None]:
for edge in G.edges:
    print(edge)
    break

In [229]:
def is_plottable(edge, coords, dist):
    '''
    Tells whether both endpoints of edge lie within dist of coords.

    edge holds two city keys of coordinates (a node can be checked by
    passing the same key twice); coords is the reference point handed to
    get_distance.
    '''
    endpoints = (edge[0], edge[1])
    # Both distances are always computed, then combined.
    within = [
        get_distance(coordinates[city], coords) <= dist
        for city in endpoints
    ]
    return all(within)

def plotgraph(lat, long, dist):
    '''
    Draws the edges of G whose two endpoints are within dist of the point
    (lat, long) and saves the rendered 400x400 map to ./map.png.
    '''
    center = (lat, long)
    mapa = StaticMap(400, 400)
    visible_edges = (
        edge for edge in G.edges if is_plottable(edge, center, dist)
    )
    for city_a, city_b in visible_edges:
        # Staticmap needs coordinates in order (Longitude, Latitude), while
        # coordinates stores (Latitude, Longitude).
        segment = (coordinates[city_a][::-1], coordinates[city_b][::-1])
        mapa.add_line(Line(segment, 'blue', 3))
    mapa.render().save('./map.png')

In [215]:
plotgraph(coordinates['Barcelona, es; 56'][0], coordinates['Barcelona, es; 56'][1], 100)

In [230]:
def plotpop(lat, long, dist):
    '''
    Draws a red circle for every node of G within dist of the point
    (lat, long), sized by its population times CIRCLE_SCALE, and saves the
    rendered 400x400 map to ./map.png.
    '''
    center = (lat, long)
    mapa = StaticMap(400, 400)
    for node in G.nodes:
        # A node is checked as a degenerate edge from itself to itself.
        if not is_plottable((node, node), center, dist):
            continue
        node_lat, node_lon = coordinates[node]
        size = populations[node]*CIRCLE_SCALE
        # Staticmap expects (Longitude, Latitude).
        mapa.add_marker(CircleMarker((node_lon, node_lat), 'red', size))
    mapa.render().save('./map.png')

In [225]:
plotpop(coordinates['Barcelona, es; 56'][0], coordinates['Barcelona, es; 56'][1], 1000)

In [277]:
def get_most_similar(name):
    '''
    Returns the node of G whose text before the ';' scores the highest
    fuzz.ratio against name.

    On ties the first such node in iteration order wins; None is returned
    when G has no nodes.
    '''
    def similarity(city):
        return fuzz.ratio(city.split(';')[0], name)

    return max(G.nodes, key=similarity, default=None)

def route(src, dst):
    '''
    Draws the weighted shortest path in G between the cities that best
    match src and dst, and saves the rendered 400x400 map to ./map.png.

    Every city on the path gets a red marker and consecutive cities are
    joined by a blue line.
    '''
    origin = get_most_similar(src)
    destination = get_most_similar(dst)

    path = nx.shortest_path(
        G,
        source=origin,
        target=destination,
        weight='weight'
    )

    mapa = StaticMap(400, 400)
    previous_point = None
    for city in path:
        latitude, longitude = coordinates[city]
        # Staticmap expects (Longitude, Latitude).
        point = (longitude, latitude)
        mapa.add_marker(CircleMarker(point, 'red', 4))
        # The first city has no predecessor, so no line is drawn for it.
        if previous_point is not None:
            mapa.add_line(Line((previous_point, point), 'blue', 3))
        previous_point = point

    mapa.render().save('./map.png')

In [278]:
route('Barcelona, es', 'Zuric, ch; 25')

## Telegram

In [280]:
def start(bot, update):
    '''
    Handler for the /start command: sends a greeting to the chat the
    command came from.
    '''
    chat = update.message.chat_id
    bot.send_message(chat_id=chat, text="Hola! Soc un bot bàsic.")

# Read the access token from token.txt. The context manager closes the file
# as soon as the token has been read instead of leaving the handle open.
with open('token.txt') as token_file:
    TOKEN = token_file.read().strip()

# Create the objects used to work with Telegram
updater = Updater(token=TOKEN)
dispatcher = updater.dispatcher

# When the bot receives the /start command, run the start function
dispatcher.add_handler(CommandHandler('start', start))

# Start the bot
updater.start_polling()

<queue.Queue at 0x158e36400>

In [75]:
import re

In [85]:
# Matches '[<number> <number>]', where each number is an optionally negative
# integer or decimal. The fractional part is optional as a whole, so
# single-digit values such as '[2 -3]' are accepted (the earlier pattern
# needed at least two digits per number when there was no dot).
COORD_PAIR_REGEX = r'^\[-?[0-9]+(\.[0-9]+)? -?[0-9]+(\.[0-9]+)?\]$'
if re.search(COORD_PAIR_REGEX, '[2.2 -3.2]'):
    print('A')

A


In [47]:
fuzz.ratio('Barcelona', 'Barna')

71

In [62]:
def parse_plot_args(args, bot=None, update=None):
    '''
    Parses args of functions plotgraph and plotpop

    args must hold exactly two strings: a distance made only of digits and
    a bracketed coordinate pair accepted by c.LON_LAT_REGEX whose two
    values are separated by a comma, e.g. ['300', '[41.3887901,2.1589899]'].

    bot and update are optional: when both are given, the reason for a
    rejection is sent to update.message.chat_id through bot.send_message.
    They used to be read as undefined names, so every rejection raised
    NameError instead of returning None.

    Returns the tuple (dist, lat, lon) with the three values still as
    strings, or None when the arguments are rejected.
    '''
    def notify(text):
        # Without the Telegram objects there is nobody to report to.
        if bot is not None and update is not None:
            bot.send_message(
                chat_id=update.message.chat_id,
                text=text
            )

    if len(args) != 2:
        notify(c.PLOTGRAPH_USAGE_ARGS)
        return None

    dist = args[0]
    if not dist.isdigit():
        notify(c.PLOTGRAPH_DIST_FORMAT)
        return None

    if re.search(c.LON_LAT_REGEX, args[1]):
        lat_lon = args[1].replace('[', '').replace(']', '').split(',')
        # Guards against a pattern that accepts a separator other than ','
        # (c.LON_LAT_REGEX is defined outside this notebook).
        if len(lat_lon) >= 2:
            lat = lat_lon[0]
            lon = lat_lon[1]

            return dist, lat, lon

    notify(c.PLOTGRAPH_LON_LAT_FORMAT)
    return None

In [65]:
parse_plot_args(['300', '[41.3887901,2.1589899]'])

('300', '41.3887901', '2.1589899')

In [64]:
import constants as c

In [67]:
from io import BytesIO

In [1]:
from graph_handler import Graph



In [3]:
G = Graph()

Downloading data
Processing data
Read 3173959 lines
Graph created with 3527 nodes and 48159 edges!             Start asking me!


In [4]:
G.plotgraph(41.3887901, 2.1589899, 300)

## Decorators

In [9]:
# Global variables
G = None


# Decorator so start has to be the first function
def start_first(func):
    '''
    Wraps func so that it only runs once the module-level G is truthy.

    While G is falsy the wrapper prints 'Not graph' and returns None
    without calling func. Otherwise it forwards every positional and
    keyword argument to func and returns its result.
    '''
    import functools

    # wraps keeps func's name and docstring on the wrapper.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not G:
            print('Not graph')
            return None
        return func(*args, **kwargs)
    return wrapper

In [23]:
@u.start_first
def hola(a, b):
    print(a, b)

In [24]:
hola(1, 2)

NameError: name 'G' is not defined

In [12]:
G = True
hola(1, 2)

1 2


In [13]:
l = ['"Barcelona,', 'es"', '"Zurich,', 'ch"']

In [14]:
len(l)

4

In [16]:
r = ' '.join(l)

In [21]:
r.split('"')[3]

'Zurich, ch'

In [19]:
len(r.split('"'))

5

In [22]:
import utilities as u

In [71]:
def test(a):
    b=a

In [72]:
# NOTE(review): this rebinds `c`, which is also the alias created by
# `import constants as c` and read by parse_plot_args; after this cell runs,
# that import has to be re-run before parse_plot_args can be used again.
c = test(2)

In [73]:
type(c)

NoneType

## Better way to do the graph

In [4]:
G = nx.Graph()

In [101]:
# Adds every city key as a node, one call per city. The next cell does the
# same in a single call with G.add_nodes_from.
for city in coordinates.keys():
    G.add_node(city)

In [13]:
G.add_nodes_from(coordinates.keys())

In [13]:
from scipy import spatial

In [17]:
kdtree = spatial.KDTree(list(coordinate_to_city.keys()))

In [None]:
# Build the edges with a KD-tree instead of comparing every pair of cities.
#
# The search radius used to be `r`, which earlier cells bind to an HTTP
# response and later to a string, and the index pairs returned by query_pairs
# were added to G directly, creating integer nodes instead of linking cities.
#
# A tree over raw (latitude, longitude) degrees cannot be searched by distance
# on the ground (a degree of longitude shrinks towards the poles), so the
# `kdtree` built in the previous cell is not used here. Each city is projected
# onto the unit sphere instead, where "within MAX_DISTANCE along the surface"
# becomes "within a fixed straight-line (chord) distance".
import math

# Mean Earth radius in km; assumed to match the one haversine uses -- TODO confirm.
EARTH_RADIUS_KM = 6371.0088

city_names = list(coordinates.keys())
unit_vectors = []
for name in city_names:
    lat_rad, lon_rad = (math.radians(value) for value in coordinates[name])
    unit_vectors.append((
        math.cos(lat_rad) * math.cos(lon_rad),
        math.cos(lat_rad) * math.sin(lon_rad),
        math.sin(lat_rad)
    ))
sphere_tree = spatial.KDTree(unit_vectors)

# Chord subtended by an arc of MAX_DISTANCE (assumed to be in km), padded a
# little so rounding never drops a pair that the exact check below would keep.
max_chord = 2 * math.sin(MAX_DISTANCE / (2 * EARTH_RADIUS_KM)) * 1.001

# query_pairs yields index pairs (i, j) into the list the tree was built from.
for index_a, index_b in sphere_tree.query_pairs(max_chord):
    city_a = city_names[index_a]
    city_b = city_names[index_b]
    distance = get_city_distance(city_a, city_b)
    # The tree only preselects candidates; the edge criterion is still the
    # distance returned by get_city_distance.
    if distance <= MAX_DISTANCE:
        G.add_edge(city_a, city_b, weight=distance)

In [102]:
def get_distance(coordsa, coordsb):
    '''
    Distance between two points as computed by haversine.

    Both arguments are handed to haversine unchanged; the unit is its
    default (presumably kilometres -- confirm against the installed
    version).
    '''
    distance = haversine(coordsa, coordsb)
    return distance

In [103]:
def get_city_distance(citya, cityb):
    '''
    Distance between two cities, looked up by their keys in coordinates.

    Raises KeyError if either key is missing from coordinates.
    '''
    origin = coordinates[citya]
    target = coordinates[cityb]
    return get_distance(origin, target)

In [15]:
float('Hola')

ValueError: could not convert string to float: 'Hola'