# Make a map of the number of transactions per city

In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import sqlite3

In [3]:
# Reference table of 123 German cities (coordinates, state, population), loaded from de.csv.
big_cities = pd.read_csv(filepath_or_buffer="de.csv")

In [16]:
# Print column names, dtypes and non-null counts of the city reference table.
big_cities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   city               123 non-null    object 
 1   lat                123 non-null    float64
 2   lng                123 non-null    float64
 3   country            123 non-null    object 
 4   iso2               123 non-null    object 
 5   admin_name         123 non-null    object 
 6   capital            83 non-null     object 
 7   population         122 non-null    float64
 8   population_proper  122 non-null    float64
dtypes: float64(4), object(5)
memory usage: 8.8+ KB


In [4]:
# Preview the first rows of the city reference table.
big_cities.head()

Unnamed: 0,city,lat,lng,country,iso2,admin_name,capital,population,population_proper
0,Berlin,52.52,13.405,Germany,DE,Berlin,primary,4473101.0,3677472.0
1,Stuttgart,48.7775,9.18,Germany,DE,Baden-Württemberg,admin,2787724.0,626275.0
2,Munich,48.1375,11.575,Germany,DE,Bavaria,admin,2606021.0,1487708.0
3,Hamburg,53.55,10.0,Germany,DE,Hamburg,admin,2484800.0,1853935.0
4,Cologne,50.9364,6.9528,Germany,DE,North Rhine-Westphalia,,1083498.0,1073096.0


In [6]:
## make db connection
# Open the SQLite file read-only through a URI. With the default mode a wrong or
# missing path silently creates an empty database and the error only surfaces later
# as "no such table"; `mode=ro` raises sqlite3.OperationalError right here instead,
# and guarantees this analysis cannot modify the database.
conn = sqlite3.connect("file:challenge_db.db?mode=ro", uri=True)

In [9]:
# Number of payments per location city, leaving out payments whose status is 'ERR'.
#
# Notes on the query:
# - 'ERR' is written with single quotes: in SQL, double quotes denote identifiers, and
#   SQLite only treats "ERR" as a string through a legacy fallback when no such column
#   exists.
# - COUNT(totalAmount) counts only rows with a non-NULL totalAmount.
# - Rows whose status is NULL are dropped as well, because `NOT (NULL = 'ERR')`
#   evaluates to NULL rather than true.
transactions_per_city = pd.read_sql(
    """
    SELECT COUNT(totalAmount) AS num_trans, city
    FROM payments JOIN locations ON payments.locationId = locations.uuid
    WHERE NOT payments.status = 'ERR'
    GROUP BY city
    """,
    con=conn,
)

In [10]:
# Show the per-city transaction counts returned by the query.
transactions_per_city

Unnamed: 0,num_trans,city
0,187,Berlin
1,1,Bielefeld
2,1,Burgwedel
3,2,Großbottwar
4,46,Hamburg
5,1,Ibbenbüren
6,9,Karlsruhe
7,31,Kiel
8,18,Ot
9,1,Pelzerhaken


In [11]:
# City names from the transaction counts that also occur in the big-city reference table.
transactions_per_city.loc[
    transactions_per_city["city"].isin(big_cities["city"]), "city"
]

0        Berlin
1     Bielefeld
4       Hamburg
6     Karlsruhe
7          Kiel
10      Potsdam
Name: city, dtype: object

In [22]:
# Keep only the transaction rows whose city is listed in the big-city reference table.
trans_big_cities = transactions_per_city.loc[
    lambda counts: counts["city"].isin(big_cities["city"])
]

In [15]:
# Print dtypes and non-null counts of the filtered transaction counts.
trans_big_cities.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 10
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   num_trans  6 non-null      int64 
 1   city       6 non-null      object
dtypes: int64(1), object(1)
memory usage: 144.0+ bytes


In [13]:
# Show the transaction counts restricted to cities found in the reference table.
trans_big_cities

Unnamed: 0,num_trans,city
0,187,Berlin
1,1,Bielefeld
4,46,Hamburg
6,9,Karlsruhe
7,31,Kiel
10,11,Potsdam


In [23]:
# Attach coordinates and city metadata from the reference table to the counts.
#
# The merge starts from `transactions_per_city` rather than from `trans_big_cities`
# itself so that this cell is idempotent: merging the already-merged frame a second
# time would create suffixed duplicate columns (lat_x / lat_y, ...) and the map cell,
# which reads `lat` and `lng`, would fail.
# An inner join keeps only cities present in both frames, which yields the same rows
# as filtering with `isin` first and merging afterwards.
trans_big_cities = transactions_per_city.merge(big_cities, on="city", how="inner")

In [24]:
# Show the counts together with the merged coordinates and city metadata.
trans_big_cities

Unnamed: 0,num_trans,city,lat,lng,country,iso2,admin_name,capital,population,population_proper
0,187,Berlin,52.52,13.405,Germany,DE,Berlin,primary,4473101.0,3677472.0
1,1,Bielefeld,52.0211,8.5347,Germany,DE,North Rhine-Westphalia,minor,334002.0,334002.0
2,46,Hamburg,53.55,10.0,Germany,DE,Hamburg,admin,2484800.0,1853935.0
3,9,Karlsruhe,49.0092,8.404,Germany,DE,Baden-Württemberg,minor,308436.0,306502.0
4,31,Kiel,54.3233,10.1394,Germany,DE,Schleswig-Holstein,admin,246601.0,246243.0
5,11,Potsdam,52.4,13.0667,Germany,DE,Brandenburg,admin,183154.0,183154.0


In [31]:
# Bubble map: one marker per city, sized by its number of transactions.
#
# The view settings are defined once here. Previously the centre and scope were
# passed both to px.scatter_geo and again to update_layout, so the two copies could
# drift apart.
MAP_CENTER = dict(lat=51.0057, lon=13.7274)  # manually chosen centre of the view
MAP_ZOOM = 6  # geo projection scale; larger values zoom in further
MAP_HEIGHT = 600  # figure height in pixels

fig = px.scatter_geo(
    trans_big_cities,
    lat="lat",
    lon="lng",
    size="num_trans",
    hover_name="city",
    # Hide the raw coordinates in the tooltip; show only the transaction count.
    hover_data={"lat": False, "lng": False, "num_trans": True},
    scope="europe",
    center=MAP_CENTER,
    title="Number of transactions by (big) city",
)

# update_layout merges nested dicts, so only the projection scale is added to the
# geo settings already configured by px.scatter_geo (scope and centre stay as set).
fig.update_layout(
    autosize=True,
    height=MAP_HEIGHT,
    geo=dict(projection_scale=MAP_ZOOM),
)
fig.show()