This notebook is connected with story on Medium. If you want to get full understanding what I was trying to do in this notebook, then you can read my story on Medium with datailed comments

Link to the Medium post: https://medium.com/@romanmilko123/prague-cultural-places-analysis-a6a6840e4ee6

In [14]:
import requests
import pandas as pd
import plotly.graph_objects as go

In [15]:
# get json response, extract all neccesary rows and wirte down to csv file
def json_to_csv(url):
    response = requests.get(url)
    data = response.json()

    features = data['features']

    rows = []
    # "nazev" stands for "name"
    # "adresa" stands for "address"
    for item in features:
        names = item['properties']['NAZEV']
        area = item['properties']['KATUZE_NAZEV']
        addresses = item['properties']['ADRESA']
        rows.append({'name' : names, 'area': area, 'address' : addresses})
    
    # create DataFrame and write data
    df = pd.DataFrame(rows)
    df.to_csv('C:\\Users\\rastr\\data1.csv', encoding='utf-8-sig', index=False)

    return df


In [16]:
df = json_to_csv('https://opendata.iprpraha.cz/CUR/FSV/FSV_Kultura_b/S_JTSK/FSV_Kultura_b.json')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673 entries, 0 to 672
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   name     673 non-null    object
 1   area     673 non-null    object
 2   address  673 non-null    object
dtypes: object(3)
memory usage: 15.9+ KB


In [17]:
import re

type_keywords = {
    "Museum" : ['museum', 'muzeum', 'muzea'],
    'Gallery' : ['gallery', 'galerie'],
    'Theater' : ['theater', 'divadlo', 'divadelní studio', 'studio'],
    'Library' : ['library', 'knihovna'],
    'Cinema' : ['concert hall', 'koncertí sál'],
    'Zoo' : ['zoo', 'zoologická zahrada'],
    'Monument' : ['monument', 'památník'],
    'Club' : ['club', 'klub'],
    'Bar' : ['bar'],
    'Cafe' : ['cafe', 'café'],
    'Cinema' : ['cinema', 'kino'],
    'Synagoga' : ['synagoga'],
    'Arena' : ['arena'],
    'Opera' : ['opera'],
    
}

def get_type(name):
    name = re.sub(r'[^a-zA-Z0-9]', '', name.lower())

    for type, keywords in type_keywords.items():
        if any(keyword in name for keyword in keywords):
            return type
    return None

In [18]:
# apply get_type function to each place
df['type_of_place'] = df['name'].apply(get_type)

In [19]:
df['type_of_place'].value_counts(dropna=False)

None        222
Gallery     143
Library      89
Theater      86
Museum       54
Club         28
Cinema       25
Bar          13
Synagoga      5
Cafe          4
Opera         2
Arena         2
Name: type_of_place, dtype: int64

In [20]:
# drop rows with NaN values
df.dropna(subset=['type_of_place'], inplace=True)

In [21]:
# reset indexes in DataFrame
df.reset_index(drop=True, inplace=True)

In [22]:
#pd.set_option('display.max_rows', 30)
#pd.set_option('display.max_columns', 30)
df

Unnamed: 0,name,area,address,type_of_place
0,Divadlo DISK,Staré Město,Karlova 223/26,Theater
1,Divadlo U22,Uhříněves,K sokolovně 201/8,Theater
2,Divadlo Na Cikorce,Modřany,Herrmannova 2016/24,Theater
3,Divadlo Ty-já-tr,Holešovice,Pplk. Sochora 1387/9,Theater
4,Divadlo Inspirace - scénický ateliér HAMU,Malá Strana,Malostranské náměstí 258/13,Theater
...,...,...,...,...
446,Městská knihovna v Praze - pobočka Korunní,Vinohrady,Korunní 2160/68,Library
447,Městská knihovna v Praze - pobočka Krč,Krč,Štúrova 1282/12,Library
448,Městská knihovna v Praze - pobočka Ládví,Kobylisy,Burešova 1661/2,Library
449,Městská knihovna v Praze - pobočka Lužiny,Stodůlky,Archeologická 2256/1,Library


In [23]:
# be ready that execution of this part of code can take quite long
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='cultural_places_Prague')

def get_latitude_longtitude(address):
    location = geolocator.geocode(address)
    if location is not None:
        return (location.latitude, location.longitude)
    else:
        return (None, None)

# get latitudes and longtitudes based on addresses
df['latitude'], df['longtitude'] = zip(*df['address'].apply(get_latitude_longtitude))

# drop all NaN values
df.dropna(inplace=True)

In [24]:
# get cattegories and convert to list
labels = df['type_of_place'].value_counts().index[::].tolist()
labels

['Gallery',
 'Library',
 'Theater',
 'Museum',
 'Club',
 'Cinema',
 'Bar',
 'Synagoga',
 'Cafe',
 'Opera',
 'Arena']

In [25]:
# create a trace for the plot
trace = go.Pie(labels=df['type_of_place'].value_counts().index[::].tolist(), values=df['type_of_place'].value_counts(), 
                        domain={'x' : [0.1, 1.0], 'y' : [0.1, 1.0]}, hole=.2)

# create Figure with given trace
fig = go.Figure(data=[trace])

# show Figure
fig.show()

In [36]:
import plotly.graph_objs as go

# Prague coordinates
prague_lat = 50.0755
prague_lon = 14.4378

# coordinates of cultural places in Prague
places_lat = df['latitude']
places_lon = df['longtitude']

# each type of place has its own color
colors = {
    'Gallery' : 'rgb(113, 192, 147)',
    'Library' : 'rgb(160, 192, 113)',
    'Theater' : 'rgb(192, 179, 113)',
    'Museum' : 'rgb(192, 113, 113)',
    'Club' : 'rgb(113, 192, 192)',
    'Cinema' : 'rgb(113, 124, 192)',
    'Bar' : 'rgb(153, 113, 192)',
    'Cafe' : 'rgb(187, 113, 192)',
    'Synagoga' : 'rgb(192, 113, 134)',
    'Arena' : 'rgb(203, 76, 76)',
    'Opera' : 'rgb(41, 67, 10)'
}

# give each place its own color based on the type
marker_colors = [colors[row['type_of_place']] for _, row in df.iterrows()]

# create a trace for the map
trace = go.Scattermapbox(
    lat=places_lat,
    lon=places_lon,
    mode='markers',
    marker=dict(size=11, color=marker_colors),
    text=df['name']
)

# create the layout for the map
layout = go.Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken='access_token',
        bearing=0,
        center=dict(lat=prague_lat, lon=prague_lon),
        pitch=0,
        zoom=11
    ),
    margin=dict(l=0, r=0, t=0, b=0),
    showlegend=False
)

# combine the trace and layout into a figure
fig = go.Figure(data=[trace], layout=layout)

# set width and height parameters
fig.update_layout(
    width=1200,
    height=600
)

# display the figure
fig.show()


In [27]:
df = df[df['latitude'] >= 49.96]

In [28]:
df.dropna(inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [29]:
df

Unnamed: 0,name,area,address,type_of_place,latitude,longtitude
0,Divadlo DISK,Staré Město,Karlova 223/26,Theater,50.085939,14.417291
1,Divadlo U22,Uhříněves,K sokolovně 201/8,Theater,50.028791,14.605892
2,Divadlo Na Cikorce,Modřany,Herrmannova 2016/24,Theater,50.011845,14.413708
3,Divadlo Ty-já-tr,Holešovice,Pplk. Sochora 1387/9,Theater,50.099678,14.435291
4,Divadlo Inspirace - scénický ateliér HAMU,Malá Strana,Malostranské náměstí 258/13,Theater,50.088154,14.402084
...,...,...,...,...,...,...
446,Městská knihovna v Praze - pobočka Korunní,Vinohrady,Korunní 2160/68,Library,50.075200,14.450032
447,Městská knihovna v Praze - pobočka Krč,Krč,Štúrova 1282/12,Library,50.023383,14.451669
448,Městská knihovna v Praze - pobočka Ládví,Kobylisy,Burešova 1661/2,Library,50.127214,14.469351
449,Městská knihovna v Praze - pobočka Lužiny,Stodůlky,Archeologická 2256/1,Library,50.044070,14.331988


In [30]:
def build_map(categories):
    # Filter the DataFrame based on the selected categories
    df_categories = df[df['type_of_place'].isin(categories)]

    # Define the Prague coordinates
    prague_lat = 50.0755
    prague_lon = 14.4378

    # Define the coordinates of the cultural places in Prague
    places_lat = df_categories['latitude']
    places_lon = df_categories['longtitude']

    # Define the colors for each type of place
    colors = {
        'Gallery': 'rgb(113, 192, 147)',
        'Library': 'rgb(160, 192, 113)',
        'Theater': 'rgb(192, 179, 113)',
        'Museum': 'rgb(192, 113, 113)',
        'Club': 'rgb(113, 192, 192)',
        'Cinema': 'rgb(113, 124, 192)',
        'Bar': 'rgb(153, 113, 192)',
        'Cafe': 'rgb(187, 113, 192)',
        'Synagoga': 'rgb(192, 113, 134)',
        'Arena': 'rgb(203, 76, 76)',
        'Opera': 'rgb(41, 67, 10)'
    }

    # Define the colors for each marker based on the type of place
    marker_colors = [colors[row['type_of_place']] for _, row in df_categories.iterrows()]

    # Define the trace for the map
    trace = go.Scattermapbox(
        lat=places_lat,
        lon=places_lon,
        mode='markers',
        marker=dict(size=11, color=marker_colors),
        text=df_categories['name']
    )

    # Define the layout for the map
    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        mapbox=dict(
            accesstoken='access_token',
            bearing=0,
            center=dict(lat=prague_lat, lon=prague_lon),
            pitch=0,
            zoom=11
        ),
        margin=dict(l=0, r=0, t=0, b=0),
        showlegend=False
    )

    # Combine the trace and layout into a figure
    fig = go.Figure(data=[trace], layout=layout)

    # Set the width and height of the map
    fig.update_layout(width=1200, height=600)

    # Display the figure
    fig.show()

In [31]:
# map with museums and galleries
build_map(['Museum', 'Gallery'])

In [32]:
def get_top_areas(type_of_place):
    # add to new DataFrame all places with passed type
    df_count_by_area = df[df['type_of_place'] == type_of_place]

    # compute how many places are in each area of Prague
    df_count_by_area = df_count_by_area.groupby('area').size().reset_index(name=f'{type_of_place}')
    
    # sort values
    df_count_by_area = df_count_by_area.sort_values(by=f'{type_of_place}', ascending=False)
    print(df_count_by_area)

In [33]:
get_top_areas('Gallery')

           area  Gallery
10   Nové Město       35
13  Staré Město       31
4    Holešovice       11
25       Žižkov        8
8        Karlín        6
9   Malá Strana        6
16    Vinohrady        6
0       Bubeneč        4
12      Smíchov        4
1       Břevnov        4
6      Hradčany        3
20     Vyšehrad        2
7       Josefov        2
2        Chodov        2
11        Nusle        2
19     Vysočany        2
24         Řepy        1
23     Řeporyje        1
22   Černý Most        1
21     Zbraslav        1
3       Dejvice        1
18     Vršovice        1
17     Vokovice        1
5        Holyně        1
14        Troja        1
15   Veleslavín        1


In [34]:
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 20)
get_top_areas('Library')

               area  Library
33       Nové Město        3
13         Hostivař        3
51        Vinohrady        3
4            Chodov        3
31          Modřany        3
..              ...      ...
35        Petrovice        1
36           Prosek        1
37  Přední Kopanina        1
38          Radotín        1
0            Benice        1

[67 rows x 2 columns]
