In [1]:
import html
import os

import folium
from folium.plugins import MarkerCluster, Fullscreen, TagFilterButton, MiniMap
import geopandas as gpd
import pandas as pd
import numpy as np
from branca.element import Template, MacroElement

## Reading query data

In [2]:
# Load the query results and keep only the rows that have a fee value in EUR.
# .copy() makes the filtered frame independent of the one returned by read_csv, so that
# adding columns to it in later cells does not raise pandas' SettingWithCopyWarning.
query_result = pd.read_csv('df_result/df_results.csv')
query_result = query_result.loc[query_result['feesEUR'].notna()].copy()   # Dropping the rows with null fee values
query_result.head()

Unnamed: 0,index,courseName,universityName,facultyName,isItFullTime,description,startDate,fees,modality,duration,city,country,administration,url,prep_description,feesEUR,Similarity
9,1355,Data Science MSc,Sabanci University,Masters Programmes,Full time,Data Science Master`s Degree Program aims to p...,See Course,$19.500 USD per year,MSc,See website for details.,Istanbul,Turkey,On Campus,820b2bb704c570fdd1cdcfeb04bb19b1.html,"['data', 'scienc', 'master', 'degre', 'program...",17886.626307,0.734564
11,1373,Data Science MSc,University of Wolverhampton,"School of Engineering, Computing and Mathemati...",Full time,"The MSc Data Science is a conversion MSc, desi...","September, March",Home - Full-time - £8395 per yearHome - Part-t...,MSc,"14 months full time, 28 months part time",Wolverhampton,United Kingdom,On Campus,5449805c40d065b866d5c89aa9f15a78.html,"['msc', 'data', 'scienc', 'convers', 'msc', 'd...",17756.579703,0.724294
15,1328,Data Science for Life Sciences,Hanze University of Applied Sciences,Masters courses,Full time,IntroductionThe Data Science for Life Sciences...,September,"EU/EEA students €2,314non-EU/EEA students €8,2...",MSc,18 Months,Groningen,Netherlands,On Campus,761882ea95fd78b3412cddbae2ee251b.html,"['introductionth', 'data', 'scienc', 'life', '...",8276.0,0.714819
16,1268,Data Science - MSc,University of Helsinki,International Masters Degree Programmes,Full time,Goal of the pro­grammeData science combines co...,September,Tuition fee per year (non-EU/EEA students): 15...,MSc,2 years,Helsinki,Finland,On Campus,65891bf21a7e6694db11409fa4d05ad1.html,"['goal', 'programmedata', 'scienc', 'combin', ...",15000.0,0.712502
23,1296,Data Science (PGCert/PGDip/MSc) - online,University of St Andrews,School of Computer Science,Full time,The Data Science course develops core skills i...,"September, January","MSc (three years) £18,000 (charged £6,000 per ...",PGCert,3 years full time,St Andrews,United Kingdom,,4cf17e753ec8224387054ccd290a1a7a.html,"['data', 'scienc', 'cours', 'develop', 'core',...",20687.277324,0.701891


## Geocoding

In [3]:
# Concatenate the 'universityName', 'city' and 'country' columns (the address data) into a new
# comma-separated 'address' column that is passed to the geocoder.
# Missing parts are skipped: str() would turn a NaN into the literal text 'nan', which would
# then be sent to the geocoder as part of the address.
address_parts = query_result[['universityName', 'city', 'country']]
query_result['address'] = address_parts.apply(
    lambda row: ','.join(str(part) for part in row if pd.notna(part)), axis=1)
query_result.head()

Unnamed: 0,index,courseName,universityName,facultyName,isItFullTime,description,startDate,fees,modality,duration,city,country,administration,url,prep_description,feesEUR,Similarity,address
9,1355,Data Science MSc,Sabanci University,Masters Programmes,Full time,Data Science Master`s Degree Program aims to p...,See Course,$19.500 USD per year,MSc,See website for details.,Istanbul,Turkey,On Campus,820b2bb704c570fdd1cdcfeb04bb19b1.html,"['data', 'scienc', 'master', 'degre', 'program...",17886.626307,0.734564,"Sabanci University,Istanbul,Turkey"
11,1373,Data Science MSc,University of Wolverhampton,"School of Engineering, Computing and Mathemati...",Full time,"The MSc Data Science is a conversion MSc, desi...","September, March",Home - Full-time - £8395 per yearHome - Part-t...,MSc,"14 months full time, 28 months part time",Wolverhampton,United Kingdom,On Campus,5449805c40d065b866d5c89aa9f15a78.html,"['msc', 'data', 'scienc', 'convers', 'msc', 'd...",17756.579703,0.724294,"University of Wolverhampton,Wolverhampton,Unit..."
15,1328,Data Science for Life Sciences,Hanze University of Applied Sciences,Masters courses,Full time,IntroductionThe Data Science for Life Sciences...,September,"EU/EEA students €2,314non-EU/EEA students €8,2...",MSc,18 Months,Groningen,Netherlands,On Campus,761882ea95fd78b3412cddbae2ee251b.html,"['introductionth', 'data', 'scienc', 'life', '...",8276.0,0.714819,"Hanze University of Applied Sciences,Groningen..."
16,1268,Data Science - MSc,University of Helsinki,International Masters Degree Programmes,Full time,Goal of the pro­grammeData science combines co...,September,Tuition fee per year (non-EU/EEA students): 15...,MSc,2 years,Helsinki,Finland,On Campus,65891bf21a7e6694db11409fa4d05ad1.html,"['goal', 'programmedata', 'scienc', 'combin', ...",15000.0,0.712502,"University of Helsinki,Helsinki,Finland"
23,1296,Data Science (PGCert/PGDip/MSc) - online,University of St Andrews,School of Computer Science,Full time,The Data Science course develops core skills i...,"September, January","MSc (three years) £18,000 (charged £6,000 per ...",PGCert,3 years full time,St Andrews,United Kingdom,,4cf17e753ec8224387054ccd290a1a7a.html,"['data', 'scienc', 'cours', 'develop', 'core',...",20687.277324,0.701891,"University of St Andrews,St Andrews,United Kin..."


In [4]:
# Check whether any row is left without a value in the 'address' column
missing_address = query_result['address'].isna()
query_result[missing_address]   # OK

Unnamed: 0,index,courseName,universityName,facultyName,isItFullTime,description,startDate,fees,modality,duration,city,country,administration,url,prep_description,feesEUR,Similarity,address


In [5]:
# For encoding geo-information data, we used geopandas, which supports geocoding with the geopy library.
# Geopy supports different geocoding service providers. At first, we used Nominatim (OpenStreetMap), but it returned NaN values for several addresses.
# Then, we tried OpenCage, which geocoded all the addresses. To use this service, we signed up for an API key which allows 2500 geocoding requests per day.
#
# The API key is a credential, so it is read from the OPENCAGE_API_KEY environment variable instead of
# being written into the notebook. NOTE(review): the key that used to be embedded here should be rotated.
opencage_api_key = os.environ.get("OPENCAGE_API_KEY")
if not opencage_api_key:
    raise RuntimeError("Set the OPENCAGE_API_KEY environment variable before running the geocoding cell.")

# The result is a GeoDataFrame that contains an 'address' column and a 'geometry' column containing Shapely Point objects.
geocoded_addresses = gpd.tools.geocode(query_result["address"], provider="opencage", api_key=opencage_api_key, user_agent="SSP", timeout=20)
geocoded_addresses.head()

Unnamed: 0,geometry,address
9,POINT (29.37863 40.89272),"Sabancı University, 30 Ağustos Caddesi, 34956 ..."
11,POINT (-2.12756 52.59083),"University of Wolverhampton, Ring Road St. Pet..."
15,POINT (6.56667 53.21917),"Groningen, Netherlands"
16,POINT (24.95355 60.17565),"University of Helsinki, Snellmaninaukio, 00170..."
23,POINT (-2.81179 56.33982),"University of St Andrews, North Street, St And..."


In [6]:
# Check whether the geocoder left any row without a value in its 'address' column
missing_geocoded_address = geocoded_addresses['address'].isna()
geocoded_addresses[missing_geocoded_address]   # OK

Unnamed: 0,geometry,address


In [9]:
# DataFrame.join matches rows by index label (not by position), so the join is only meaningful
# when both frames carry the same index. Verify that explicitly instead of assuming it; a mismatch
# would otherwise silently produce rows filled with NaN.
assert geocoded_addresses.index.equals(query_result.index), \
    "geocoded_addresses and query_result must share the same index before joining"

# Both frames have an 'address' column, hence the suffixes: 'address_geocoded' is the address
# returned by the geocoder, 'address_res' is the address string that was sent to it.
query_geocoded = geocoded_addresses.join(query_result, lsuffix='_geocoded', rsuffix='_res')
query_geocoded.head()
# The output of this step is a geodataframe containing the point geometry (longitude/latitude) for each MSc course.

Unnamed: 0,geometry,address_geocoded,index,courseName,universityName,facultyName,isItFullTime,description,startDate,fees,modality,duration,city,country,administration,url,prep_description,feesEUR,Similarity,address_res
9,POINT (29.37863 40.89272),"Sabancı University, 30 Ağustos Caddesi, 34956 ...",1355,Data Science MSc,Sabanci University,Masters Programmes,Full time,Data Science Master`s Degree Program aims to p...,See Course,$19.500 USD per year,MSc,See website for details.,Istanbul,Turkey,On Campus,820b2bb704c570fdd1cdcfeb04bb19b1.html,"['data', 'scienc', 'master', 'degre', 'program...",17886.626307,0.734564,"Sabanci University,Istanbul,Turkey"
11,POINT (-2.12756 52.59083),"University of Wolverhampton, Ring Road St. Pet...",1373,Data Science MSc,University of Wolverhampton,"School of Engineering, Computing and Mathemati...",Full time,"The MSc Data Science is a conversion MSc, desi...","September, March",Home - Full-time - £8395 per yearHome - Part-t...,MSc,"14 months full time, 28 months part time",Wolverhampton,United Kingdom,On Campus,5449805c40d065b866d5c89aa9f15a78.html,"['msc', 'data', 'scienc', 'convers', 'msc', 'd...",17756.579703,0.724294,"University of Wolverhampton,Wolverhampton,Unit..."
15,POINT (6.56667 53.21917),"Groningen, Netherlands",1328,Data Science for Life Sciences,Hanze University of Applied Sciences,Masters courses,Full time,IntroductionThe Data Science for Life Sciences...,September,"EU/EEA students €2,314non-EU/EEA students €8,2...",MSc,18 Months,Groningen,Netherlands,On Campus,761882ea95fd78b3412cddbae2ee251b.html,"['introductionth', 'data', 'scienc', 'life', '...",8276.0,0.714819,"Hanze University of Applied Sciences,Groningen..."
16,POINT (24.95355 60.17565),"University of Helsinki, Snellmaninaukio, 00170...",1268,Data Science - MSc,University of Helsinki,International Masters Degree Programmes,Full time,Goal of the pro­grammeData science combines co...,September,Tuition fee per year (non-EU/EEA students): 15...,MSc,2 years,Helsinki,Finland,On Campus,65891bf21a7e6694db11409fa4d05ad1.html,"['goal', 'programmedata', 'scienc', 'combin', ...",15000.0,0.712502,"University of Helsinki,Helsinki,Finland"
23,POINT (-2.81179 56.33982),"University of St Andrews, North Street, St And...",1296,Data Science (PGCert/PGDip/MSc) - online,University of St Andrews,School of Computer Science,Full time,The Data Science course develops core skills i...,"September, January","MSc (three years) £18,000 (charged £6,000 per ...",PGCert,3 years full time,St Andrews,United Kingdom,,4cf17e753ec8224387054ccd290a1a7a.html,"['data', 'scienc', 'cours', 'develop', 'core',...",20687.277324,0.701891,"University of St Andrews,St Andrews,United Kin..."


In [10]:
# Cast the 'feesEUR' column from numpy.float64 to 64-bit integers so that fees are shown without decimals
fees_as_float = query_geocoded['feesEUR']
query_geocoded['feesEUR'] = fees_as_float.astype(np.int64)

In [11]:
def fee_cat(fees):
    ''' Return the label of the fee band (one of 6)
    that the given amount in EUR falls into.
    '''
    # (exclusive upper bound, label) pairs in ascending order;
    # the first bound that lies above `fees` decides the label.
    bands = (
        (1000, 'Less than 1000 €'),
        (5000, '1000 to 5000 €'),
        (10000, '5000 to 10000 €'),
        (15000, '10000 to 15000 €'),
        (20000, '15000 to 20000 €'),
    )
    for upper_bound, label in bands:
        if fees < upper_bound:
            return label
    # Nothing matched: the amount is at or above the highest bound.
    return 'More than 20000 €'

# Label every course with its fee band by running fee_cat over 'feesEUR'; the labels go into a new column ('fees_Cat')
fee_labels = query_geocoded['feesEUR'].apply(fee_cat)
query_geocoded['fees_Cat'] = fee_labels

## Visualization

In [18]:
# To visualize the most relevant MSc degrees, we created an interactive web map by using the folium package.

# Creating the map object
interactive_map = folium.Map(location=(51.7548, 2), zoom_start=7, control_scale=True)

# Fee category -> marker colour. The keys are the labels stored in the 'fees_Cat' column.
# The same labels are attached to every marker as tags and offered by the TagFilterButton.
color_categories = {'Less than 1000 €': 'green', '1000 to 5000 €': 'pink', '5000 to 10000 €': 'orange',
                    '10000 to 15000 €': 'red', '15000 to 20000 €': 'purple', 'More than 20000 €': 'black'}

# There are multiple overlapping courses, so the markers are grouped in a MarkerCluster for better visualization.
# The icon_create_function is set to None to use default cluster icons.
marker_cluster = MarkerCluster(overlay=True, control=False, icon_create_function=None, name='Courses').add_to(interactive_map)

# One marker per course (row of query_geocoded), tagged with its fee category
for _, row in query_geocoded.iterrows():
    category = row['fees_Cat']
    # The popup text is rendered as HTML: '\n' would be collapsed to a space, so line breaks are
    # written as <br>, and the course name is escaped so characters such as '<' or '&' show up literally.
    popup_html = (f"Course Name:<br>{html.escape(str(row['courseName']))}<br>"
                  f"Annual Fees:<br>{row['feesEUR']} €")
    folium.Marker(
        location=[row['geometry'].y, row['geometry'].x],  # latitude (y) and longitude (x) of the geocoded address
        popup=popup_html,  # shown when clicking on the marker
        tooltip="Click me!",
        icon=folium.Icon(color=color_categories.get(category), icon='university', prefix='fa'),  # colour follows the fee category
        tags=[category],  # recognized by the TagFilterButton plugin, so markers can be filtered by fee category
    ).add_to(marker_cluster)

# Add a button (top-right corner) to toggle a fullscreen view of the map.
Fullscreen(position="topright", title="Expand me",
           title_cancel="Exit me", force_separate_button=True).add_to(interactive_map)

# Add a button for tag filtering. The tag list is taken from the keys of color_categories
# (dicts keep insertion order), so it cannot drift from the tags given to the markers.
categories = list(color_categories)
TagFilterButton(categories).add_to(interactive_map)

# Add a minimap (bottom-left corner) for easier navigation
MiniMap(position="bottomleft", zoom_level_offset=-5, toggle_display=True).add_to(interactive_map)


# Saving the map to an HTML file so that it can be opened in any web browser.
# The target directory is created first so that the save does not fail when it is missing.
os.makedirs('Interactive', exist_ok=True)
interactive_map.save('Interactive/interactive_map.html')

# Adding legend
# It turned out that Folium only has a built-in legend for choropleth maps. Source: https://github.com/python-visualization/folium/issues/528
# There are a couple of ways to add a legend to a folium interactive map;
# among them, embedding custom HTML is probably the best solution.
# This part of the code is adapted from https://nbviewer.org/gist/talbertc-usgs/18f8901fc98f109f2b71156cf3ac81cd

# The Template class allows users to create custom HTML structures with placeholders that can be embedded into Folium maps.
# Notes on the template below:
#   * the jQuery UI stylesheet is loaded over an explicit https:// URL; a protocol-relative '//...' URL
#     does not resolve when the saved map is opened from disk (file://);
#   * the legend entries (colours and labels) mirror the fee categories used for the markers.
template = """
{% macro html(this, kwargs) %}

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>MSc courses - interactive map</title>
  <link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">

  <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
  <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
  
  <script>
  $( function() {
    $( "#maplegend" ).draggable({
                    start: function (event, ui) {
                        $(this).css({
                            right: "auto",
                            top: "auto",
                            bottom: "auto"
                        });
                    }
                });
});

  </script>
</head>
<body>

 
<div id='maplegend' class='maplegend' 
    style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
     border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
     
<div class='legend-title'>Legend </div>
<div class='legend-scale'>
  <ul class='legend-labels'>
    <li><span style='background:green;opacity:0.7;'></span>Less than 1000 €</li>
    <li><span style='background:pink;opacity:0.7;'></span>1000 to 5000 €</li>
    <li><span style='background:orange;opacity:0.7;'></span>5000 to 10000 €</li>
    <li><span style='background:red;opacity:0.7;'></span>10000 to 15000 €</li>
    <li><span style='background:purple;opacity:0.7;'></span>15000 to 20000 €</li>
    <li><span style='background:black;opacity:0.7;'></span>More than 20000 €</li>

  </ul>
</div>
</div>
 
</body>
</html>

<style type='text/css'>
  .maplegend .legend-title {
    text-align: left;
    margin-bottom: 5px;
    font-weight: bold;
    font-size: 90%;
    }
  .maplegend .legend-scale ul {
    margin: 0;
    margin-bottom: 5px;
    padding: 0;
    float: left;
    list-style: none;
    }
  .maplegend .legend-scale ul li {
    font-size: 80%;
    list-style: none;
    margin-left: 0;
    line-height: 18px;
    margin-bottom: 2px;
    }
  .maplegend ul.legend-labels li span {
    display: block;
    float: left;
    height: 16px;
    width: 30px;
    margin-right: 5px;
    margin-left: 0;
    border: 1px solid #999;
    }
  .maplegend .legend-source {
    font-size: 80%;
    color: #777;
    clear: both;
    }
  .maplegend a {
    color: #777;
    }
</style>
{% endmacro %}"""

# The MacroElement class is used to create a custom map element that incorporates the HTML template defined above by using the Template class.
macro = MacroElement()
macro._template = Template(template)

# Incorporating the map element (template) into the root of the Folium map, which enables its display on the map.
interactive_map.get_root().add_child(macro)

# Save the map only now that the legend is attached, so the exported HTML file contains the legend too
# (a file written before this point would not include it). The target directory is created if missing.
os.makedirs('Interactive', exist_ok=True)
interactive_map.save('Interactive/interactive_map.html')

# Display the finished map (with legend) as the cell output
interactive_map

## How to use the interactive map
Please use the full-screen mode (by clicking the "Expand me" button) for the best experience. Every time you change the zoom level, the number of clusters changes too. At the lowest zoom level (fully zoomed out), there is (most probably) one cluster containing all the courses. Clusters are separated out when you zoom in closer. When fully zoomed in, you can access the information of overlapping courses (a.k.a. a cluster) by clicking on the cluster. There is also a minimap on the bottom-left side of the map to help you find the current location more easily, as well as a filter button on the top-left side for selecting courses based on predefined fee ranges.