In [None]:
# Importing the libraries to use
import os

import folium
import geojson
import numpy as np
import pandas as pd
from branca.colormap import linear
from geojson import FeatureCollection, Feature

In [None]:
# Directory with the Vodafone CSV exports (presumably one file per month)
DATA_DIR = "resources/data"

# os.listdir returns entries in arbitrary order and may contain non-CSV entries,
# so keep only the CSV files and sort them to load the same data in the same order on every run.
month_files = sorted(
    name for name in os.listdir(DATA_DIR) if name.lower().endswith(".csv")
)
if not month_files:
    raise FileNotFoundError(f"No CSV files found in {DATA_DIR!r}")

# Columns C1 to C11 are cast to 64-bit integers
int_columns = [f"C{i}" for i in range(1, 12)]

night_frames = []
for month_file in month_files:

    # Importing the Vodafone DataSet of this file (the loop variable is what gets read)
    dfMonth = pd.read_csv(os.path.join(DATA_DIR, month_file))

    # Transforming all columns from C1 to C11 (vectorised cast, one pass per column)
    dfMonth = dfMonth.astype({column: np.int64 for column in int_columns})

    # Changing the data type to date and with the format we want
    dfMonth['Datetime'] = pd.to_datetime(dfMonth['Datetime'], format="%Y-%m-%dT%H:%M:%S.%fZ")

    # Removing columns that we don't care about
    dfMonth = dfMonth[["Grid_ID", "Datetime", "C1", "C2", "D1"]]

    # Filtering by night time (between_time needs a DatetimeIndex, hence set_index/reset_index)
    dfMonth = dfMonth.set_index('Datetime').between_time('02:00', '07:00').reset_index()

    night_frames.append(dfMonth)

# One frame with the night-time rows of every loaded file.
# NOTE: once more than one month is loaded, grouping on the day of month alone
# would merge nights that belong to different months.
dfVodafone = pd.concat(night_frames, ignore_index=True)

# Importing the Vodafone Metadata with information about the cells
# (read once: it does not depend on the file being processed)
dfGrids = pd.read_csv("resources/metadata/wktComplete.csv", encoding="ISO-8859-1")

# Removing columns that we don't care about
dfGrids = dfGrids[["grelha_id", "freguesia", "latitude", "longitude", "nome", "position"]]

In [None]:
# Grouping by Grid ID and calendar night, aggregating by the minimum number of
# terminals (C2) each night for every cell.
# The grouping key is the full calendar date: the day of month alone would merge
# e.g. the 1st of two different months into a single "night".
night_date = dfVodafone["Datetime"].dt.normalize()
dfSleepByDay = (
    dfVodafone
    .assign(date=night_date, day=night_date.dt.day)
    .groupby(["Grid_ID", "date"], as_index=False)
    # `day` is constant inside each (Grid_ID, date) group, so "first" just carries it over
    .agg(day=("day", "first"), Sleeping=("C2", "min"), Datetime=("Datetime", "min"))
)

In [None]:
# Sanity check: column names, dtypes and non-null counts of the per-night aggregation
dfSleepByDay.info()

In [None]:
# Average, per grid cell, of the nightly minimum number of terminals
sleeping_by_grid = dfSleepByDay.groupby("Grid_ID", as_index=False)
dfSleepMean = sleeping_by_grid["Sleeping"].mean()

In [None]:
# Sanity check: column names, dtypes and non-null counts of the per-cell mean
dfSleepMean.info()

In [None]:
# Adding the information about the cells to our dataframe.
# Only the two aggregate columns take part in the merge, so re-running this cell
# gives the same result instead of piling up suffixed (_x/_y) metadata columns.
# The default inner join keeps only cells whose Grid_ID has a matching grelha_id.
dfSleepMean = dfSleepMean[["Grid_ID", "Sleeping"]].merge(
    dfGrids, left_on='Grid_ID', right_on='grelha_id'
)

In [None]:
# Sanity check: the frame should now also carry the cell metadata columns
dfSleepMean.info()

In [None]:
# Linear YlOrRd color scale stretched over the observed range of mean "Sleeping" values
sleeping_values = dfSleepMean["Sleeping"]
lowest_mean = sleeping_values.min()
highest_mean = sleeping_values.max()
colormap = linear.YlOrRd_09.scale(lowest_mean, highest_mean)

colormap

In [None]:
# Series of mean sleeping terminals indexed by Grid_ID (a Series, despite the name)
Sleeping_dict = dfSleepMean.set_index("Grid_ID")["Sleeping"]

# Fill color of every grid cell, looked up by Grid_ID
color_dict = {}
for grid_id in Sleeping_dict.keys():
    color_dict[grid_id] = colormap(Sleeping_dict[grid_id])

In [None]:
# Saving a Heat Map: one polygon per grid cell, filled according to its mean
# number of sleeping terminals, with a hover tooltip and the color legend.
MAP_OUTPUT_PATH = 'dist/sleeping_cells_heatmap.html'

# Named `sleeping_map` rather than `map` so the builtin `map` stays usable in later cells
sleeping_map = folium.Map(location=[38.743094, -9.145999], zoom_start=13)

features = []
for _, sleepByDay in dfSleepMean.iterrows():
    # "position" is stored with single quotes; swap them for double quotes so it
    # parses as JSON, then take the first element as the geometry
    # (assumes the text is a JSON list of geometries - TODO confirm against the metadata file)
    feature = Feature(
        geometry=geojson.loads(sleepByDay["position"].replace("'", '"'))[0],
        properties={
            "Grid_ID": sleepByDay["Grid_ID"],
            "name": sleepByDay["nome"],
            "freguesy": sleepByDay["freguesia"],
            "sleeping": sleepByDay["Sleeping"],
        }
    )
    features.append(feature)

feature_collection = FeatureCollection(features)

tooltip = folium.GeoJsonTooltip(
    fields=["name", "freguesy", "sleeping"],
    aliases=["Zona:", "Freguesia:", "A Dormir:"],
    localize=True,
    sticky=False,
    labels=True,
    style="""
        background-color: #F0EFEF;
        border: 2px solid black;
        border-radius: 3px;
        box-shadow: 3px;
    """,
    max_width=800,
)

folium.GeoJson(
    data=feature_collection,
    # Item access works both for geojson.Feature objects and for plain dicts
    style_function=lambda feature: {
        "fillColor": color_dict[feature["properties"]["Grid_ID"]],
        "color": "black",
        "weight": 0,
        "fillOpacity": .5,
    },
    tooltip=tooltip
).add_to(sleeping_map)

colormap.add_to(sleeping_map)

# Make sure the output folder exists before writing the HTML file
os.makedirs(os.path.dirname(MAP_OUTPUT_PATH), exist_ok=True)
sleeping_map.save(MAP_OUTPUT_PATH)

sleeping_map

In [None]:
# The 20 grid cells with the highest mean number of sleeping terminals
top_cell_columns = ["Grid_ID", "nome", "freguesia", "Sleeping"]
top_cells = dfSleepMean.nlargest(n=20, columns="Sleeping")
dfTop = top_cells[top_cell_columns]
dfTop

In [None]:
# Total of the per-cell means inside each freguesia
sleeping_by_freguesia = dfSleepMean.groupby("freguesia", as_index=False)
dfSleepMeanFreg = sleeping_by_freguesia["Sleeping"].sum()

In [None]:
# Sanity check: column names, dtypes and non-null counts of the per-freguesia totals
dfSleepMeanFreg.info()

In [None]:
# The 20 freguesias with the highest total, exported to CSV and displayed
top_freguesias = dfSleepMeanFreg.nlargest(n=20, columns="Sleeping")
dfTopFreg = top_freguesias[["freguesia", "Sleeping"]]
dfTopFreg.to_csv("dist/dfTopFreg.csv")
dfTopFreg

In [None]:
# Mean of the nightly minimum number of terminals for each day of the week
weekday_names = dfSleepByDay['Datetime'].dt.day_name()
dfSleepByWeekday = dfSleepByDay.assign(Weekday=weekday_names)
dfSleepByWeekdayMean = (
    dfSleepByWeekday
    .groupby("Weekday")["Sleeping"]
    .mean()
    .reset_index()
)

In [None]:
# Export the weekday averages to dist/ and display them.
# to_csv is called without index=False, so the row index is written as the first column.
dfSleepByWeekdayMean.to_csv("dist/dfSleepByWeekdayMean.csv")
dfSleepByWeekdayMean