# Import Libraries and Datasets

In [None]:
import sqlite3
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil

from itertools import product
from ydata_profiling import ProfileReport

# for better resolution plots
%config InlineBackend.figure_format = 'retina'

#o svg consegue ampliar infinitamente os gráficos sem perder qualidade mas às vezes é mais lento 
#por isso agora usamos retina


sns.set()


flightsDB = pd.read_csv('data/DM_AIAI_FlightsDB.csv', sep = ",", index_col= "Loyalty#")
customerDB = pd.read_csv('data/DM_AIAI_CustomerDB.csv', sep = ",", index_col= "Loyalty#")
metaData = pd.read_csv('data/DM_AIAI_Metadata.csv', sep = ";", header= None)

In [10]:
!pip install folium



# Create Map

For the map to render inside the VS Code notebook view, the Workspace Trust security setting has to be disabled:

Open the VS Code settings, search for `@tag:workspaceTrust`, and untick **Security › Workspace › Trust: Enabled**. VS Code will then ask to be restarted.

In [21]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
from IPython.display import display
from folium import IFrame

# Standard-library helper used to HTML-escape text placed inside the popups.
from html import escape

# Keep only customers that have coordinates, a loyalty status and a city.
customerDB_clean = customerDB.dropna(subset=['Longitude', 'Latitude', 'LoyaltyStatus', 'City'])

# One row per (City, Latitude, Longitude): total number of customers plus the
# number of customers in each of the three loyalty statuses.
city_summary = customerDB_clean.groupby(['City', 'Latitude', 'Longitude']).agg(
    Total_Customers=('LoyaltyStatus', 'count'),
    Star_Program=('LoyaltyStatus', lambda x: (x=='Star').sum()),
    Nova_Program=('LoyaltyStatus', lambda x: (x=='Nova').sum()),
    Aurora_Program=('LoyaltyStatus', lambda x: (x=='Aurora').sum())
).reset_index()

# Base map: centred on the (unweighted) mean of the city coordinates, full width.
center_lat = city_summary['Latitude'].mean()
center_lon = city_summary['Longitude'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=5, width='100%', height=800)

# Markers are added to a cluster layer rather than directly to the map.
marker_cluster = MarkerCluster().add_to(m)

# Bubble fill colour for each loyalty status.
fill_colors = {"Star": "blue", "Nova": "green", "Aurora": "red"}

# Add one circle marker per city.
# itertuples avoids building a pandas Series for every row (as iterrows does);
# every column name in city_summary is a valid attribute name.
for row in city_summary.itertuples(index=False):
    # Dominant loyalty status = the one with the highest count. On a tie,
    # max() keeps the first key in insertion order (Star, then Nova, then Aurora).
    counts = {
        "Star": row.Star_Program,
        "Nova": row.Nova_Program,
        "Aurora": row.Aurora_Program
    }
    dominant = max(counts, key=counts.get)
    color = fill_colors[dominant]

    # The city name comes straight from the CSV, so escape it before embedding
    # it in HTML; characters such as & or < would otherwise be parsed as markup.
    city_label = escape(str(row.City))

    # HTML popup
    popup_html = f"""
    <div style="width: 250px; font-size: 10px; line-height: 1.2;">
        <b>City:</b> {city_label}<br>
        <b>Total Customers:</b> {row.Total_Customers}<br>
        <b>Star Program:</b> {row.Star_Program}<br>
        <b>Nova Program:</b> {row.Nova_Program}<br>
        <b>Aurora Program:</b> {row.Aurora_Program}
    </div>
    """
    # NOTE(review): the iframe is 100 px wide while the inner div asks for
    # 250 px — confirm the popup renders without clipping or scrollbars.
    iframe = IFrame(html=popup_html, width=100, height=80)
    popup = folium.Popup(iframe, max_width=300)

    # Square-root scaling of the customer count, with a minimum radius of 5
    # so that small cities stay visible.
    radius = max(5, np.sqrt(row.Total_Customers)*1.3)
    folium.CircleMarker(
        location=[row.Latitude, row.Longitude],
        radius=radius,
        color='black',
        weight=0.5,
        fill=True,
        fill_color=color,
        fill_opacity=0.6,
        popup=popup
    ).add_to(marker_cluster)

# Display map directly in Jupyter Notebook
display(m)
