## Library Import

In [1]:
import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
import seaborn as sns


## Data Import

In [2]:
# Read the geo-enriched customer table (path is relative to the working
# directory) and preview the first rows to check that the columns parsed.
data = pd.read_csv("data_for_geo.csv")
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,customer_id,order_item_sequence_id,amount_prod_categories,price,transaction_value,shipping_cost,review_score,frequency,recency,...,Segment1,Cluster,type_of_client,customer_autonomous_community,customer_city,geo_latitude,geo_longitude,geo_admin1_code,geo_autonomous_community,cluster_value
0,0,9a736b248f67d166d2fbb006bcb877c3,138,75,77580,12179.0,140598,5.0,75,385,...,Old firends,3,True Friends,Comunidad de Madrid,Madrid,40.4165,-3.70256,29.0,Comunidad de Madrid,0.476523
1,1,9a736b248f67d166d2fbb006bcb877c3,138,75,77580,12179.0,140598,5.0,75,385,...,Old firends,3,True Friends,Comunidad de Madrid,Madrid,40.4165,-3.70256,29.0,Comunidad de Madrid,0.476523
2,2,6fbc7cdadbb522125f4b27ae9dee4060,57,38,303962,38646.0,86944,5.0,38,340,...,Old firends,3,True Friends,Cataluña,Barcelona,41.38879,2.15899,56.0,Cataluña,0.541163
3,3,f9ae226291893fda10af7965268fb7f6,47,35,54365,94647.0,22819,1.942857,35,535,...,Old firends,3,True Friends,Andalucía,Cádiz,,,,,0.507631
4,4,f9ae226291893fda10af7965268fb7f6,47,35,54365,94647.0,22819,1.942857,35,535,...,Old firends,3,True Friends,Andalucía,Cádiz,,,,,0.507631


## Geographic Visualization with Plotly

Plotly is used to build a set of interactive maps; the first one shows the number of customers per city.
First, the data are grouped by their geographic information, and the number of customers per city is obtained by counting the `customer_id` values in each group.

In [3]:
# Number of distinct customers per location, largest cities first.
# The same customer_id appears on several rows of `data` (see the preview
# above), so count() would tally rows rather than customers; nunique()
# counts each customer once per city.
# Rows without coordinates drop out here because groupby excludes NaN keys
# by default -- they could not be placed on the map anyway.
count_customer = (
    data.groupby(["geo_latitude", "geo_longitude", "customer_city"])["customer_id"]
    .nunique()
    .reset_index()
    .sort_values(by="customer_id", ascending=False)
)


In [6]:
# reset_index() has already produced a DataFrame; this re-wraps it under the
# name that the mapping cell reads from.
count_customer_df = pd.DataFrame(data=count_customer)

Map showing the number of customers per city

In [5]:
# Bubble map of customers per city: marker size and colour both encode the
# per-city value stored in the "customer_id" column of count_customer_df.
fig = px.scatter_mapbox(
    count_customer_df,
    lat="geo_latitude",
    lon="geo_longitude",
    size="customer_id",
    color="customer_id",
    size_max=20,
    zoom=4.5,
    hover_name="customer_city",
    title="Number of customer per City",
    mapbox_style="carto-positron",
    color_continuous_scale=px.colors.sequential.algae,
)
fig.show()

# Export a standalone HTML copy of the map; auto_open=True also opens the
# written file in the default browser each time the cell runs.
# `pio` (plotly.io) comes from the notebook's import cell.
pio.write_html(fig, file="map_customer_city.html", auto_open=True)

The second map shows the average order shipping cost per city.

In [7]:
# Mean shipping cost per location, most expensive cities first.
# Despite the variable name, this is an average, not a count.
count_shipping_cost = (
    data.groupby(["geo_latitude", "geo_longitude", "customer_city"])["shipping_cost"]
    .mean()
    .reset_index()
    .sort_values(by="shipping_cost", ascending=False)
)

In [8]:
# Re-wrap the aggregated result as a DataFrame under the same name; it is
# already a DataFrame after reset_index(), so the contents are unchanged.
count_shipping_cost = pd.DataFrame(data=count_shipping_cost)

In [9]:
# Bubble map of the average shipping cost per city: marker size and colour
# both encode the "shipping_cost" column of count_shipping_cost.
fig = px.scatter_mapbox(
    count_shipping_cost,
    lat="geo_latitude",
    lon="geo_longitude",
    size="shipping_cost",
    color="shipping_cost",
    size_max=10,
    zoom=4.5,
    hover_name="customer_city",
    title="Average shipping cost per city of origin",
    mapbox_style="carto-positron",
    color_continuous_scale=px.colors.sequential.algae,
)
fig.show()

# Export a standalone HTML copy of the map; auto_open=True also opens the
# written file in the default browser each time the cell runs.
# `pio` (plotly.io) comes from the notebook's import cell.
pio.write_html(fig, file="map_shipping_cost_origin.html", auto_open=True)