# 1. Getting recency, frequency and monetary data

In [15]:
import pyodbc
import pandas as pd
import plotly.express as px

# Connect to the local SQL Server Express instance hosting the Northwind
# database, using Windows authentication (Trusted_Connection).
#
# The connection string is assembled from adjacent string literals instead of
# backslash line continuations inside one literal: a continuation inside a
# string keeps the next line's leading indentation as part of the string, and
# the non-raw 'localhost\sqlexpress' relied on '\s' being an unrecognized
# escape sequence, which Python warns about. The raw literal keeps the
# backslash of the instance name explicitly.
conn = pyodbc.connect(
    'DRIVER=SQL Server;'
    r'Server=localhost\sqlexpress;'
    'Database=Northwind;'
    'Trusted_Connection=Yes;'
)
# NOTE(review): none of the cells visible here use this cursor (the queries
# below go through `conn`); kept in case other code relies on it.
cursor = conn.cursor()

In [16]:
# Load the per-customer RFM table (recency, frequency, monetary, the three
# scores and the resulting segment) over the open `conn` connection.
df_customer = pd.read_sql_query(
    sql='Select * from Northwind..rfm_customer_analysis',
    con=conn,
)
df_customer.head()

Unnamed: 0,custid,recency,frequency,monetary,r_score,f_score,m_score,rfm_score,segment
0,CENTC,661,1,100.8,1,1,1,111,Lost Customers
1,LAZYK,353,2,357.0,1,1,1,111,Lost Customers
2,LAUGB,129,3,522.5,1,1,1,111,Lost Customers
3,NORTS,11,3,649.0,3,1,1,311,New Customers
4,GALED,66,5,836.7,1,1,1,111,Lost Customers


In [17]:
# Load the per-country RFM table (same columns as the customer table, keyed by
# country instead of customer id) over the open `conn` connection.
df_country = pd.read_sql_query(
    sql='Select * from Northwind..rfm_country_analysis',
    con=conn,
)
df_country.head()

Unnamed: 0,country,recency,frequency,monetary,r_score,f_score,m_score,rfm_score,segment
0,Poland,17,7,3531.95,1,1,1,111,Lost Customers
1,Norway,30,6,5735.15,1,1,1,111,Lost Customers
2,Argentina,12,16,8119.1,2,1,1,211,Promising
3,Portugal,32,13,12468.65,1,1,1,111,Lost Customers
4,Italy,10,28,16705.15,2,2,1,221,Promising


In [18]:
# Fetch a country lookup table (country code, latitude, longitude, name) so the
# countries can be placed on a scatter map. read_html returns every table found
# on the page; the first one is the table used here.
df_geo = pd.read_html(
    io='https://developers.google.com/public-data/docs/canonical/countries_csv'
)[0]
df_geo.head()

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


# 2. Data wrangling

In [19]:
# Replace the abbreviated labels 'UK' and 'USA' with full country names so they
# can be joined to the geo table by name in the next step.
# Exact-value replacement is used on purpose: with regex=True the keys are
# treated as patterns and substituted anywhere inside a value, which could
# rewrite part of an unrelated country name.
df_country['country'] = df_country['country'].replace(
    {'UK': 'United Kingdom', 'USA': 'United States'}
)
df_country.head()

Unnamed: 0,country,recency,frequency,monetary,r_score,f_score,m_score,rfm_score,segment
0,Poland,17,7,3531.95,1,1,1,111,Lost Customers
1,Norway,30,6,5735.15,1,1,1,111,Lost Customers
2,Argentina,12,16,8119.1,2,1,1,211,Promising
3,Portugal,32,13,12468.65,1,1,1,111,Lost Customers
4,Italy,10,28,16705.15,2,2,1,221,Promising


In [20]:
# Attach latitude/longitude to every country row with a left join on the
# country name (rows without a match in df_geo keep NaN coordinates).
# Both frames have a 'country' column, so the merge suffixes them: 'country_x'
# (the name from df_country) is kept, while 'country_y' (the geo table's
# country code) and the duplicate join key 'name' are dropped.
# NOTE: this overwrites df_country and renames 'country' to 'country_x', so the
# cell cannot be re-run on its own; re-run the country load cell first.
df_country = (
    df_country
    .merge(df_geo, how='left', left_on='country', right_on='name')
    .drop(columns=['country_y', 'name'])
)
df_country.head()

Unnamed: 0,country_x,recency,frequency,monetary,r_score,f_score,m_score,rfm_score,segment,latitude,longitude
0,Poland,17,7,3531.95,1,1,1,111,Lost Customers,51.919438,19.145136
1,Norway,30,6,5735.15,1,1,1,111,Lost Customers,60.472024,8.468946
2,Argentina,12,16,8119.1,2,1,1,211,Promising,-38.416097,-63.616672
3,Portugal,32,13,12468.65,1,1,1,111,Lost Customers,39.399872,-8.224454
4,Italy,10,28,16705.15,2,2,1,221,Promising,41.87194,12.56738


# 3. Exploratory data analysis

The scatter plots below examine the pairwise relationships between the three RFM factors: recency, frequency and monetary value.

In [10]:
# Recency vs. Monetary: one point per customer, monetary on x and recency on y.
# The figure carries its own title so it can be read without the code cell.
fig = px.scatter(
    df_customer,
    x='monetary',
    y='recency',
    hover_data=['custid', 'monetary', 'recency', 'segment'],
    title='Recency vs. Monetary',
)
fig.show()

In [11]:
# Frequency vs. Monetary: one point per customer, monetary on x and frequency
# on y. The figure carries its own title so it can be read without the code cell.
fig = px.scatter(
    df_customer,
    x='monetary',
    y='frequency',
    hover_data=['custid', 'monetary', 'frequency', 'segment'],
    title='Frequency vs. Monetary',
)
fig.show()

In [12]:
# Recency vs. Frequency: one point per customer, frequency on x and recency on
# y. The figure carries its own title so it can be read without the code cell.
fig = px.scatter(
    df_customer,
    x='frequency',
    y='recency',
    hover_data=['custid', 'frequency', 'recency', 'segment'],
    title='Recency vs. Frequency',
)
fig.show()

# 4. Visualization 

In [21]:
# Treemap of the customer table with a single hierarchy level: the RFM segment.
# No `values` column is passed, so tile sizes come from plotly's default
# (presumably the number of customer rows per segment; see the px.treemap docs).
fig = px.treemap(
    df_customer,
    path=['segment'],
    width=900,
    height=600,
)
fig.update_layout(title='Northwind customer segmentation')
fig.show()

In [25]:
# World map with one marker per country, placed at the coordinates joined in
# from the geo table, coloured by RFM segment and sized by rfm_score.
# Columns are referenced by name; they are resolved against df_country.
# NOTE(review): in the displayed rows rfm_score is the r/f/m digits written
# side by side (e.g. 2,1,1 -> 211), so marker size is driven mostly by the
# recency digit. Confirm that this is the intended size measure.
fig = px.scatter_mapbox(
    df_country,
    lat='latitude',
    lon='longitude',
    color='segment',
    size='rfm_score',
    hover_name='country_x',
    hover_data=['rfm_score'],
    zoom=1,
)

# OpenStreetMap tiles as the base map, with tight margins around it.
fig.update_layout(
    mapbox_style='open-street-map',
    margin={'r': 0, 't': 50, 'l': 0, 'b': 10},
)
fig.show()