In [1]:
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
import numpy as np
from scipy import special
import plotly.express as px
import plotly.figure_factory as ff

# Put plotly into offline notebook mode for this kernel session.
# NOTE(review): connected=True is understood to load plotly.js from a CDN instead of
# embedding the library in the notebook — confirm against the installed plotly version.
py.offline.init_notebook_mode(connected=True)

In [2]:
# Load the per-city US sales table (path is relative to the working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../Data/us_cities_with_lat_lon.csv')
df.head(n=5)

Unnamed: 0,country,billing_city,billing_region,sum,latitude,longitude,sum actual
0,United States,Abbeville,Alabama,30,31.575479,-85.279044,$30
1,United States,Abbeville,Louisiana,62,29.958828,-92.142655,$62
2,United States,Abbeville,South Carolina,40,34.18186,-82.378452,$40
3,United States,Abbotsford,Wisconsin,42,44.964057,-90.299438,$42
4,United States,Aberdeen,Idaho,152,42.976717,-112.818124,$152


In [3]:
# Number of city rows loaded from the CSV.
df.shape[0]

11553

In [4]:
# Keep only the cities whose 'sum' (sales) is at least 1.
# .copy() makes df_sales an independent frame rather than a slice of df, so columns can
# be added to it later without pandas raising SettingWithCopyWarning (and without any
# ambiguity about whether the write lands in df).
df_sales = df.loc[df['sum'] >= 1].copy()
df_sales.head()

Unnamed: 0,country,billing_city,billing_region,sum,latitude,longitude,sum actual
0,United States,Abbeville,Alabama,30,31.575479,-85.279044,$30
1,United States,Abbeville,Louisiana,62,29.958828,-92.142655,$62
2,United States,Abbeville,South Carolina,40,34.18186,-82.378452,$40
3,United States,Abbotsford,Wisconsin,42,44.964057,-90.299438,$42
4,United States,Aberdeen,Idaho,152,42.976717,-112.818124,$152


In [5]:
# Row count after the sales filter; compare with the unfiltered count to see how many
# cities the filter removed.
df_sales.shape[0]

11553

In [19]:
# Build the hover label for every city: "City, State  $sales".
# assign() returns a new frame instead of writing a column into df_sales in place;
# df_sales was produced by filtering df, so an in-place `df_sales['text'] = ...`
# triggers pandas' SettingWithCopyWarning.
df_sales = df_sales.assign(
    text=df_sales['billing_city'] + ', ' + df_sales['billing_region']
    + '  $' + df_sales['sum'].astype(str)
)

# One marker per city on a map; marker size and colour both encode the 'sum' column.
fig_map = go.Figure(data=go.Scattergeo(
    lon=df_sales['longitude'],
    lat=df_sales['latitude'],
    text=df_sales['text'],
    mode='markers',
    # %{text} is filled from the per-point `text` values supplied above.
    hovertemplate=
        '<i>City:</i>' +
        '<br><b>%{text}</b>',
    marker=dict(
        # Scale raw sales down so the markers stay a reasonable size on the map.
        size=df_sales['sum'] / 600,
        color=df_sales['sum'],
        showscale=True,
        colorbar_title="Sales($)<br>2021",
    ),
))

fig_map.update_layout(
    title_text='Vitamin Ventures - 2021 US Cities Sales<br>(Click legend to toggle traces)',
    geo=dict(
        scope='usa',  # restrict the base map to the United States
        landcolor='rgb(217, 217, 217)',
    ),
)

fig_map.show()
# Write to html file
fig_map.write_html("../Graphs/2021_US_Cities_Sales.html")

In [7]:
# Load the top-10 states table and order it by sales, smallest first, then preview it.
df_states = (
    pd.read_csv('../Data/top_10_us_states_2021_sales.csv')
    .sort_values('sum')
)
df_states.head()

Unnamed: 0,billing_region,year,sum,sum_actual
8,Virginia,2021,114648,114647.765
5,North Carolina,2021,126278,126278.19
6,Pennsylvania,2021,129322,129321.91
3,New Jersey,2021,140342,140342.385
2,Illinois,2021,149560,149560.39


In [20]:
# Bar label for each state: "State  $sales".
df_states['text'] = (
    df_states['billing_region']
    + '  $'
    + df_states['sum'].astype(str)
)

# Horizontal bars, one per state, coloured by sales and labelled inside the bar.
fig2 = px.bar(
    data_frame=df_states,
    x='sum',
    y='billing_region',
    orientation='h',
    color="sum",
    text=df_states['text'],
    title="Top 10 States in US - by 2021 Sales",
    labels={"sum": "Sales ($)", "billing_region": 'US States'},
)
fig2.update_traces(textposition='inside')

fig2.show()
# Save a standalone HTML copy of the chart.
fig2.write_html("../Graphs/Top_10_States_2021.html")

In [9]:
# Rank all cities by sales, largest first (df_sales itself stays in that order).
df_sales = df_sales.sort_values('sum', ascending=False)
# Take the ten best sellers, then flip them to ascending order of sales.
df_top_10_cities = df_sales.head(10).sort_values('sum')
df_top_10_cities.head()

Unnamed: 0,country,billing_city,billing_region,sum,latitude,longitude,sum actual,text
2506,United States,Dallas,Texas,30033,32.790439,-96.80439,"$30,033","Dallas, Texas $30033"
4502,United States,Hayward,California,35420,37.6564,-122.0957,"$35,420","Hayward, California $35420"
9211,United States,San Diego,California,36850,32.71852,-117.159316,"$36,850","San Diego, California $36850"
480,United States,Austin,Texas,37341,30.210692,-97.942749,"$37,341","Austin, Texas $37341"
1244,United States,Brooklyn,New York,38393,40.694021,-73.99034,"$38,393","Brooklyn, New York $38393"


In [10]:
# Label for each top-10 city: "City, State  $sales".
df_top_10_cities['text'] = (
    df_top_10_cities['billing_city']
    + ', '
    + df_top_10_cities['billing_region']
    + '  $'
    + df_top_10_cities['sum'].astype(str)
)

In [21]:

# Horizontal bars, one per top-10 city, coloured by sales and labelled inside the bar.
fig3 = px.bar(
    data_frame=df_top_10_cities,
    x='sum',
    y='billing_city',
    orientation='h',
    color="sum",
    text=df_top_10_cities['text'],
    title="Top 10 Cities in US - by 2021 Sales",
    labels={"sum": "Sales ($)", "billing_city": 'US Cities'},
)
fig3.update_traces(textposition='inside')
fig3.show()
# Save a standalone HTML copy of the chart.
fig3.write_html("../Graphs/Top_10_Cities_2021.html")

In [12]:
# Load the per-country sales table (path is relative to the working directory)
# and preview its first five rows.
df_country = pd.read_csv(filepath_or_buffer='../Data/countries_with_lat_lon.csv')
df_country.head(n=5)

Unnamed: 0,country,sum,latitude,longitude,sum_actual
0,India,9,20.0,77.0,8.5
1,Saint Lucia,10,13.88,-61.13,10.0
2,Holy See (Vatican City State),10,41.9,12.45,10.0
3,Serbia,10,44.0,21.0,10.0
4,Cayman Islands,20,19.5,-80.5,19.99


In [13]:
# Number of country rows loaded from the CSV.
df_country.shape[0]

81

In [32]:
# One point per country at its latitude/longitude, coloured by country name;
# hovering a point shows the country name.
fig4 = px.scatter_geo(
    df_country,
    lat=df_country['latitude'],
    lon=df_country['longitude'],
    color='country',
    hover_name="country",
)
# Each country is its own colour category, so the legend is switched off.
fig4.update_layout(showlegend=False)
fig4.show()
# Save a standalone HTML copy of the map.
fig4.write_html("../Graphs/International_Presence.html")

In [15]:
# Re-read the country table (the same CSV is also loaded in an earlier cell).
df_country = pd.read_csv('../Data/countries_with_lat_lon.csv')
# Drop the United States row to leave international sales only.
# .copy() makes the result an independent frame rather than a slice of df_country, so
# columns can be added to it later without pandas raising SettingWithCopyWarning.
df_country_no_us = df_country[df_country['country'] != 'United States'].copy()
df_country_no_us.head()

Unnamed: 0,country,sum,latitude,longitude,sum_actual
0,India,9,20.0,77.0,8.5
1,Saint Lucia,10,13.88,-61.13,10.0
2,Holy See (Vatican City State),10,41.9,12.45,10.0
3,Serbia,10,44.0,21.0,10.0
4,Cayman Islands,20,19.5,-80.5,19.99


In [27]:
# Build the hover label for every country: "Country  $sales".
# assign() returns a new frame instead of writing a column into df_country_no_us in
# place; that frame was produced by filtering df_country, so an in-place
# `df_country_no_us['text'] = ...` triggers pandas' SettingWithCopyWarning.
df_country_no_us = df_country_no_us.assign(
    text=df_country_no_us['country'] + '  $' + df_country_no_us['sum'].astype(str)
)

# One marker per country on a world map; marker size and colour both encode 'sum'.
fig5 = go.Figure(data=go.Scattergeo(
    lon=df_country_no_us['longitude'],
    lat=df_country_no_us['latitude'],
    text=df_country_no_us['text'],
    mode='markers',
    # %{text} is filled from the per-point `text` values supplied above.
    hovertemplate=
        '<i>Country:</i>' +
        '<br><b>%{text}</b>',
    marker=dict(
        # Scale raw sales down so the markers stay a reasonable size on the map.
        size=df_country_no_us['sum'] / 400,
        color=df_country_no_us['sum'],
        showscale=True,
        colorbar_title="Sales($)<br>2021",
    ),
))

fig5.update_layout(
    title_text='Vitamin Ventures - 2021 International Sales',
    geo=dict(
        scope='world',
        landcolor='rgb(217, 217, 217)',
    ),
)

fig5.show()
# Save a standalone HTML copy of the map.
fig5.write_html("../Graphs/International_Sales.html")

In [17]:
# Rank the non-US countries by sales, largest first (the frame itself stays in that order).
df_country_no_us = df_country_no_us.sort_values('sum', ascending=False)
# Take the ten best sellers, then flip them to ascending order of sales.
df_top_10_country_no_us = df_country_no_us.head(10).sort_values('sum')
df_top_10_country_no_us.head()

Unnamed: 0,country,sum,latitude,longitude,sum_actual,text
70,Singapore,3532,1.37,103.8,3532.11,Singapore $3532
71,France,3644,46.0,2.0,3643.5,France $3644
72,Bahrain,7535,26.0,50.55,7535.3,Bahrain $7535
73,Qatar,8545,25.5,51.25,8544.86,Qatar $8545
74,Australia,12877,-27.0,133.0,12876.51,Australia $12877


In [29]:

# Horizontal bar chart of the ten best-selling countries outside the US.
# Bars are labelled with the existing 'text' column ("Country  $sales"), matching the
# labels on the state and city top-10 charts; labelling with the country name alone
# would only repeat the y-axis tick label.
fig6 = px.bar(df_top_10_country_no_us, y='country',
             x='sum',
             color="sum",
             orientation='h',
             text=df_top_10_country_no_us['text'],
             labels={"sum": "Sales ($)","country":'Countries'}, 
             title="Top 10 International Countries - by 2021 Sales")
fig6.update_traces(textposition='inside')
fig6.show()
# Save a standalone HTML copy of the chart.
fig6.write_html("../Graphs/Top_10_International_Countries.html")