# Altair analysis part 3


## Setup

In [1]:
import pandas as pd
import altair as alt
#alt.renderers.enable('mimetype')

## Data

### Import data

In [2]:
# Read the e-commerce dataset directly from the course repository on GitHub.
df = pd.read_csv(
    'https://raw.githubusercontent.com/kirenz/lab-competitive/main/code/ecommerce.csv'
)

### Data structure

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,eshop_name,date,annual_revenue,time_on_site,average_rating,social_media_followers,average_response_time
0,E-ShopA,2020-01-31,13.35,1.09,4.17,173.76,2.35
1,E-ShopA,2020-02-29,10.74,0.56,4.79,52.69,2.58
2,E-ShopA,2020-03-31,11.91,0.57,2.92,141.79,1.54
3,E-ShopA,2020-04-30,16.38,2.44,3.68,190.57,1.92
4,E-ShopA,2020-05-31,6.52,2.07,2.67,129.49,1.49


In [4]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   eshop_name              108 non-null    object 
 1   date                    108 non-null    object 
 2   annual_revenue          108 non-null    float64
 3   time_on_site            108 non-null    float64
 4   average_rating          108 non-null    float64
 5   social_media_followers  108 non-null    float64
 6   average_response_time   108 non-null    float64
dtypes: float64(5), object(2)
memory usage: 6.0+ KB


### Data corrections

In [5]:
# Both columns are read as plain `object`; convert them to proper dtypes:
# `date` becomes a datetime and `eshop_name` becomes a categorical.
df = df.assign(
    date=pd.to_datetime(df['date']),
    eshop_name=df['eshop_name'].astype('category'),
)

## Task 1

Average Revenue by E-Shop

- Show the average revenue for all shops
- Use the name `average_revenue`

In [6]:
# Minimal version: one bar per e-shop showing its mean annual revenue.
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x='eshop_name',
        y='average(annual_revenue)',
    )
)

chart

In [7]:
# Bar chart of the mean annual revenue per e-shop, with axis titles,
# grid lines removed and a tooltip showing the mean with two decimals.
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('eshop_name').axis(
                            title='E-Shop',
                            titleAnchor="start",
                            labelAngle=0,
                            grid=False
                            ),
    y=alt.Y('average(annual_revenue)').axis(
                            # Fixed typo: the title previously read "Annnual".
                            title='Average Annual Revenue',
                            titleAnchor="end",
                            grid=False
                            ),
    tooltip=alt.Tooltip("average(annual_revenue)", format=",.2f")
).properties(
    title='Average Annual Revenue by E-Shop',
    width=250,
    height=200
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start'
).configure_view(
    # Remove the border drawn around the plotting area.
    strokeWidth=0
)

chart

## Task 2

Two E-Shops with the Highest Average Rating

- Show the two E-Shops with the highest average rating
- Use the name `average_rating`

In [8]:
# Mean rating per shop, reduced to the two best-rated shops.
# `eshop_name` is a categorical column, so `observed=True` is passed explicitly:
# it limits the groups to shops that actually occur in the data and avoids the
# pandas FutureWarning about the changing default of `observed`.
df_best_rating = (
    df.groupby('eshop_name', observed=True)['average_rating']
    .mean()
    .reset_index()
    .sort_values('average_rating', ascending=False)
    .head(2)
)

# Bar chart of the two selected shops and their mean rating.
chart = alt.Chart(df_best_rating).mark_bar().encode(
    x=alt.X('eshop_name').axis(
                            title='E-Shop',
                            titleAnchor="start",
                            labelAngle=0,
                            grid=False
                            ),
    y=alt.Y('average_rating').axis(
                            title='Average Rating',
                            titleAnchor="end",
                            grid=False
                            ),
    tooltip=alt.Tooltip("average_rating", format=",.2f")
).properties(
    title='Shops with the Highest Average Ratings',
    width=250,
    height=200
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start'
).configure_view(
    # Remove the border drawn around the plotting area.
    strokeWidth=0
)

chart

## Task 3 

E-Shop Performance Over Time 

- Show the annual revenue per E-Shop by year
- Use the names `year` and `total_revenue`

In [9]:
# Bars of summed revenue per calendar year, coloured by e-shop.
chart = (
    alt.Chart(
        df,
        title='Total Annual Revenue by E-shop and Year',
        width=250,
        height=200,
    )
    .mark_bar()
    .encode(
        x=alt.X(
            'year(date):O',
            axis=alt.Axis(
                title='Year',
                titleAnchor='start',
                labelAngle=0,
                grid=False,
            ),
        ),
        y=alt.Y(
            'sum(annual_revenue):Q',
            axis=alt.Axis(
                title='Total Revenue',
                titleAnchor='end',
                grid=False,
            ),
        ),
        # No legend title: the shop names are self-explanatory.
        color=alt.Color('eshop_name:N', title=None),
    )
    .configure_title(fontSize=16, font='Arial', anchor='start')
    .configure_view(strokeWidth=0)  # no border around the plotting area
)

chart


## Task 4

Maximum Social Media Followers

- Show the maximum number of social media followers for every E-shop in descending order.
- Use the name `max_followers`

In [10]:
# One bar per e-shop showing its maximum follower count; the x-axis is
# sorted by the y value in descending order.
chart = (
    alt.Chart(
        df,
        title='Maximum Social Media Followers',
        width=250,
        height=200,
    )
    .mark_bar()
    .encode(
        x=alt.X(
            'eshop_name:N',
            sort='-y',
            axis=alt.Axis(
                title='E-Shop',
                titleAnchor='start',
                labelAngle=0,
                grid=False,
            ),
        ),
        y=alt.Y(
            'max(social_media_followers):Q',
            axis=alt.Axis(
                title='Social Media Followers',
                titleAnchor='end',
                grid=False,
            ),
        ),
    )
    .configure_title(fontSize=16, font='Arial', anchor='start')
    .configure_view(strokeWidth=0)  # no border around the plotting area
)

chart


## Task 5

Monthly Time on Site Overview

- Show a monthly overview of the average time on site for every E-shop (order by E-shop and month)
- Use the names `month` and `average_time_on_site`

In [12]:
# Line chart of the mean time on site per calendar month, one line per e-shop.
chart = alt.Chart(df).mark_line().encode(
    x=alt.X('month(date):T').axis(
                               title='Month',
                               titleAnchor="start",
                               labelAngle=0,
                               grid=False
                               ),
    y=alt.Y('average(time_on_site):Q').axis(
                               # Fixed copy-paste error: the axis was labelled
                               # "Social Media Followers" although the encoded
                               # field is the mean of `time_on_site`.
                               title='Average Time on Site',
                               titleAnchor="end",
                               grid=False
                               ),
    # No legend title: the shop names are self-explanatory.
    color=alt.Color('eshop_name:N', title=None)
).properties(
    title='Average Monthly Time on Site',
    width=450,
    height=200
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start'
).configure_view(
    # Remove the border drawn around the plotting area.
    strokeWidth=0
)

chart