# Altair analysis part 3


## Setup

In [None]:
import pandas as pd
import altair as alt
# Select Altair's 'mimetype' renderer for displaying the charts below.
# NOTE(review): this renderer relies on frontend support; if a chart shows up
# only as "<VegaLite 5 object>", pick a renderer that matches the frontend in use.
alt.renderers.enable('mimetype')


## Data

### Import data

In [None]:
# Load the e-commerce dataset directly from the course repository on GitHub
# (requires network access).
df = pd.read_csv(
    "https://raw.githubusercontent.com/kirenz/lab-competitive/main/code/ecommerce.csv"
)

### Data structure

In [None]:
# Preview the first rows to check that the CSV was parsed as expected.
df.head()

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
df.info()

### Data corrections

In [None]:
# Fix column types in place:
# - `date` is parsed into pandas datetimes so date parts can be extracted later;
# - `eshop_name` is stored as a categorical column.
df["date"] = pd.to_datetime(df["date"])
df["eshop_name"] = df["eshop_name"].astype("category")

## Task 1

Average Revenue by E-Shop

- Show the average revenue for all shops
- Use the name `average_revenue`

In [None]:
# Baseline chart: one bar per shop, bar height = mean of `annual_revenue`.
# The aggregation is done by Altair through the `average(...)` shorthand.
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("eshop_name"),
        y=alt.Y("average(annual_revenue)"),
    )
)

chart

In [None]:
# Polished version of the Task 1 chart: mean annual revenue per shop as bars,
# with titled axes, no grid lines, a formatted tooltip and a left-aligned title.
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('eshop_name').axis(
        title='E-Shop',
        titleAnchor="start",
        labelAngle=0,  # keep the shop names horizontal
        grid=False,
    ),
    y=alt.Y('average(annual_revenue)').axis(
        title='Average Annual Revenue',
        titleAnchor="end",
        grid=False,
    ),
    # Show the aggregated value with thousands separators and two decimals.
    tooltip=alt.Tooltip("average(annual_revenue)", format=",.2f"),
).properties(
    title='Average Annual Revenue by E-Shop',
    width=250,
    height=200,
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start',
).configure_view(
    strokeWidth=0,  # remove the border drawn around the plotting area
)

chart

## Task 2

Two E-Shops with the Highest Average Rating

- Show the two E-Shops with the highest average rating
- Use the name `average_rating`

In [None]:
# Mean rating per shop, sorted from best to worst, keeping only the top two.
# `observed=True` is passed explicitly because `eshop_name` is a categorical
# column: recent pandas versions warn when the `observed` default is relied on,
# and only shops that actually occur in the data are wanted here.
df_best_rating = (
    df.groupby('eshop_name', observed=True)['average_rating']
    .mean()
    .reset_index()
    .sort_values('average_rating', ascending=False)
    .head(2)
)

# Bar chart of the two best-rated shops (data is already aggregated above,
# so the y channel uses the plain `average_rating` column).
chart = alt.Chart(df_best_rating).mark_bar().encode(
    x=alt.X('eshop_name').axis(
        title='E-Shop',
        titleAnchor="start",
        labelAngle=0,  # keep the shop names horizontal
        grid=False,
    ),
    y=alt.Y('average_rating').axis(
        title='Average Rating',
        titleAnchor="end",
        grid=False,
    ),
    tooltip=alt.Tooltip("average_rating", format=",.2f"),
).properties(
    title='Shops with the Highest Average Ratings',
    width=250,
    height=200,
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start',
).configure_view(
    strokeWidth=0,  # remove the border drawn around the plotting area
)

chart

## Task 3 

E-Shop Performance Over Time 

- Show the annual revenue per E-Shop by year
- Use the names `year` and `total_revenue`

In [None]:
# Revenue over time: one bar per calendar year (taken from `date`), bar height =
# summed `annual_revenue`, split by shop through the colour channel.
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("year(date):O").axis(
            title="Year",
            titleAnchor="start",
            labelAngle=0,
            grid=False,
        ),
        y=alt.Y("sum(annual_revenue):Q").axis(
            title="Total Revenue",
            titleAnchor="end",
            grid=False,
        ),
        # No legend title: the shop names speak for themselves.
        color=alt.Color("eshop_name:N", title=None),
    )
    .properties(
        title="Total Annual Revenue by E-shop and Year",
        width=250,
        height=200,
    )
    .configure_title(
        fontSize=16,
        font="Arial",
        anchor="start",
    )
    .configure_view(strokeWidth=0)
)

chart


## Task 4

Maximum Social Media Followers

- Show the maximum number of social media followers for every E-shop in descending order.
- Use the name `max_followers`

In [51]:
# Maximum follower count per shop for ALL shops, largest first.
# `observed=True` is passed explicitly because `eshop_name` is a categorical
# column and recent pandas versions warn when the default is relied on.
df_max_followers = (
    df.groupby('eshop_name', observed=True)['social_media_followers']
    .max()
    .reset_index()
    .rename(columns={'social_media_followers': 'max_followers'})
    .sort_values('max_followers', ascending=False)
)

# Kept with its original value: the single shop with the most followers.
df_most_followers = df_max_followers.head(1)

# One bar per shop, ordered from most to fewest followers.
# The bar order on a discrete x axis is controlled by the x channel's `sort`
# (here: descending by the y value), not by the `order` channel.
chart = alt.Chart(df_max_followers).mark_bar().encode(
    x=alt.X('eshop_name:N').sort('-y').axis(
        title='E-Shop',
        titleAnchor="start",
        labelAngle=0,  # keep the shop names horizontal
        grid=False,
    ),
    y=alt.Y('max_followers:Q').axis(
        title='Maximum Followers',
        titleAnchor="end",
        grid=False,
    ),
).properties(
    title='Maximum Social Media Followers by E-Shop',
    width=250,
    height=200,
).configure_title(
    fontSize=16,
    font='Arial',
    anchor='start',
).configure_view(
    strokeWidth=0,  # remove the border drawn around the plotting area
)

chart


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [None]:
# Tabular answer: take the maximum follower count per shop, rename it to
# `max_followers`, sort descending and keep only the top row.
df_most_followers = (
    df.groupby("eshop_name")["social_media_followers"]
    .max()
    .reset_index()
    .rename(columns={"social_media_followers": "max_followers"})
    .sort_values("max_followers", ascending=False)
    .head(1)
)

df_most_followers

## Task 5

Monthly Time on Site Overview

- Show a monthly overview of the average time on site for every E-shop (order by E-shop and month)
- Use the names `month` and `average_time_on_site`

In [None]:
# Derive the month number from the parsed `date` column.
# Note: only the month is kept, so the same month of different years is pooled.
df['month'] = df['date'].dt.month

# Mean time on site per shop and month, exposed as `average_time_on_site`.
# `observed=True` restricts the result to shop/month combinations that actually
# occur in the data (with a categorical `eshop_name`, the implicit default can
# add empty combinations as NaN rows and triggers a deprecation warning in
# recent pandas versions). groupby sorts by its keys by default, so the result
# is ordered by shop and then by month.
df_user_growth = (
    df.groupby(['eshop_name', 'month'], observed=True)['time_on_site']
    .mean()
    .reset_index()
    .rename(columns={'time_on_site': 'average_time_on_site'})
)

df_user_growth