# Competitive analysis

## Setup

In [3]:
import os
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv

True

## Data

Connect to your MySQL database `db_ecommerce`. Make sure your `.env` file is prepared first: it must define `DB_URL`, which the connection string is built from.

In [4]:
# Populate os.environ with the variables declared in the local .env file.
load_dotenv()

# Build the SQLAlchemy engine for the db_ecommerce schema; the host/credential
# part of the URL is read from the DB_URL environment variable.
engine = create_engine(
    f"mysql+pymysql://{os.environ['DB_URL']}/db_ecommerce",
    pool_pre_ping=True,  # check a pooled connection is alive before using it
    pool_recycle=300,    # replace pooled connections older than 300 seconds
)


## First analysis

- Solve the tasks by inserting the SQL queries. 
- The tasks are provided as comments in the Python code.

Example query (we include `df_example` at the end to display the result):

```Python
df_example = pd.read_sql("""
    SELECT *
    FROM ecommerce_data;
""", engine)

df_example
```

In [5]:
# Average Revenue by E-Shop

# One row per shop: the mean of annual_revenue over all of that shop's records.
avg_revenue_sql = """
    SELECT eshop_name, AVG(annual_revenue) as average_revenue
    FROM ecommerce_data
    GROUP BY eshop_name;
"""

df_avg_revenue = pd.read_sql(sql=avg_revenue_sql, con=engine)

df_avg_revenue

Unnamed: 0,eshop_name,average_revenue
0,E-ShopA,30.221111
1,E-ShopB,30.221111
2,E-ShopC,30.221111


In [6]:
# E-Shop with the Highest Average Rating

# Rank the shops by their mean rating and keep only the top row.
# - The sort uses the aggregate expression explicitly, because the output alias
#   has the same name as the underlying average_rating column.
# - eshop_name is a secondary sort key so the result is deterministic when
#   several shops tie on the average; without it, LIMIT 1 may return any of
#   the tied shops.
df_best_rating = pd.read_sql("""
    SELECT eshop_name, AVG(average_rating) as average_rating
    FROM ecommerce_data
    GROUP BY eshop_name
    ORDER BY AVG(average_rating) DESC, eshop_name ASC
    LIMIT 1;
""", engine)

df_best_rating

Unnamed: 0,eshop_name,average_rating
0,E-ShopA,3.98611


In [8]:
# E-Shop Performance Over Time (Revenue per E-Shop by Year)

# One row per (shop, calendar year) with the summed annual_revenue.
# The explicit ORDER BY keeps each shop's years in chronological order;
# GROUP BY alone does not guarantee any particular row order.
df_revenue_by_year = pd.read_sql("""
    SELECT eshop_name, YEAR(date) as year, SUM(annual_revenue) as total_revenue
    FROM ecommerce_data
    GROUP BY eshop_name, year
    ORDER BY eshop_name, year;
""", engine)

df_revenue_by_year


Unnamed: 0,eshop_name,year,total_revenue
0,E-ShopA,2020,190.33
1,E-ShopA,2021,363.71
2,E-ShopA,2022,533.92
3,E-ShopB,2020,190.33
4,E-ShopB,2021,363.71
5,E-ShopB,2022,533.92
6,E-ShopC,2020,190.33
7,E-ShopC,2021,363.71
8,E-ShopC,2022,533.92


In [9]:
# E-Shop with the Most Social Media Followers

# Take each shop's peak follower count and keep the shop with the largest one.
# eshop_name is a secondary sort key so the result is deterministic when
# several shops share the same maximum; without it, LIMIT 1 may return any of
# the tied shops.
df_most_followers = pd.read_sql("""
    SELECT eshop_name, MAX(social_media_followers) as max_followers
    FROM ecommerce_data
    GROUP BY eshop_name
    ORDER BY max_followers DESC, eshop_name ASC
    LIMIT 1;
""", engine)

df_most_followers

Unnamed: 0,eshop_name,max_followers
0,E-ShopA,1043


In [13]:
# Monthly Active User Base Growth

# Hint: use DATE_FORMAT(date, '%%Y-%%m') as month
# (the percent signs are doubled so they are escaped on the way to MySQL)

# One row per (shop, month) with the mean active_user_base for that month.
# The 'YYYY-MM' month label sorts chronologically as text, so the explicit
# ORDER BY returns each shop's months as a time series; GROUP BY alone does
# not guarantee any particular row order.
df_user_growth = pd.read_sql("""
    SELECT eshop_name, DATE_FORMAT(date, '%%Y-%%m') as month, AVG(active_user_base) as average_active_users
    FROM ecommerce_data
    GROUP BY eshop_name, month
    ORDER BY eshop_name, month;
""", engine)

df_user_growth

Unnamed: 0,eshop_name,month,average_active_users
0,E-ShopA,2020-01,0.89
1,E-ShopA,2020-02,0.42
2,E-ShopA,2020-03,0.69
3,E-ShopA,2020-04,2.42
4,E-ShopA,2020-05,2.77
...,...,...,...
103,E-ShopC,2022-08,8.51
104,E-ShopC,2022-09,10.09
105,E-ShopC,2022-10,9.29
106,E-ShopC,2022-11,8.47


In [14]:
# Close connection: dispose of the engine's connection pool now that all
# queries have run, so no pooled database connections are left open.
engine.dispose()
