In [2]:
%load_ext sql
%sql mysql+pymysql://root:12345678@localhost:3306/brazil

'Connected: root@brazil'

<img src='pic_of_new_schema.jpg' alt='Diagram of the new database schema'>

SETTING THE ATMOSPHERE 

In [11]:
%%sql
-- Flattened view: one row per combination of customer, order, order item,
-- payment and review, with city/state looked up from the geolocation table
-- for both the customer and the seller.
--
-- CREATE OR REPLACE makes this cell safe to re-run; a plain CREATE VIEW fails
-- on the second execution because the view already exists.
--
-- Caution when aggregating over this view: order_items, order_payments and
-- order_reviews are each joined on order_id only, so an order with several
-- items AND several payments (or reviews) yields one row per combination.
-- Summing item_price or payment_value directly will then over-count.
-- NOTE(review): the geolocation joins multiply rows in the same way if a zip
-- code prefix appears more than once in geolocation -- confirm it is unique.
CREATE OR REPLACE VIEW
  normalized_view_with_geolocation AS
SELECT
  -- customer
  c.customer_id,
  c.customer_unique_id,
  c.customer_zip_code_prefix,
  gc.geolocation_city AS customer_city,
  gc.geolocation_state AS customer_state,
  -- order
  o.order_id,
  o.order_status,
  o.order_purchase_timestamp,
  o.order_approved_at,
  o.order_delivered_carrier_date,
  o.order_delivered_customer_date,
  o.order_estimated_delivery_date,
  -- order item
  oi.order_item_id,
  oi.product_id,
  oi.seller_id,
  oi.shipping_limit_date,
  oi.price AS item_price,
  oi.freight_value,
  -- product
  p.product_category_name,
  p.product_name_lenght,
  p.product_description_lenght,
  p.product_photos_qty,
  p.product_weight_g,
  p.product_length_cm,
  p.product_height_cm,
  p.product_width_cm,
  -- seller
  s.seller_zip_code_prefix,
  gs.geolocation_city AS seller_city,
  gs.geolocation_state AS seller_state,
  -- payment
  op.payment_sequential,
  op.payment_type,
  op.payment_installments,
  op.payment_value,
  -- review
  ov.review_id,
  ov.review_score,
  ov.review_comment_title,
  ov.review_comment_message,
  ov.review_creation_date,
  ov.review_answer_timestamp
FROM
  customers c
  JOIN orders o ON c.customer_id = o.customer_id
  JOIN order_items oi ON o.order_id = oi.order_id
  JOIN products p ON p.product_id = oi.product_id
  -- No column is selected from category_translation; as an inner join it only
  -- restricts the view to products whose category has a translation row.
  JOIN category_translation ct ON ct.product_category_name = p.product_category_name
  JOIN sellers s ON s.seller_id = oi.seller_id
  JOIN order_payments op ON o.order_id = op.order_id
  JOIN order_reviews ov ON o.order_id = ov.order_id
  -- geolocation is joined twice: gc for the customer's zip, gs for the seller's.
  JOIN geolocation gc ON gc.geolocation_zip_code_prefix = c.customer_zip_code_prefix
  JOIN geolocation gs ON gs.geolocation_zip_code_prefix = s.seller_zip_code_prefix;

 * mysql+pymysql://root:***@localhost:3306/brazil
0 rows affected.


ResourceClosedError: This result object does not return rows. It has been closed automatically.

### <font color='orange'> 1-CUSTOMER ANALYSIS </font>

1- Number of customers in the database


In [3]:
%%sql
-- Number of distinct customer_id values in the customers table.
-- NOTE(review): the "top customers" queries later in this notebook group by
-- customer_unique_id instead; confirm customer_id is the identifier intended here.
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM customers

 * mysql+pymysql://root:***@localhost:3306/brazil
1 rows affected.


unique_customers
99441


2- State with the highest number of customers in the database


In [8]:
%%sql
-- State with the most distinct customer_id values (single top row).
SELECT customer_state AS State,
       COUNT(DISTINCT customer_id) AS Cust_num
FROM customers
GROUP BY customer_state
ORDER BY Cust_num DESC
LIMIT 1

 * mysql+pymysql://root:***@localhost:3306/brazil
1 rows affected.


State,Cust_num
SP,41746


3- Cities with the highest number of customers in the database


In [7]:
%%sql
-- Five cities with the most distinct customer_id values.
SELECT customer_city AS City,
       COUNT(DISTINCT customer_id) AS Cust_num
FROM customers
GROUP BY customer_city
ORDER BY Cust_num DESC
LIMIT 5

 * mysql+pymysql://root:***@localhost:3306/brazil
5 rows affected.


City,Cust_num
sao paulo,15540
rio de janeiro,6882
belo horizonte,2773
brasilia,2131
curitiba,1521


4- Top 100 customers based on total sales.

In [45]:
%%sql
-- Top 100 customers by total spend (item price plus freight), grouped by
-- customer_unique_id. The LEFT JOINs keep customers that have no items;
-- their NULL totals sort after every non-NULL total in MySQL's DESC order.
SELECT c.customer_Unique_id,
       SUM(oi.price + oi.freight_value) AS total_sales
FROM customers AS c
LEFT JOIN orders AS o ON o.customer_id = c.customer_id
LEFT JOIN order_items AS oi ON oi.order_id = o.order_id
GROUP BY c.customer_Unique_id
ORDER BY total_sales DESC
LIMIT 100;

 * mysql+pymysql://root:***@localhost:3306/brazil
100 rows affected.


customer_Unique_id,total_sales
da122df9eeddfedc1dc1f5349a1a690c,7571.63
dc4802a71eae9be1dd28f5d788ceb526,6929.31
459bef486812aa25204be022145caa62,6922.21
ff4159b92c40ebe40454e3e6a7c35ed6,6726.66
eebb5dda148d3893cdaf5b5ca3040ccb,4764.34
48e1ac109decbb87765a3eade6854098,4681.78
edde2314c6c30e864a128ac95d6b2112,4513.32
a229eba70ec1c2abef51f04987deb7a5,4445.5
edf81e1f3070b9dac83ec83dacdbb9bc,4194.76
fa562ef24d41361e476e748681810e1e,4175.26


5- Customers that didn't make any orders since registration

In [48]:
%%sql
-- Customers whose summed price + freight is NULL, i.e. the LEFT JOINs reached
-- no order_items row with a non-NULL price and freight for them.
-- NOTE(review): this matches customers with no orders at all AND customers
-- whose orders have no items; confirm which of the two is intended.
SELECT c.customer_Unique_id,
       SUM(oi.price + oi.freight_value) AS total_sales
FROM customers AS c
LEFT JOIN orders AS o ON o.customer_id = c.customer_id
LEFT JOIN order_items AS oi ON oi.order_id = o.order_id
GROUP BY c.customer_Unique_id
HAVING total_sales IS NULL;

 * mysql+pymysql://root:***@localhost:3306/brazil
676 rows affected.


customer_Unique_id,total_sales
8141dd1e051afe7d72079570fe72d5f1,
442bdd695caba6183fc44d6cfdd4f094,
7d373e92dd3086b4c37e9868fc8999c1,
df136c686804f763b62d6b3bee85ba70,
2905d3311b2a67382390f97d14e9d7b1,
2d6e75f5a4a33d3a936f1ea68ac9bf01,
1d49289b3a906433e3ee62d969b5fd35,
646c00b19d9f4cc67be0cd6f026ba3ce,
bb6ffa5f9b68665f0cb238ea298092a3,
ca072da22ab6c6a14608c9d12c00595c,


### <font color='orange' face='calibri'>2-PRODUCT ANALYSIS</font>

1- How many distinct product categories are in the database?


In [16]:
%%sql
-- Number of distinct, non-NULL product_category_name values in products.
-- The expression is left unaliased so the result column header is unchanged.
SELECT COUNT(DISTINCT product_category_name)
FROM products

 * mysql+pymysql://root:***@localhost:3306/brazil
1 rows affected.


COUNT(Distinct product_category_name)
73


2- Number of products in each category


In [22]:
%%sql
-- Products per category, reported under the English category name.
-- Step 1 counts products per original category name (NULL names count as 0);
-- step 2 swaps in the English name. The inner join drops any category that
-- has no row in category_translation.
WITH category_counts AS (
  SELECT product_category_name,
         COUNT(product_category_name) AS products_per_category
  FROM products
  GROUP BY product_category_name
)
SELECT ct.product_category_name_english,
       cc.products_per_category
FROM category_counts AS cc
INNER JOIN category_translation AS ct
        ON ct.product_category_name = cc.product_category_name
ORDER BY cc.products_per_category DESC

 * mysql+pymysql://root:***@localhost:3306/brazil
73 rows affected.


product_category_name_english,products_per_category
bed_bath_table,3029
sports_leisure,2867
furniture_decor,2657
health_beauty,2444
housewares,2335
auto,1900
computers_accessories,1639
toys,1411
watches_gifts,1329
telephony,1134


3- Cities with the highest number of customers in the database


In [12]:
%%sql
-- Five cities with the most distinct customer_id values.
SELECT customer_city,
       COUNT(DISTINCT customer_id) AS Cust_num
FROM customers
GROUP BY customer_city
ORDER BY Cust_num DESC
LIMIT 5

 * mysql+pymysql://root:***@localhost:3306/brazil
5 rows affected.


customer_city,Cust_num
sao paulo,15540
rio de janeiro,6882
belo horizonte,2773
brasilia,2131
curitiba,1521


### <font color='orange'> 1-CUSTOMER ANALYSIS </font>

### <font color='orange'> 1-CUSTOMER ANALYSIS </font>

### <font color='orange'> 1-CUSTOMER ANALYSIS </font>

### <font color='orange'> 1-CUSTOMER ANALYSIS </font>

### <font color='orange'> 1-CUSTOMER ANALYSIS </font>