# AdventureWorks - Easy

![rel](https://sqlzoo.net/w/images/2/28/AdventureWorks.png)

In [1]:
import findspark
import pandas as pd
findspark.init()

SVR = '192.168.31.31'
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql import Window

sc = (SparkSession.builder.appName('app14.1') 
      .master(f'spark://{SVR}:7077') 
      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') 
      .config('spark.cores.max', '4') 
      .config('spark.executor.instances', '1') 
      .config('spark.executor.cores', '2') 
      .config('spark.executor.memory', '10g') 
      .enableHiveSupport().getOrCreate())

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [2]:
cust_aw = sc.read.table('sqlzoo.CustomerAW')
cust_addr = sc.read.table('sqlzoo.CustomerAddress')
addr = sc.read.table('sqlzoo.Address')
product = sc.read.table('sqlzoo.Product')
order_det = sc.read.table('sqlzoo.SalesOrderDetail')
order_head = sc.read.table('sqlzoo.SalesOrderHeader')

## 1.
**Show the first name and the email address of customer with CompanyName 'Bike World'**

In [3]:
(cust_aw.filter(col('CompanyName')=='Bike World')
 .select('FirstName', 'EmailAddress')
 .toPandas())

                                                                                

Unnamed: 0,FirstName,EmailAddress
0,Kerim,kerim0@adventure-works.com


## 2.
**Show the CompanyName for all customers with an address in City 'Dallas'.**

In [4]:
(cust_aw.join(cust_addr, on='CustomerID')
 .join(addr.filter(col('City')=='Dallas'), on='AddressID')
 .select('CompanyName')
 .distinct()
 .orderBy('CompanyName')
 .toPandas())

Unnamed: 0,CompanyName
0,Elite Bikes
1,Rental Bikes
2,Third Bike Store
3,Town Industries
4,Unsurpassed Bikes


## 3.
**How many items with ListPrice more than $1000 have been sold?**

In [5]:
(order_det.join(product.filter(col('ListPrice')>1000), 'ProductID')
 .groupBy()
 .agg(sum('OrderQty').alias('qty'))
 .toPandas())

Unnamed: 0,qty
0,451


## 4.
**Give the CompanyName of those customers with orders over $100000. Include the subtotal plus tax plus freight.**

In [6]:
(order_head.join(cust_aw, 'CustomerID')
 .withColumn('n_orders', col('SubTotal') + col('TaxAmt') + col('Freight'))
 .filter(col('n_orders')>100000)
 .select('CompanyName')
 .distinct()
 .toPandas())

Unnamed: 0,CompanyName
0,Metropolitan Bicycle Supply
1,Action Bicycle Specialists


## 5.
**Find the number of left racing socks ('Racing Socks, L') ordered by CompanyName 'Riding Cycles'**

In [7]:
(order_det.join(product, 'ProductID')
 .join(order_head, 'SalesOrderID')
 .join(cust_aw, 'CustomerID')
 .filter((col('Name')=='Racing Socks, L') & 
         (col('CompanyName')=='Riding Cycles'))
 .groupBy()
 .agg(sum('OrderQty').alias('total'))
 .toPandas())

Unnamed: 0,total
0,3


In [8]:
sc.stop()