#Accessing the file from drive


In [0]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
downloaded = drive.CreateFile({'id':'1-hJXei9qmyZxKMH-zRm02t_RPnWyOfKL'})
downloaded.GetContentFile('100000 Sales Records.csv')

#Installing dependencies


In [0]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q http://apache.osuosl.org/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz
!tar xf spark-2.4.4-bin-hadoop2.7.tgz
!pip install -q findspark

In [0]:
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.4-bin-hadoop2.7"

In [0]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[*]").getOrCreate()

#Reading the csv file

In [0]:
data = spark.read.csv('100000 Sales Records.csv',inferSchema=True, header =True)

In [8]:
data.show(truncate = False)

+---------------------------------+----------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Region                           |Country               |Item Type      |Sales Channel|Order Priority|Order Date|Order ID |Ship Date |Units Sold|Unit Price|Unit Cost|Total Revenue|Total Cost|Total Profit|
+---------------------------------+----------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Middle East and North Africa     |Azerbaijan            |Snacks         |Online       |C             |10/8/2014 |535113847|10/23/2014|934       |152.58    |97.44    |142509.72    |91008.96  |51500.76    |
|Central America and the Caribbean|Panama                |Cosmetics      |Offline      |L             |2/22/2015 |874708545|2/27/2015 |4551      |437.2     |263.33   |1989697.2

#Making the data workable

Renaming Columns 

In [0]:
data = data.withColumnRenamed('Item Type','Item_Type').withColumnRenamed('Sales Channel','Sales Channel').withColumnRenamed('Order Priority','Order Priority').withColumnRenamed('Order Date','Order_Date').withColumnRenamed('Order ID','Order_ID').withColumnRenamed('Ship Date','Ship_Date').withColumnRenamed('Units Sold','Units_Sold').withColumnRenamed('Unit Price','Unit_Price').withColumnRenamed('Unit Cost','Unit_Cost').withColumnRenamed('Total Revenue','Total_Revenue').withColumnRenamed('Total Cost','Total_Cost').withColumnRenamed('Total Profit','Total_Profit').withColumnRenamed('Sales Channel','Sales_Channel').withColumnRenamed('Order Priority','Order_Priority')

#Exploratory analysis 

Exploratory analysis is often the first step of data analysis. Here we get familiar with data, ask questions, visualize the data in a number of forms, look for relationships between the variables, look for outliers, patterns and trends in data. The output of exploratory analysis is usually only for the us, the data analysts.

Showing the updated dataframe

In [0]:
data.show(8)

+--------------------+--------------------+-------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|              Region|             Country|    Item_Type|Sales_Channel|Order_Priority|Order_Date| Order_ID| Ship_Date|Units_Sold|Unit_Price|Unit_Cost|Total_Revenue|Total_Cost|Total_Profit|
+--------------------+--------------------+-------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Middle East and N...|          Azerbaijan|       Snacks|       Online|             C| 10/8/2014|535113847|10/23/2014|       934|    152.58|    97.44|    142509.72|  91008.96|    51500.76|
|Central America a...|              Panama|    Cosmetics|      Offline|             L| 2/22/2015|874708545| 2/27/2015|      4551|     437.2|   263.33|    1989697.2|1198414.83|   791282.37|
|  Sub-Saharan Africa|Sao Tome and Prin...|       Fruit

Checking the datatypes of columns

In [0]:
data.printSchema()

root
 |-- Region: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Item_Type: string (nullable = true)
 |-- Sales_Channel: string (nullable = true)
 |-- Order_Priority: string (nullable = true)
 |-- Order_Date: string (nullable = true)
 |-- Order_ID: integer (nullable = true)
 |-- Ship_Date: string (nullable = true)
 |-- Units_Sold: integer (nullable = true)
 |-- Unit_Price: double (nullable = true)
 |-- Unit_Cost: double (nullable = true)
 |-- Total_Revenue: double (nullable = true)
 |-- Total_Cost: double (nullable = true)
 |-- Total_Profit: double (nullable = true)



Row count

In [0]:
data.count()

100000

Statistical Properties of continuous features

In [0]:
data.describe(['Unit_Cost','Unit_Price','Units_Sold','Total_Cost','Total_Revenue','Total_Profit']).show()

+-------+------------------+------------------+-----------------+------------------+------------------+-----------------+
|summary|         Unit_Cost|        Unit_Price|       Units_Sold|        Total_Cost|     Total_Revenue|     Total_Profit|
+-------+------------------+------------------+-----------------+------------------+------------------+-----------------+
|  count|            100000|            100000|           100000|            100000|            100000|           100000|
|   mean|188.01971109995176|266.70398939996755|       5001.44617| 941975.4933679946|1336066.7306641012|394091.2372960973|
| stddev|175.70602309486486|216.94008096309065|2884.575424393709|1151828.4340069285|1471767.5884969924|379598.6027035508|
|    min|              6.92|              9.33|                1|             13.84|             18.66|             4.82|
|    max|            524.96|            668.27|            10000|        5249075.04|         6682700.0|        1738700.0|
+-------+---------------

finding the unique values in each column

In [0]:
data.select('Region').distinct().show()

+--------------------+
|              Region|
+--------------------+
|Middle East and N...|
|Australia and Oce...|
|              Europe|
|  Sub-Saharan Africa|
|Central America a...|
|       North America|
|                Asia|
+--------------------+



In [0]:
data.select('Country').distinct().show()

+-----------+
|    Country|
+-----------+
|       Chad|
|     Russia|
|      Yemen|
|    Senegal|
|     Sweden|
|   Kiribati|
|    Eritrea|
|Philippines|
|   Djibouti|
|      Tonga|
|  Singapore|
|   Malaysia|
|       Fiji|
|     Turkey|
|     Malawi|
|       Iraq|
|    Germany|
|    Comoros|
|   Cambodia|
|Afghanistan|
+-----------+
only showing top 20 rows



In [0]:
data.select('Item_Type').distinct().show()

+---------------+
|      Item_Type|
+---------------+
|      Baby Food|
|         Cereal|
|           Meat|
|      Household|
|     Vegetables|
|      Beverages|
|Office Supplies|
|      Cosmetics|
|  Personal Care|
|         Fruits|
|         Snacks|
|        Clothes|
+---------------+



In [0]:
data.select('Sales_Channel').distinct().show()

+-------------+
|Sales_Channel|
+-------------+
|       Online|
|      Offline|
+-------------+



In [0]:
data.select('Order_Priority').distinct().show()

+--------------+
|Order_Priority|
+--------------+
|             L|
|             M|
|             C|
|             H|
+--------------+



 Filter PySpark Dataframe based on the Condition

In [0]:
# the unit cost of products exceeds the mean value for around 33K products
data.filter(data.Unit_Cost >= '188.0197').count()

33394

In [0]:
df = data.filter(data.Unit_Cost >= '188.0197')
df.show(8)

+--------------------+--------------------+---------------+-------------+--------------+----------+---------+---------+----------+----------+---------+-------------+----------+------------+
|              Region|             Country|      Item_Type|Sales_Channel|Order_Priority|Order_Date| Order_ID|Ship_Date|Units_Sold|Unit_Price|Unit_Cost|Total_Revenue|Total_Cost|Total_Profit|
+--------------------+--------------------+---------------+-------------+--------------+----------+---------+---------+----------+----------+---------+-------------+----------+------------+
|Central America a...|              Panama|      Cosmetics|      Offline|             L| 2/22/2015|874708545|2/27/2015|      4551|     437.2|   263.33|    1989697.2|1198414.83|   791282.37|
|Central America a...|              Belize|      Household|      Offline|             H|  2/4/2010|129280602| 3/5/2010|      5858|    668.27|   502.54|   3914725.66|2943879.32|   970846.34|
|              Europe|             Germany|      C

In [0]:
df2 = data.filter(data.Item_Type == 'Office Supplies')
df2.show(8)

+--------------------+--------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|              Region|             Country|      Item_Type|Sales_Channel|Order_Priority|Order_Date| Order_ID| Ship_Date|Units_Sold|Unit_Price|Unit_Cost|Total_Revenue|Total_Cost|Total_Profit|
+--------------------+--------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Central America a...|Saint Kitts and N...|Office Supplies|       Online|             H| 8/10/2011|563966262| 8/29/2011|      9004|    651.21|   524.96|   5863494.84|4726739.84|   1136755.0|
|              Europe|              Monaco|Office Supplies|       Online|             M| 1/13/2012|982711875| 1/17/2012|      5137|    651.21|   524.96|   3345265.77|2696719.52|   648546.25|
|Middle East and N...|            Tunisia |Of

In [0]:
df3 = data.where((data.Region == 'Europe') & (data.Order_Priority == 'H'))
df3.count()

6482

In [0]:
df3 = data.where((data.Region == 'Europe') & (data.Order_Priority == 'L')| (data.Order_Priority == 'M'))
df3.count()

31517

In [0]:
df3 = data.where((data.Region == 'Europe') & (data.Order_Priority == 'C'))
df3.count()

6573

In [0]:
#Getting the count of products in different price ranges
data.groupby('Unit_Price').count().show()

+----------+-----+
|Unit_Price|count|
+----------+-----+
|    255.28| 8407|
|    154.06| 8282|
|     205.7| 8421|
|    651.21| 8426|
|     81.73| 8364|
|     47.45| 8258|
|      9.33| 8262|
|    152.58| 8308|
|    421.89| 8320|
|    668.27| 8278|
|     437.2| 8370|
|    109.28| 8304|
+----------+-----+



In [0]:
#Units sold
data.groupBy('Units_Sold').count().show()

+----------+-----+
|Units_Sold|count|
+----------+-----+
|      9427|   16|
|      3997|    9|
|      9900|   12|
|      8638|    5|
|      4935|   16|
|      8389|   10|
|      3794|    6|
|      7982|   11|
|      5803|    9|
|      6620|    4|
|      2659|   18|
|       496|    7|
|      2866|    8|
|      6658|   12|
|      4101|   15|
|      2366|   14|
|      1580|   12|
|      1342|   12|
|      3749|   11|
|       463|   11|
+----------+-----+
only showing top 20 rows



In [0]:
#Number of transactions in each region
data.groupBy('Region').count().show()

+--------------------+-----+
|              Region|count|
+--------------------+-----+
|Middle East and N...|12580|
|Australia and Oce...| 8113|
|              Europe|25877|
|  Sub-Saharan Africa|26019|
|Central America a...|10731|
|       North America| 2133|
|                Asia|14547|
+--------------------+-----+



In [0]:
#No of units sold for each item type
data.groupBy('Item_Type').count().show()

+---------------+-----+
|      Item_Type|count|
+---------------+-----+
|      Baby Food| 8407|
|         Cereal| 8421|
|           Meat| 8320|
|      Household| 8278|
|     Vegetables| 8282|
|      Beverages| 8258|
|Office Supplies| 8426|
|      Cosmetics| 8370|
|  Personal Care| 8364|
|         Fruits| 8262|
|         Snacks| 8308|
|        Clothes| 8304|
+---------------+-----+



In [0]:
data.orderBy(data.Unit_Cost).show()

+--------------------+--------------------+---------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|              Region|             Country|Item_Type|Sales_Channel|Order_Priority|Order_Date| Order_ID| Ship_Date|Units_Sold|Unit_Price|Unit_Cost|Total_Revenue|Total_Cost|Total_Profit|
+--------------------+--------------------+---------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|  Sub-Saharan Africa|            Cameroon|   Fruits|       Online|             C| 9/27/2016|301202046|10/24/2016|      1234|      9.33|     6.92|     11513.22|   8539.28|     2973.94|
|  Sub-Saharan Africa|              Malawi|   Fruits|       Online|             M| 5/11/2016|230107629|  6/3/2016|      2816|      9.33|     6.92|     26273.28|  19486.72|     6786.56|
|                Asia|            Thailand|   Fruits|       Online|        

Checking for null values in the dataframe

In [0]:
data.filter(data['Region'].isNull()).count()

0

In [0]:
data.filter(data['Country'].isNull()).count()

0

In [0]:
data.filter(data['Item_Type'].isNull()).count()

0

In [0]:
data.filter(data['Sales_Channel'].isNull()).count()

0

In [0]:
data.filter(data['Order_Priority'].isNull()).count()

0

In [0]:
data.filter(data['Order_Date'].isNull()).count()

0

In [0]:
data.filter(data['Order_ID'].isNull()).count()

0

In [0]:
data.filter(data['Ship_Date'].isNull()).count()

0

In [0]:
data.filter(data['Units_Sold'].isNull()).count()

0

In [0]:
data.filter(data['Unit_Price'].isNull()).count()

0

In [0]:
data.filter(data['Unit_Cost'].isNull()).count()


0

In [0]:
data.filter(data['Total_Revenue'].isNull()).count()

0

In [0]:
data.filter(data['Total_Cost'].isNull()).count()

0

In [0]:
data.filter(data['Total_Profit'].isNull()).count()

0

#Sql queries 

In [0]:
from pyspark.sql import SQLContext
sqlContext = SQLContext(spark)

In [0]:
data.registerTempTable('big_data')

In [12]:
sqlContext.sql('select * from big_data').show(truncate = False)

+---------------------------------+----------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Region                           |Country               |Item_Type      |Sales_Channel|Order_Priority|Order_Date|Order_ID |Ship_Date |Units_Sold|Unit_Price|Unit_Cost|Total_Revenue|Total_Cost|Total_Profit|
+---------------------------------+----------------------+---------------+-------------+--------------+----------+---------+----------+----------+----------+---------+-------------+----------+------------+
|Middle East and North Africa     |Azerbaijan            |Snacks         |Online       |C             |10/8/2014 |535113847|10/23/2014|934       |152.58    |97.44    |142509.72    |91008.96  |51500.76    |
|Central America and the Caribbean|Panama                |Cosmetics      |Offline      |L             |2/22/2015 |874708545|2/27/2015 |4551      |437.2     |263.33   |1989697.2

In [0]:
sqlContext.sql('select max(Total_Revenue),min(Total_Revenue) from big_data').show()

+------------------+------------------+
|max(Total_Revenue)|min(Total_Revenue)|
+------------------+------------------+
|         6682700.0|             18.66|
+------------------+------------------+



In [121]:
sqlContext.sql("select DISTINCT(Region) from big_data ").show(truncate = False)


+---------------------------------+
|Region                           |
+---------------------------------+
|Middle East and North Africa     |
|Australia and Oceania            |
|Europe                           |
|Sub-Saharan Africa               |
|Central America and the Caribbean|
|North America                    |
|Asia                             |
+---------------------------------+



In [96]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+------------------+-------------------+------------------+-----------------+--------------------+--------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+-------------------+------------------+-----------------+--------------------+--------------------+
|174405.47999999847|8.832518433600007E8|279276.31999999896|             1094|1.4143553542399993E9|5.3110351088000035E8|
+------------------+-------------------+------------------+-----------------+--------------------+--------------------+



In [97]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+------------------+---------------+-----------------+-----------------+--------------------+-----------------+
|    sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|sum(Total_Profit)|
+------------------+---------------+-----------------+-----------------+--------------------+-----------------+
|177434.45999999845| 8.8683799626E8|284126.6399999989|             1113|1.4200978778400006E9|   5.3325988158E8|
+------------------+---------------+-----------------+-----------------+--------------------+-----------------+



In [119]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Asia' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+------------------+-------------------+------------------+-----------------+-------------------+--------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+-------------------+------------------+-----------------+-------------------+--------------------+
|102188.21999999926|5.024424198000001E8|163634.47999999946|              641|8.045634232000002E8|3.0212100340000004E8|
+------------------+-------------------+------------------+-----------------+-------------------+--------------------+



In [126]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Asia' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|96449.09999999932|4.697275227600002E8|154444.3999999995|              605|7.521769038400004E8|2.8244938107999986E8|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+



In [120]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Europe' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+------------------+-------------------+------------------+-----------------+--------------------+-----------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|sum(Total_Profit)|
+------------------+-------------------+------------------+-----------------+--------------------+-----------------+
|173767.79999999847|8.425082362800004E8|278255.19999999896|             1090|1.3491124235199993E9|   5.0660418724E8|
+------------------+-------------------+------------------+-----------------+--------------------+-----------------+



In [127]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Europe' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+------------------+-------------------+----------------+-----------------+------------------+-------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)| sum(Unit_Price)|count(Units_Sold)|sum(Total_Revenue)|  sum(Total_Profit)|
+------------------+-------------------+----------------+-----------------+------------------+-------------------+
|169304.03999999852|8.493197746200001E8|271107.359999999|             1062|   1.36001977208E9|5.106999974599998E8|
+------------------+-------------------+----------------+-----------------+------------------+-------------------+



In [122]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Middle East and North Africa' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+----------------+--------------------+------------------+-----------------+-------------------+-------------------+
|  sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|  sum(Total_Profit)|
+----------------+--------------------+------------------+-----------------+-------------------+-------------------+
|80507.0999999995|4.0336017965999985E8|128916.39999999962|              505|6.459025634400002E8|2.425423837799998E8|
+----------------+--------------------+------------------+-----------------+-------------------+-------------------+



In [128]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Middle East and North Africa' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+-----------------+--------------------+------------------+-----------------+------------------+-----------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+--------------------+------------------+-----------------+------------------+-----------------+
|88478.09999999941|4.5449318814000016E8|141680.39999999956|              555|    7.2778209176E8|   2.7328890362E8|
+-----------------+--------------------+------------------+-----------------+------------------+-----------------+



In [123]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Australia and Oceania' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|52130.33999999981|2.5914071724000004E8|83476.55999999981|              327|4.149632561600001E8|1.5582253891999993E8|
+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+



In [129]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Australia and Oceania' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|54362.21999999978| 2.6413375164E8|87050.47999999979|              341|4.229586257599999E8|1.5882487411999995E8|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+



In [124]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Central America and the Caribbean' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|69188.27999999962|3.558507877800001E8|110791.5199999997|              434|5.698255495200001E8|   2.1397476174E8|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+



In [130]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Central America and the Caribbean' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+
|73014.35999999958|3.653978139000001E8|116918.23999999967|              458|5.851132475999998E8|2.1971543370000005E8|
+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+



In [125]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'North America' AND Item_Type == 'Baby Food' AND Sales_Channel =='Online' ").show()

+------------------+---------------+------------------+-----------------+--------------------+--------------------+
|    sum(Unit_Cost)|sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+---------------+------------------+-----------------+--------------------+--------------------+
|14188.380000000005|  7.311543228E7|22719.920000000016|               89|1.1708008751999998E8|4.3964655239999995E7|
+------------------+---------------+------------------+-----------------+--------------------+--------------------+



In [131]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'North America' AND Item_Type == 'Baby Food' AND Sales_Channel =='Offline' ").show()

+------------------+---------------+------------------+-----------------+------------------+--------------------+
|    sum(Unit_Cost)|sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+---------------+------------------+-----------------+------------------+--------------------+
|14826.060000000005|  7.197079668E7|23741.040000000015|               93|    1.1524717712E8|4.3276380440000005E7|
+------------------+---------------+------------------+-----------------+------------------+--------------------+



In [98]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+------------------+-------------------+------------------+-----------------+-------------------+-----------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+------------------+-------------------+------------------+-----------------+-------------------+-----------------+
|104163.36000000054|5.314070663999999E8|163108.02000000147|             1069|8.321232573000001E8|    3.007161909E8|
+------------------+-------------------+------------------+-----------------+-------------------+-----------------+



In [99]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+------------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|104942.88000000056|5.323304078399998E8|164328.6600000015|             1077|8.335691053799988E8|3.0123869754000014E8|
+------------------+-------------------+-----------------+-----------------+-------------------+--------------------+



In [132]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Asia' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+------------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|    sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|57976.799999999974|3.1023726719999987E8|90785.10000000065|              595|4.857964103999999E8|1.7555914319999993E8|
+------------------+--------------------+-----------------+-----------------+-------------------+--------------------+



In [133]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Asia' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|62166.71999999999|  3.065837544E8|97346.04000000072|              638|4.800754233000002E8|1.7349166890000004E8|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+



In [134]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Europe' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+------------------+--------------------+------------------+-----------------+-------------------+-----------------+
|    sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+------------------+--------------------+------------------+-----------------+-------------------+-----------------+
|104845.44000000056|5.0795014032000065E8|164176.08000000147|             1076|7.953923687400004E8|   2.8744222842E8|
+------------------+--------------------+------------------+-----------------+-------------------+-----------------+



In [135]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Europe' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+------------------+---------------+------------------+-----------------+-------------------+--------------------+
|    sum(Unit_Cost)|sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+---------------+------------------+-----------------+-------------------+--------------------+
|101435.04000000047|  5.103327432E8|158835.78000000142|             1041|7.991232549000001E8|2.8879051170000005E8|
+------------------+---------------+------------------+-----------------+-------------------+--------------------+



In [136]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Middle East and North Africa' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+
|50571.35999999982|2.4409821072000006E8|79189.02000000051|              519|3.822301415399999E8|1.3813193082000002E8|
+-----------------+--------------------+-----------------+-----------------+-------------------+--------------------+



In [137]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Middle East and North Africa' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+-----------------+-----------------+--------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+--------------------+--------------------+
|54566.39999999993|2.768961249599999E8|85444.80000000058|              560|4.3358795921999997E8|1.5669183426000008E8|
+-----------------+-------------------+-----------------+-----------------+--------------------+--------------------+



In [138]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Australia and Oceania' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+----------------+-----------------+--------------------+-------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)| sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+-----------------+--------------------+----------------+-----------------+--------------------+-------------------+
|33129.59999999995|1.6424866415999997E8|51877.2000000002|              340|2.5719479862000006E8|9.294613446000004E7|
+-----------------+--------------------+----------------+-----------------+--------------------+-------------------+



In [139]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Australia and Oceania' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+-----------------+--------------------+-----------------+-----------------+--------------------+-----------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+--------------------+-----------------+-----------------+--------------------+-----------------+
|30985.91999999997|1.5950869535999998E8|48520.44000000017|              318|2.4977254451999992E8|    9.026384916E7|
+-----------------+--------------------+-----------------+-----------------+--------------------+-----------------+



In [140]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Central America and the Caribbean' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+
|    sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+
|44919.839999999866|2.2200641904000002E8|70339.38000000041|              461|3.4763689878000003E8|1.2563047973999998E8|
+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+



In [141]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Central America and the Caribbean' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|43068.47999999986|2.141411596800001E8|67440.36000000038|              442|3.353207937600001E8|1.2117963407999995E8|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+



In [142]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'North America' AND Item_Type == 'Snacks' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+------------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+--------------------+------------------+-----------------+-------------------+--------------------+
|7990.079999999998|4.0649726879999995E7|12511.559999999998|               82|6.365286666000001E7|2.3003139780000005E7|
+-----------------+--------------------+------------------+-----------------+-------------------+--------------------+



In [143]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'North America' AND Item_Type == 'Snacks' AND Sales_Channel =='Offline' ").show()

+-----------------+--------------------+------------------+-----------------+--------------------+-----------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+--------------------+------------------+-----------------+--------------------+-----------------+
|8769.599999999997|4.2769144320000015E7|13732.199999999997|               90|6.6971634239999995E7|    2.420248992E7|
+-----------------+--------------------+------------------+-----------------+--------------------+-----------------+



In [75]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Clothes' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+-----------------+------------------+-------------------+--------------------+
|   sum(Unit_Cost)|  sum(Total_Profit)|count(Units_Sold)|   sum(Unit_Price)| sum(Total_Revenue)|     sum(Total_Cost)|
+-----------------+-------------------+-----------------+------------------+-------------------+--------------------+
|38384.64000000013|3.816287568000001E8|             1071|117038.87999999919|5.678702415999998E8|1.8624148479999998E8|
+-----------------+-------------------+-----------------+------------------+-------------------+--------------------+



In [100]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Clothes' AND Sales_Channel =='Offline' ").show()

+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|38205.44000000013| 1.9350879744E8|116492.4799999992|             1066|5.900290564799991E8|3.9652025903999954E8|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+



In [101]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Beverages' AND Sales_Channel =='Online' ").show()

+----------------+--------------------+------------------+-----------------+--------------------+-------------------+
|  sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+----------------+--------------------+------------------+-----------------+--------------------+-------------------+
|35064.3700000005|1.7564617157999995E8|52337.350000000384|             1103|2.6217083490000015E8|8.652466332000002E7|
+----------------+--------------------+------------------+-----------------+--------------------+-------------------+



In [102]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Beverages' AND Sales_Channel =='Offline' ").show()

+-----------------+---------------+-----------------+-----------------+--------------------+-------------------+
|   sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+-----------------+---------------+-----------------+-----------------+--------------------+-------------------+
|34778.26000000049| 1.7636529317E8|51910.30000000031|             1094|2.6324420135000014E8|8.687890817999995E7|
+-----------------+---------------+-----------------+-----------------+--------------------+-------------------+



In [104]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Cereal' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+------------------+-----------------+--------------------+-------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+-----------------+-------------------+------------------+-----------------+--------------------+-------------------+
|124839.2600000005|6.217580698000002E8|219276.19999999893|             1066|1.0920983259999998E9|4.703402561999997E8|
+-----------------+-------------------+------------------+-----------------+--------------------+-------------------+



In [105]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Cereal' AND Sales_Channel =='Offline' ").show()

+------------------+---------------+------------------+-----------------+--------------------+-------------------+
|    sum(Unit_Cost)|sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+------------------+---------------+------------------+-----------------+--------------------+-------------------+
|125541.92000000051| 6.2933473547E8|220510.39999999938|             1072|1.1054064989000003E9|4.760717634300002E8|
+------------------+---------------+------------------+-----------------+--------------------+-------------------+



In [106]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Meat' AND Sales_Channel =='Online' ").show()

+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+
|    sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+
|400794.31000000203|1.9784045928600008E9|463657.1100000063|             1099|2.2887085296600027E9|3.1030393679999983E8|
+------------------+--------------------+-----------------+-----------------+--------------------+--------------------+



In [107]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Meat' AND Sales_Channel =='Offline' ").show()

+----------------+--------------------+------------------+-----------------+------------------+-------------------+
|  sum(Unit_Cost)|     sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)|sum(Total_Revenue)|  sum(Total_Profit)|
+----------------+--------------------+------------------+-----------------+------------------+-------------------+
|394959.270000002|1.9575465130000014E9|456906.87000000605|             1083|     2.264578953E9|3.070324399999999E8|
+----------------+--------------------+------------------+-----------------+------------------+-------------------+



In [108]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Household' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+-----------------+-----------------+--------------------+-----------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+--------------------+-----------------+
|550783.8399999978|2.786497863120001E9|732423.9200000025|             1096|3.7054422075600004E9|   9.1894434444E8|
+-----------------+-------------------+-----------------+-----------------+--------------------+-----------------+



In [109]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Household' AND Sales_Channel =='Offline' ").show()

+----------------+--------------------+-----------------+-----------------+--------------------+-------------------+
|  sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|  sum(Total_Profit)|
+----------------+--------------------+-----------------+-----------------+--------------------+-------------------+
|543748.279999997|2.7251819526400003E9|723068.1400000032|             1082|3.6239052483200006E9|8.987232956800001E8|
+----------------+--------------------+-----------------+-----------------+--------------------+-------------------+



In [95]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Vegetables' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+
|97476.96000000014|4.946089157099997E8|165152.31999999876|             1072|8.380012048199997E8|3.4339228910999984E8|
+-----------------+-------------------+------------------+-----------------+-------------------+--------------------+



In [110]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Vegetables' AND Sales_Channel =='Offline' ").show()

+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+
|91293.72000000025| 4.4743061265E8|154676.2399999989|             1004|7.580684063000003E8|3.1063779365000004E8|
+-----------------+---------------+-----------------+-----------------+-------------------+--------------------+



In [111]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Office Supplies' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+-----------------+-----------------+-------------------+-----------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+--------------------+-----------------+-----------------+-------------------+-----------------+
|564856.9600000026|2.8495511247999988E9|700701.9600000045|             1076|3.534852537299999E9|    6.853014125E8|
+-----------------+--------------------+-----------------+-----------------+-------------------+-----------------+



In [112]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Office Supplies' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|574831.2000000046|2.936992137119999E9|713074.9500000064|             1095|3.643322633370001E9|   7.0633049625E8|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+



In [113]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Cosmetics' AND Sales_Channel =='Online' ").show()

+-----------------+--------------------+-----------------+-----------------+-------------------+-------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|  sum(Total_Profit)|
+-----------------+--------------------+-----------------+-----------------+-------------------+-------------------+
|293612.9499999981|1.5014615772500005E9|487478.0000000048|             1115|2.492837889999999E9|9.913763127500002E8|
+-----------------+--------------------+-----------------+-----------------+-------------------+-------------------+



In [114]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Cosmetics' AND Sales_Channel =='Offline' ").show()

+-----------------+--------------------+----------------+-----------------+------------------+-------------------+
|   sum(Unit_Cost)|     sum(Total_Cost)| sum(Unit_Price)|count(Units_Sold)|sum(Total_Revenue)|  sum(Total_Profit)|
+-----------------+--------------------+----------------+-----------------+------------------+-------------------+
|300196.1999999981|1.4790045315199997E9|498408.000000005|             1140|    2.4555530368E9|9.765485052799995E8|
+-----------------+--------------------+----------------+-----------------+------------------+-------------------+



In [115]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Personal Care' AND Sales_Channel =='Online' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+
|63527.06999999909|3.089139503400003E8|91619.33000000083|             1121|4.455185664600003E8|   1.3660461612E8|
+-----------------+-------------------+-----------------+-----------------+-------------------+-----------------+



In [116]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Personal Care' AND Sales_Channel =='Offline' ").show()

+------------------+-------------------+-----------------+-----------------+--------------------+--------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)|  sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+-------------------+-----------------+-----------------+--------------------+--------------------+
|61430.279999999155|2.982956357699999E8|88595.32000000076|             1084|4.3020473463000005E8|1.3190909885999998E8|
+------------------+-------------------+-----------------+-----------------+--------------------+--------------------+



In [117]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Fruits' AND Sales_Channel =='Online' ").show()

+------------------+-------------------+------------------+-----------------+-------------------+--------------------+
|    sum(Unit_Cost)|    sum(Total_Cost)|   sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+------------------+-------------------+------------------+-----------------+-------------------+--------------------+
|7494.3600000000515|3.548616828000001E7|10104.389999999941|             1083|4.784479046999997E7|1.2358622190000016E7|
+------------------+-------------------+------------------+-----------------+-------------------+--------------------+



In [118]:
sqlContext.sql("select sum(Unit_Cost),sum(Total_Cost),sum(Unit_Price),count(Units_Sold),sum(Total_Revenue),sum(Total_Profit) from big_data where Region == 'Sub-Saharan Africa' AND Item_Type == 'Fruits' AND Sales_Channel =='Offline' ").show()

+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|   sum(Unit_Cost)|    sum(Total_Cost)|  sum(Unit_Price)|count(Units_Sold)| sum(Total_Revenue)|   sum(Total_Profit)|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+
|7224.480000000049|3.608332275999997E7|9740.519999999944|             1044|4.864991349000005E7|1.2566590729999995E7|
+-----------------+-------------------+-----------------+-----------------+-------------------+--------------------+

