In [1]:
import os
import sys
from pyspark.sql import SparkSession

os.environ["HADOOP_HOME"] = "C:\\Users\\SkJain\\Downloads\\Compressed\\winutils-master\\hadoop-3.2.2"
os.environ["PYSPARK_PYTHON"] = "python"
sys.path.append('C:\\Users\\SkJain\\Downloads\\Compressed\\winutils-master\\hadoop-3.2.2\\bin')

In [2]:
spark = SparkSession. \
    builder. \
    config("spark.ui.port", "0"). \
    enableHiveSupport(). \
    appName('Pyspark - Part 4'). \
    master('local'). \
    getOrCreate()

In [3]:
from pyspark.sql.functions import *

In [14]:
airtrafic_data_path = 'C:/Users/SkJain/Downloads/Compressed/data-master/airtraffic_all'
airportCodes_data_path = 'C:/Users/SkJain/Downloads/Compressed/data-master/airport-codes-na.txt'

In [17]:
airtraffic_df = spark.read.parquet(airtrafic_data_path)
airportCodes_df = spark.read.csv(airportCodes_data_path, inferSchema=True, header=True, sep='\t')

In [18]:
airportCodes_df.show()

+-----------+-----+-------+----+
|       City|State|Country|IATA|
+-----------+-----+-------+----+
| Abbotsford|   BC| Canada| YXX|
|   Aberdeen|   SD|    USA| ABR|
|    Abilene|   TX|    USA| ABI|
|      Akron|   OH|    USA| CAK|
|    Alamosa|   CO|    USA| ALS|
|     Albany|   GA|    USA| ABY|
|     Albany|   NY|    USA| ALB|
|Albuquerque|   NM|    USA| ABQ|
| Alexandria|   LA|    USA| AEX|
|  Allentown|   PA|    USA| ABE|
|   Alliance|   NE|    USA| AIA|
|     Alpena|   MI|    USA| APN|
|    Altoona|   PA|    USA| AOO|
|   Amarillo|   TX|    USA| AMA|
|Anahim Lake|   BC| Canada| YAA|
|  Anchorage|   AK|    USA| ANC|
|   Appleton|   WI|    USA| ATW|
|     Arviat|  NWT| Canada| YEK|
|  Asheville|   NC|    USA| AVL|
|      Aspen|   CO|    USA| ASE|
+-----------+-----+-------+----+
only showing top 20 rows



In [19]:
airportCodes_df.printSchema()

root
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- IATA: string (nullable = true)



In [20]:
airportCodes_df.count()

526

In [21]:
airtraffic_df.show(5)

+----+-----+----------+---------+-------+----------+-------+----------+-------------+---------+-------+-----------------+--------------+-------+--------+--------+------+----+--------+------+-------+---------+----------------+--------+------------+------------+--------+-------------+-----------------+------------+------------+-----------+
|Year|Month|DayofMonth|DayOfWeek|DepTime|CRSDepTime|ArrTime|CRSArrTime|UniqueCarrier|FlightNum|TailNum|ActualElapsedTime|CRSElapsedTime|AirTime|ArrDelay|DepDelay|Origin|Dest|Distance|TaxiIn|TaxiOut|Cancelled|CancellationCode|Diverted|CarrierDelay|WeatherDelay|NASDelay|SecurityDelay|LateAircraftDelay|IsArrDelayed|IsDepDelayed|flightmonth|
+----+-----+----------+---------+-------+----------+-------+----------+-------------+---------+-------+-----------------+--------------+-------+--------+--------+------+----+--------+------+-------+---------+----------------+--------+------------+------------+--------+-------------+-----------------+------------+------

In [24]:
spark.conf.get("spark.sql.shuffle.partitions")

'200'

In [25]:
# 200 is too much for such small data
spark.conf.set("spark.sql.shuffle.partitions",2)

In [26]:
spark.conf.get("spark.sql.shuffle.partitions")

'2'

In [27]:
airportCodes_df.show(5)

+----------+-----+-------+----+
|      City|State|Country|IATA|
+----------+-----+-------+----+
|Abbotsford|   BC| Canada| YXX|
|  Aberdeen|   SD|    USA| ABR|
|   Abilene|   TX|    USA| ABI|
|     Akron|   OH|    USA| CAK|
|   Alamosa|   CO|    USA| ALS|
+----------+-----+-------+----+
only showing top 5 rows



In [29]:
# IATA isthe airport code
# since each airport will have a unique airport code,
# then no.of airport codes should be equal to number of records, if there are no duplicates
print(airportCodes_df.count())
print(airportCodes_df.select('IATA').distinct().count())

526
524


In [41]:
#there are two duplicate records
airportCodes_df. \
groupby('IATA'). \
agg(collect_list('City'), collect_list('State'), collect_list('Country'), count('*').alias('recCount')). \
filter(col('recCount')>1). \
show(truncate=False)

+----+----------------------------+--------------------+---------------------+--------+
|IATA|collect_list(City)          |collect_list(State) |collect_list(Country)|recCount|
+----+----------------------------+--------------------+---------------------+--------+
|Big |[Hilo, Kailua-Kona, Kamuela]|[HI, Hawaii, Hawaii]|[USA, USA, USA]      |3       |
+----+----------------------------+--------------------+---------------------+--------+



In [43]:
#only first one is correct
airportCodes_df. \
filter(~((col('State')=='Hawaii') & (col('IATA')=='Big'))). \
count()

524

In [54]:
# no. of airports by stated sorted by count in desc order

airportCodes_df. \
filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')). \
groupby('Country','State'). \
agg(count('*').alias('numOfAirports')). \
sort(col('numOfAirports').desc()). \
show(52)

+-------+-----+-------------+
|Country|State|numOfAirports|
+-------+-----+-------------+
|    USA|   CA|           29|
|    USA|   TX|           26|
|    USA|   AK|           25|
|    USA|   NY|           18|
|    USA|   MI|           18|
|    USA|   FL|           18|
|    USA|   MT|           14|
|    USA|   PA|           13|
|    USA|   IL|           12|
|    USA|   CO|           12|
|    USA|   WY|           10|
|    USA|   NC|           10|
|    USA|   WI|            9|
|    USA|   NE|            9|
|    USA|   GA|            9|
|    USA|   NM|            9|
|    USA|   HI|            9|
|    USA|   WA|            9|
|    USA|   KS|            9|
|    USA|   ND|            8|
|    USA|   MO|            8|
|    USA|   AR|            8|
|    USA|   MA|            8|
|    USA|   MN|            8|
|    USA|   AZ|            8|
|    USA|   WV|            8|
|    USA|   IA|            8|
|    USA|   SD|            7|
|    USA|   ME|            7|
|    USA|   VA|            7|
|    USA| 

In [55]:
# total 51 records should be there (50 states and null)
airportCodes_df. \
filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')). \
groupby('Country','State'). \
agg(count('*').alias('numOfAirports')). \
sort(col('numOfAirports').desc()). \
count()

51

## Inner Join

In [56]:
orders_json = 'C:/Users/SkJain/Downloads/Compressed/data-master/retail_db_json/orders'
order_items_json = 'C:/Users/SkJain/Downloads/Compressed/data-master/retail_db_json/order_items'

In [57]:
orders_df = spark.read.json(orders_json)
order_items_df = spark.read.json(order_items_json)

In [58]:
orders_df.printSchema()

root
 |-- order_customer_id: long (nullable = true)
 |-- order_date: string (nullable = true)
 |-- order_id: long (nullable = true)
 |-- order_status: string (nullable = true)



In [59]:
order_items_df.printSchema()

root
 |-- order_item_id: long (nullable = true)
 |-- order_item_order_id: long (nullable = true)
 |-- order_item_product_id: long (nullable = true)
 |-- order_item_product_price: double (nullable = true)
 |-- order_item_quantity: long (nullable = true)
 |-- order_item_subtotal: double (nullable = true)



In [60]:
# order_id and order_item_order_id are the common columns

#inner is default
order_joined = orders_df. \
                join(order_items_df, orders_df['order_id'] == order_items_df['order_item_order_id'])

In [62]:
order_joined.show(3)

+-----------------+--------------------+--------+---------------+-------------+-------------------+---------------------+------------------------+-------------------+-------------------+
|order_customer_id|          order_date|order_id|   order_status|order_item_id|order_item_order_id|order_item_product_id|order_item_product_price|order_item_quantity|order_item_subtotal|
+-----------------+--------------------+--------+---------------+-------------+-------------------+---------------------+------------------------+-------------------+-------------------+
|            11599|2013-07-25 00:00:...|       1|         CLOSED|            1|                  1|                  957|                  299.98|                  1|             299.98|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|            2|                  2|                 1073|                  199.99|                  1|             199.99|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|

In [63]:
print(orders_df.count())
print(order_items_df.count())
print(order_joined.count())

68883
172198
172198


In [64]:
order_joined.select(orders_df['*'], order_items_df['order_item_subtotal']).show()

+-----------------+--------------------+--------+---------------+-------------------+
|order_customer_id|          order_date|order_id|   order_status|order_item_subtotal|
+-----------------+--------------------+--------+---------------+-------------------+
|            11599|2013-07-25 00:00:...|       1|         CLOSED|             299.98|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|             199.99|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|              250.0|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|             129.99|
|             8827|2013-07-25 00:00:...|       4|         CLOSED|              49.98|
|             8827|2013-07-25 00:00:...|       4|         CLOSED|             299.95|
|             8827|2013-07-25 00:00:...|       4|         CLOSED|              150.0|
|             8827|2013-07-25 00:00:...|       4|         CLOSED|             199.92|
|            11318|2013-07-25 00:00:...|       5|     

## Left/Right Outer Join

In [66]:
customers_json = 'C:/Users/SkJain/Downloads/Compressed/data-master/retail_db_json/customers'

In [68]:
customer_df = spark.read.json(customers_json)

In [69]:
customer_df.printSchema()

root
 |-- customer_city: string (nullable = true)
 |-- customer_email: string (nullable = true)
 |-- customer_fname: string (nullable = true)
 |-- customer_id: long (nullable = true)
 |-- customer_lname: string (nullable = true)
 |-- customer_password: string (nullable = true)
 |-- customer_state: string (nullable = true)
 |-- customer_street: string (nullable = true)
 |-- customer_zipcode: string (nullable = true)



In [70]:
orders_df.printSchema()

root
 |-- order_customer_id: long (nullable = true)
 |-- order_date: string (nullable = true)
 |-- order_id: long (nullable = true)
 |-- order_status: string (nullable = true)



In [72]:
#customer id is the common column

customer_orders_df = customer_df. \
                    join(orders_df, orders_df['order_customer_id'] == customer_df['customer_id'], how='left')

In [73]:
print(orders_df.count())
print(customer_df.count())
print(customer_orders_df.count())

68883
12435
68913


In [88]:
customer_orders_df.filter(orders_df['order_id'].isNull()).show(2)

+-------------+--------------+--------------+-----------+--------------+-----------------+--------------+--------------------+----------------+-----------------+----------+--------+------------+
|customer_city|customer_email|customer_fname|customer_id|customer_lname|customer_password|customer_state|     customer_street|customer_zipcode|order_customer_id|order_date|order_id|order_status|
+-------------+--------------+--------------+-----------+--------------+-----------------+--------------+--------------------+----------------+-----------------+----------+--------+------------+
|       Denver|     XXXXXXXXX|          Mary|        219|       Harrell|        XXXXXXXXX|            CO|9016 Foggy Robin ...|           80219|             null|      null|    null|        null|
|   Long Beach|     XXXXXXXXX|          Mary|        339|        Greene|        XXXXXXXXX|            CA|     4271 Hazy Close|           90805|             null|      null|    null|        null|
+-------------+----------

In [75]:
customer_orders_df.filter(orders_df['order_id'].isNull()).count()

30

In [95]:
# number of orders placed in 2013 by each customer. If customer has not placed any oorders then order count should be 0

orders_df. \
filter(year('order_date') == 2013). \
join(customer_df, orders_df['order_customer_id'] == customer_df['customer_id'], how='right'). \
withColumn('orderPlaced', when(col('order_id').isNull(), 0).otherwise(1)). \
groupby(customer_df['customer_id']). \
agg(sum('orderPlaced').alias('orderCount')). \
sort(col('orderCount').desc()). \
show()

+-----------+----------+
|customer_id|orderCount|
+-----------+----------+
|       2433|        11|
|       5293|        10|
|       4876|        10|
|       9392|         9|
|       5138|         9|
|       8757|         9|
|       3755|         9|
|      10591|         9|
|       5904|         9|
|       6812|         9|
|       9619|         9|
|       2277|         9|
|       3708|         9|
|      11645|         9|
|      10965|         9|
|       4116|         9|
|       1365|         8|
|       5821|         8|
|       6130|         8|
|       7200|         8|
+-----------+----------+
only showing top 20 rows



## Problem 1: Get number of flights departed from each US airport in 2008 January

In [99]:
airportCodes_df.printSchema()

root
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- IATA: string (nullable = true)



In [100]:
airtraffic_df.printSchema()

root
 |-- Year: integer (nullable = true)
 |-- Month: integer (nullable = true)
 |-- DayofMonth: integer (nullable = true)
 |-- DayOfWeek: integer (nullable = true)
 |-- DepTime: string (nullable = true)
 |-- CRSDepTime: integer (nullable = true)
 |-- ArrTime: string (nullable = true)
 |-- CRSArrTime: integer (nullable = true)
 |-- UniqueCarrier: string (nullable = true)
 |-- FlightNum: integer (nullable = true)
 |-- TailNum: string (nullable = true)
 |-- ActualElapsedTime: string (nullable = true)
 |-- CRSElapsedTime: integer (nullable = true)
 |-- AirTime: string (nullable = true)
 |-- ArrDelay: string (nullable = true)
 |-- DepDelay: string (nullable = true)
 |-- Origin: string (nullable = true)
 |-- Dest: string (nullable = true)
 |-- Distance: string (nullable = true)
 |-- TaxiIn: string (nullable = true)
 |-- TaxiOut: string (nullable = true)
 |-- Cancelled: integer (nullable = true)
 |-- CancellationCode: string (nullable = true)
 |-- Diverted: integer (nullable = true)
 |-- Car

In [111]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA']
). \
groupby(airportCodes_df['IATA']). \
agg(count('*').alias('FlightCount')). \
sort(col('FlightCount').desc()). \
show()

+----+-----------+
|IATA|FlightCount|
+----+-----------+
| ATL|      33897|
| ORD|      29936|
| DFW|      23861|
| DEN|      19477|
| LAX|      18945|
| PHX|      17695|
| IAH|      15531|
| LAS|      15292|
| DTW|      14357|
| EWR|      12467|
| SLC|      12401|
| MSP|      11800|
| SFO|      11573|
| MCO|      11070|
| CLT|      10752|
| LGA|      10300|
| JFK|      10023|
| BOS|       9717|
| BWI|       8883|
| CVG|       8659|
+----+-----------+
only showing top 20 rows



In [109]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA']
). \
groupby(airportCodes_df['IATA']). \
agg(count('*').alias('FlightCount')). \
count()

270

## Problem 2: Get number of flights departed from each US State in 2008 January

In [112]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA']
). \
groupby(airportCodes_df['State']). \
agg(count('*').alias('FlightCount')). \
sort(col('FlightCount').desc()). \
show()

+-----+-----------+
|State|FlightCount|
+-----+-----------+
|   CA|      72853|
|   TX|      63930|
|   FL|      41042|
|   IL|      39812|
|   GA|      35527|
|   NY|      28414|
|   CO|      23288|
|   AZ|      20768|
|   OH|      19209|
|   NC|      17942|
|   MI|      17824|
|   NV|      17763|
| null|      14090|
|   TN|      13549|
|   PA|      13491|
|   UT|      12709|
|   NJ|      12498|
|   MN|      12357|
|   MO|      11808|
|   WA|      10210|
+-----+-----------+
only showing top 20 rows



## Problem 3: Get list of US airport from which no flight has departed in Jan 2008

In [116]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA'],
    how='right'
). \
filter(airtraffic_df['Origin'].isNull()). \
select(airportCodes_df['*']) .\
distinct(). \
show()

+--------------+-----+-------+----+
|          City|State|Country|IATA|
+--------------+-----+-------+----+
|      Aberdeen|   SD|    USA| ABR|
|        Alpena|   MI|    USA| APN|
|        Athens|   GA|    USA| AHN|
|       Bedford|   MA|    USA| BED|
|       Bemidji|   MN|    USA| BJI|
|   Bloomington|   IN|    USA| BMG|
|     Brookings|   SD|    USA| BKX|
|    Burlington|   IA|    USA| BRL|
|    Burlington|   MA|    USA| BBF|
|Cape Girardeau|   MO|    USA| CGI|
|      Carlsbad|   NM|    USA| CNM|
|       Chicago|   IL|    USA| CHI|
|    Clarksburg|   WV|    USA| CKB|
|      Columbus|   IN|    USA| CLU|
|      Columbus|   NE|    USA| OLU|
|       Decatur|   IL|    USA| DEC|
|   Devils Lake|   ND|    USA| DVL|
|     Dickinson|   ND|    USA| DIK|
|    Dodge City|   KS|    USA| DDC|
|       Du Bois|   PA|    USA| DUJ|
+--------------+-----+-------+----+
only showing top 20 rows



## Problem 4: Any origin airport which are not in airport code

In [117]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA'],
    how='left'
). \
filter(airportCodes_df['IATA'].isNull()). \
select(airtraffic_df['Origin'], airportCodes_df['*']) .\
distinct(). \
show()

+------+----+-----+-------+----+
|Origin|City|State|Country|IATA|
+------+----+-----+-------+----+
|   HDN|null| null|   null|null|
|   SJU|null| null|   null|null|
|   ITO|null| null|   null|null|
|   STT|null| null|   null|null|
|   CEC|null| null|   null|null|
|   CDC|null| null|   null|null|
|   PSG|null| null|   null|null|
|   ADK|null| null|   null|null|
|   KOA|null| null|   null|null|
|   OTZ|null| null|   null|null|
|   BQN|null| null|   null|null|
|   STX|null| null|   null|null|
|   PMD|null| null|   null|null|
|   PSE|null| null|   null|null|
|   SCC|null| null|   null|null|
|   SLE|null| null|   null|null|
+------+----+-----+-------+----+



In [118]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA'],
    how='left'
). \
filter(airportCodes_df['IATA'].isNull()). \
select(airtraffic_df['Origin'], airportCodes_df['*']) .\
distinct(). \
count()

16

## Problem 5: Get number of flights departed from airports which are not in airport code

In [121]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA'],
    how='left'
). \
filter(airportCodes_df['IATA'].isNull()). \
count()

5585

## Problem 6: Total number of flights per airport which are not in airport code

In [122]:
airtraffic_df. \
join(
    airportCodes_df.filter(~((col('State')=='Hawaii') & (col('IATA')=='Big')) & (col('Country') == 'USA')), 
    airtraffic_df['Origin'] == airportCodes_df['IATA'],
    how='left'
). \
filter(airportCodes_df['IATA'].isNull()). \
groupby(airtraffic_df['Origin']).\
agg(count('*').alias('flightCount')). \
sort(col('flightCount').desc()). \
show()

+------+-----------+
|Origin|flightCount|
+------+-----------+
|   SJU|       1997|
|   KOA|       1316|
|   ITO|        786|
|   HDN|        429|
|   STT|        311|
|   BQN|        124|
|   PSE|        110|
|   OTZ|         92|
|   CEC|         88|
|   PSG|         62|
|   SCC|         62|
|   PMD|         57|
|   SLE|         54|
|   CDC|         48|
|   STX|         40|
|   ADK|          9|
+------+-----------+



## Problem 7: revenue for each date which are completed or closed

In [123]:
orders_df.printSchema()

root
 |-- order_customer_id: long (nullable = true)
 |-- order_date: string (nullable = true)
 |-- order_id: long (nullable = true)
 |-- order_status: string (nullable = true)



In [124]:
order_items_df.printSchema()

root
 |-- order_item_id: long (nullable = true)
 |-- order_item_order_id: long (nullable = true)
 |-- order_item_product_id: long (nullable = true)
 |-- order_item_product_price: double (nullable = true)
 |-- order_item_quantity: long (nullable = true)
 |-- order_item_subtotal: double (nullable = true)



In [129]:
orders_df. \
filter(col('order_status').isin('CLOSED', 'COMPLETE')). \
join(order_items_df, orders_df['order_id'] == order_items_df['order_item_order_id']). \
groupby('order_date'). \
agg(sum('order_item_subtotal')). \
sort('order_date'). \
show()

+--------------------+------------------------+
|          order_date|sum(order_item_subtotal)|
+--------------------+------------------------+
|2013-07-25 00:00:...|      31547.230000000014|
|2013-07-26 00:00:...|       54713.23000000002|
|2013-07-27 00:00:...|       48411.48000000003|
|2013-07-28 00:00:...|       35672.03000000004|
|2013-07-29 00:00:...|      54579.699999999946|
|2013-07-30 00:00:...|       49329.29000000002|
|2013-07-31 00:00:...|      59212.490000000056|
|2013-08-01 00:00:...|      49160.080000000045|
|2013-08-02 00:00:...|       50688.58000000002|
|2013-08-03 00:00:...|       43416.74000000001|
|2013-08-04 00:00:...|       35093.01000000003|
|2013-08-05 00:00:...|      34025.270000000026|
|2013-08-06 00:00:...|       57843.89000000003|
|2013-08-07 00:00:...|       45525.59000000006|
|2013-08-08 00:00:...|       33549.47000000002|
|2013-08-09 00:00:...|      29225.160000000018|
|2013-08-10 00:00:...|       46435.04000000003|
|2013-08-11 00:00:...|                 3

## Problem 8: revenue for each year, month and date which are completed or closed

In [136]:
orders_df. \
filter(col('order_status').isin('CLOSED', 'COMPLETE')). \
join(order_items_df, orders_df['order_id'] == order_items_df['order_item_order_id']). \
rollup(
    year('order_date').alias('orderYear'), 
    month('order_date').alias('orderMonth'), 
    dayofmonth('order_date').alias('orderDay')
). \
agg(sum('order_item_subtotal')). \
sort('orderYear', 'orderMonth', 'orderDay'). \
show()

+---------+----------+--------+------------------------+
|orderYear|orderMonth|orderDay|sum(order_item_subtotal)|
+---------+----------+--------+------------------------+
|     null|      null|    null|     1.501298248001313E7|
|     2013|      null|    null|       6686892.000003136|
|     2013|         7|    null|      333465.44999999914|
|     2013|         7|      25|      31547.230000000014|
|     2013|         7|      26|       54713.23000000002|
|     2013|         7|      27|       48411.48000000003|
|     2013|         7|      28|       35672.03000000004|
|     2013|         7|      29|      54579.699999999946|
|     2013|         7|      30|       49329.29000000002|
|     2013|         7|      31|      59212.490000000056|
|     2013|         8|    null|      1221828.8999999312|
|     2013|         8|       1|      49160.080000000045|
|     2013|         8|       2|       50688.58000000002|
|     2013|         8|       3|       43416.74000000001|
|     2013|         8|       4|