# Exercise 1: Schema on Read

In [29]:
from pyspark.sql import SparkSession
import pandas as pd
import matplotlib

In [30]:
spark = SparkSession.builder.getOrCreate()

In [31]:
dfLog = spark.read.text("data/NASA_access_log_Jul95.gz")

# Load the dataset

In [32]:
#Data Source: http://ita.ee.lbl.gov/traces/NASA_access_log_Jul95.gz
dfLog = spark.read.text("data/NASA_access_log_Jul95.gz")

# Quick inspection of  the data set

In [33]:
# see the schema
dfLog.printSchema()

root
 |-- value: string (nullable = true)



In [34]:
# number of lines
dfLog.count()

1891715

In [35]:
#what's in there? 
dfLog.show(7)

+--------------------+
|               value|
+--------------------+
|199.72.81.55 - - ...|
|unicomp6.unicomp....|
|199.120.110.21 - ...|
|burger.letters.co...|
|199.120.110.21 - ...|
|burger.letters.co...|
|burger.letters.co...|
+--------------------+
only showing top 7 rows



In [36]:
#a better show?
dfLog.show(5, truncate=False)

+-----------------------------------------------------------------------------------------------------------------------+
|value                                                                                                                  |
+-----------------------------------------------------------------------------------------------------------------------+
|199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] "GET /history/apollo/ HTTP/1.0" 200 6245                                 |
|unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] "GET /shuttle/countdown/ HTTP/1.0" 200 3985                      |
|199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] "GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0" 200 4085   |
|burger.letters.com - - [01/Jul/1995:00:00:11 -0400] "GET /shuttle/countdown/liftoff.html HTTP/1.0" 304 0               |
|199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] "GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0" 200 4179|
+-----------------------

In [37]:
#pandas to the rescue
pd.set_option('max_colwidth', 300)
dfLog.limit(5).toPandas()

Unnamed: 0,value
0,"199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] ""GET /history/apollo/ HTTP/1.0"" 200 6245"
1,"unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] ""GET /shuttle/countdown/ HTTP/1.0"" 200 3985"
2,"199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] ""GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0"" 200 4085"
3,"burger.letters.com - - [01/Jul/1995:00:00:11 -0400] ""GET /shuttle/countdown/liftoff.html HTTP/1.0"" 304 0"
4,"199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] ""GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0"" 200 4179"


# Let' try simple parsing with split

In [38]:
from pyspark.sql.functions import split
import re
# Todo
string = "199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179"
pattern = "^(\S+) (\S+) (\S+) \[(\S+) (\-[\d]+)\] \"([A-Za-z]+) (\S+) (\S+)\" ([0-9]+) (\d+)"
result = re.search(pattern, string)
#print(result.group(1), result.group(2), result.group(3), result.group(4), \
#      result.group(5), result.group(6), result.group(7), result.group(8))
dict1 = dict()
dict1['host'] = result.group(1)
dict1['client_identd'] = result.group(2)
dict1['user_id'] = result.group(3)
dict1['date_time'] = result.group(4) + ' ' + result.group(5)
dict1['method'] = result.group(6)
dict1['endpoint'] = result.group(7)
dict1['protocol'] = result.group(8)
dict1['response_code'] = result.group(9)
dict1['content_size'] = result.group(10)
print(dict1)


string = "unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985"
pattern = "^(\S+) (\S+) (\S+) \[(\S+) (\-[\d]+)\] \"([A-Za-z]+) (\S+) (\S+)\" ([0-9]+) (\d+)"
result = re.search(pattern, string)
#print(result.group(1), result.group(2), result.group(3), result.group(4), \
#      result.group(5), result.group(6), result.group(7), result.group(8))
dict1 = dict()
dict1['host'] = result.group(1)
dict1['client_identd'] = result.group(2)
dict1['user_id'] = result.group(3)
dict1['date_time'] = result.group(4) + ' ' + result.group(5)
dict1['method'] = result.group(6)
dict1['endpoint'] = result.group(7)
dict1['protocol'] = result.group(8)
dict1['response_code'] = result.group(9)
dict1['content_size'] = result.group(10)
print(dict1)

{'host': '199.120.110.21', 'client_identd': '-', 'user_id': '-', 'date_time': '01/Jul/1995:00:00:11 -0400', 'method': 'GET', 'endpoint': '/shuttle/missions/sts-73/sts-73-patch-small.gif', 'protocol': 'HTTP/1.0', 'response_code': '200', 'content_size': '4179'}
{'host': 'unicomp6.unicomp.net', 'client_identd': '-', 'user_id': '-', 'date_time': '01/Jul/1995:00:00:06 -0400', 'method': 'GET', 'endpoint': '/shuttle/countdown/', 'protocol': 'HTTP/1.0', 'response_code': '200', 'content_size': '3985'}


# Second attempt, let's build a custom parsing UDF 

In [39]:
#from pyspark.sql.functions import udf

#@udf
#def parseUDF(line):
# Todo

In [40]:
#Let's start from the beginning
# Todo

In [41]:
#dfParsed.printSchema()

# Third attempt, let's fix our UDF

In [42]:
#from pyspark.sql.functions import udf # already imported
#from pyspark.sql.types import MapType, StringType

#@udf(MapType(StringType(),StringType()))
#def parseUDFbetter(line):
#    pattern = ""

In [43]:
#Let's start from the beginning
# Todo

In [44]:
#Let's start from the beginning
# Todo
from pyspark.sql.functions import split
import re
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType, IntegerType, FloatType
from pyspark.sql.types import ArrayType, MapType


@udf(returnType=MapType(StringType(), StringType()))
def parseUDFbetter(line):
    pattern = "^(\S+) (\S+) (\S+) \[(\S+) (\-[\d]+)\] \"([A-Za-z]+) (\S+) (\S+)\" ([0-9]+) (\d+)"
    result = re.search(pattern, line)
    if result is None:
        return(line, 0)
    dict1 = dict()
    dict1['host'] = result.group(1)
    dict1['client_identd'] = result.group(2)
    dict1['user_id'] = result.group(3)
    dict1['date_time'] = result.group(4) + ' ' + result.group(5)
    dict1['method'] = result.group(6)
    dict1['endpoint'] = result.group(7)
    dict1['protocol'] = result.group(8)
    dict1['response_code'] = result.group(9)
    dict1['content_size'] = result.group(10)
    if dict1['content_size'] == '-':
        dict1['content_size'] = 0
    return dict1

dfParsed = dfLog.withColumn("parsed", parseUDFbetter("value"))

In [45]:
#Bingo!! we'got a column of type map with the fields parsed
dfParsed.printSchema()

root
 |-- value: string (nullable = true)
 |-- parsed: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [46]:
#dfParsed.show(10, truncate=False)
#dfParsed.select("parsed").limit(10).show()
dfParsed.limit(12).toPandas()


Unnamed: 0,value,parsed
0,"199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] ""GET /history/apollo/ HTTP/1.0"" 200 6245","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/history/apollo/', 'content_size': '6245', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:01 -0400', 'user_id': '-', 'host': '199.72.81.55', 'client_identd': '-'}"
1,"unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] ""GET /shuttle/countdown/ HTTP/1.0"" 200 3985","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/countdown/', 'content_size': '3985', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:06 -0400', 'user_id': '-', 'host': 'unicomp6.unicomp.net', 'client_identd': '-'}"
2,"199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] ""GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0"" 200 4085","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/missions/sts-73/mission-sts-73.html', 'content_size': '4085', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:09 -0400', 'user_id': '-', 'host': '199.120.110.21', 'client_identd': '-'}"
3,"burger.letters.com - - [01/Jul/1995:00:00:11 -0400] ""GET /shuttle/countdown/liftoff.html HTTP/1.0"" 304 0","{'response_code': '304', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/countdown/liftoff.html', 'content_size': '0', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:11 -0400', 'user_id': '-', 'host': 'burger.letters.com', 'client_identd': '-'}"
4,"199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] ""GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0"" 200 4179","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/missions/sts-73/sts-73-patch-small.gif', 'content_size': '4179', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:11 -0400', 'user_id': '-', 'host': '199.120.110.21', 'client_identd': '-'}"
5,"burger.letters.com - - [01/Jul/1995:00:00:12 -0400] ""GET /images/NASA-logosmall.gif HTTP/1.0"" 304 0","{'response_code': '304', 'protocol': 'HTTP/1.0', 'endpoint': '/images/NASA-logosmall.gif', 'content_size': '0', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:12 -0400', 'user_id': '-', 'host': 'burger.letters.com', 'client_identd': '-'}"
6,"burger.letters.com - - [01/Jul/1995:00:00:12 -0400] ""GET /shuttle/countdown/video/livevideo.gif HTTP/1.0"" 200 0","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/countdown/video/livevideo.gif', 'content_size': '0', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:12 -0400', 'user_id': '-', 'host': 'burger.letters.com', 'client_identd': '-'}"
7,"205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] ""GET /shuttle/countdown/countdown.html HTTP/1.0"" 200 3985","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/countdown/countdown.html', 'content_size': '3985', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:12 -0400', 'user_id': '-', 'host': '205.212.115.106', 'client_identd': '-'}"
8,"d104.aa.net - - [01/Jul/1995:00:00:13 -0400] ""GET /shuttle/countdown/ HTTP/1.0"" 200 3985","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/shuttle/countdown/', 'content_size': '3985', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:13 -0400', 'user_id': '-', 'host': 'd104.aa.net', 'client_identd': '-'}"
9,"129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] ""GET / HTTP/1.0"" 200 7074","{'response_code': '200', 'protocol': 'HTTP/1.0', 'endpoint': '/', 'content_size': '7074', 'method': 'GET', 'date_time': '01/Jul/1995:00:00:13 -0400', 'user_id': '-', 'host': '129.94.144.152', 'client_identd': '-'}"


# Let's build separate columns

In [47]:
dfParsed.selectExpr("parsed['host'] as host").limit(5).show(5)

+--------------------+
|                host|
+--------------------+
|        199.72.81.55|
|unicomp6.unicomp.net|
|      199.120.110.21|
|  burger.letters.com|
|      199.120.110.21|
+--------------------+



In [48]:
dfParsed.selectExpr(["parsed['host']", "parsed['date_time']"]).show(5)

+--------------------+--------------------+
|        parsed[host]|   parsed[date_time]|
+--------------------+--------------------+
|        199.72.81.55|01/Jul/1995:00:00...|
|unicomp6.unicomp.net|01/Jul/1995:00:00...|
|      199.120.110.21|01/Jul/1995:00:00...|
|  burger.letters.com|01/Jul/1995:00:00...|
|      199.120.110.21|01/Jul/1995:00:00...|
+--------------------+--------------------+
only showing top 5 rows



In [49]:
fields = ["host", "client_identd","user_id", "date_time", "method", "endpoint", "protocol", "response_code", "content_size"]
exprs = list()
for field in fields:
    expr1 = "parsed['" + field + "'] as " + field
    exprs.append(expr1)
print(exprs)


["parsed['host'] as host", "parsed['client_identd'] as client_identd", "parsed['user_id'] as user_id", "parsed['date_time'] as date_time", "parsed['method'] as method", "parsed['endpoint'] as endpoint", "parsed['protocol'] as protocol", "parsed['response_code'] as response_code", "parsed['content_size'] as content_size"]


In [50]:
dfClean = dfParsed.selectExpr(*exprs)
dfClean.limit(5).toPandas()

Unnamed: 0,host,client_identd,user_id,date_time,method,endpoint,protocol,response_code,content_size
0,199.72.81.55,-,-,01/Jul/1995:00:00:01 -0400,GET,/history/apollo/,HTTP/1.0,200,6245
1,unicomp6.unicomp.net,-,-,01/Jul/1995:00:00:06 -0400,GET,/shuttle/countdown/,HTTP/1.0,200,3985
2,199.120.110.21,-,-,01/Jul/1995:00:00:09 -0400,GET,/shuttle/missions/sts-73/mission-sts-73.html,HTTP/1.0,200,4085
3,burger.letters.com,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/countdown/liftoff.html,HTTP/1.0,304,0
4,199.120.110.21,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/missions/sts-73/sts-73-patch-small.gif,HTTP/1.0,200,4179


## Popular hosts

In [51]:
import pyspark.sql.functions as func
dfClean1 = dfClean.groupBy("host").count().orderBy(func.desc("count")).limit(15)
print(dfClean1.toPandas())

                    host  count
0                   None  24442
1   piweba3y.prodigy.com  17381
2   piweba4y.prodigy.com  11489
3   piweba1y.prodigy.com   9730
4     alyssa.prodigy.com   7766
5    siltb10.orl.mmc.com   7573
6   piweba2y.prodigy.com   5861
7     edams.ksc.nasa.gov   5397
8           163.206.89.4   4870
9            news.ti.com   4827
10  disarray.demon.co.uk   4317
11    vagrant.vf.mmc.com   4133
12     poppy.hensa.ac.uk   4095
13         198.133.29.18   4086
14  www-a2.proxy.aol.com   4079


## Popular content

In [52]:
from pyspark.sql.functions import desc
dfClean2 = dfClean.groupBy("endpoint").count().sort(desc("count")).limit(10)
print(dfClean2.toPandas())

                       endpoint   count
0    /images/NASA-logosmall.gif  111086
1     /images/KSC-logosmall.gif   89529
2  /images/MOSAIC-logosmall.gif   60299
3     /images/USA-logosmall.gif   59844
4   /images/WORLD-logosmall.gif   59324
5    /images/ksclogo-medium.gif   58615
6       /images/launch-logo.gif   40841
7           /shuttle/countdown/   40248
8                     /ksc.html   40057
9      /images/ksclogosmall.gif   33555


## Large Files

In [53]:
#dfClean.createOrReplaceTempView("cleanlog")
#spark.sql("""
#select endpoint, content_size
#from cleanlog 
#order by content_size desc
#""").limit(10).toPandas()

from pyspark.sql.functions import desc

print(dfClean.printSchema())
dfClean_content = dfClean.select(dfClean.endpoint, dfClean.content_size).orderBy(desc("content_size")).limit(10)
print(dfClean_content.toPandas())

root
 |-- host: string (nullable = true)
 |-- client_identd: string (nullable = true)
 |-- user_id: string (nullable = true)
 |-- date_time: string (nullable = true)
 |-- method: string (nullable = true)
 |-- endpoint: string (nullable = true)
 |-- protocol: string (nullable = true)
 |-- response_code: string (nullable = true)
 |-- content_size: string (nullable = true)

None
                                            endpoint content_size
0                     /images/cdrom-1-95/img0007.jpg        99981
1  /shuttle/missions/sts-71/movies/sts-71-launch.mpg       999424
2  /shuttle/missions/sts-71/movies/sts-71-launch.mpg       999424
3         /history/apollo/apollo-13/images/index.gif        99942
4         /history/apollo/apollo-13/images/index.gif        99942
5         /history/apollo/apollo-13/images/index.gif        99942
6         /history/apollo/apollo-13/images/index.gif        99942
7         /history/apollo/apollo-13/images/index.gif        99942
8         /history/apollo/a

In [54]:
#from pyspark.sql.functions import expr
#dfCleanTyped = # Todo

In [55]:
#dfCleanTyped.createOrReplaceTempView("cleantypedlog")
#spark.sql("""
#select endpoint, content_size
#from cleantypedlog 
#order by content_size_bytes desc
#""").limit(10).toPandas()

from pyspark.sql.types import IntegerType

dfClean_new = dfClean.withColumn("content_size_int", dfClean["content_size"].cast(IntegerType()))
print(dfClean_new.printSchema())
dfClean_new_content = dfClean_new.select("endpoint", "content_size_int").sort(desc("content_size_int")).limit(10)
print(dfClean_new_content.toPandas())


root
 |-- host: string (nullable = true)
 |-- client_identd: string (nullable = true)
 |-- user_id: string (nullable = true)
 |-- date_time: string (nullable = true)
 |-- method: string (nullable = true)
 |-- endpoint: string (nullable = true)
 |-- protocol: string (nullable = true)
 |-- response_code: string (nullable = true)
 |-- content_size: string (nullable = true)
 |-- content_size_int: integer (nullable = true)

None
                                          endpoint  content_size_int
0          /shuttle/countdown/video/livevideo.jpeg           6823936
1            /statistics/1995/bkup/Mar95_full.html           3155499
2            /statistics/1995/bkup/Mar95_full.html           3155499
3            /statistics/1995/bkup/Mar95_full.html           3155499
4  /statistics/1995/Jun/Jun95_reverse_domains.html           2973350
5  /statistics/1995/Jun/Jun95_reverse_domains.html           2973350
6  /statistics/1995/Jun/Jun95_reverse_domains.html           2973350
7  /statistics/1995/

In [56]:
# Left for you, clean the date column :)
# 1- Create a udf that parses that weird format,
# 2- Create a new column with a data tiem string that spark would understand
# 3- Add a new date-time column properly typed
# 4- Print your schema

In [77]:
from pyspark.sql.functions import udf
from pyspark.sql.types import IntegerType, FloatType, StringType
from pyspark.sql.types import ArrayType, MapType
import re

string1 = "01/Jul/1995:00:00:01 -0400"
pattern = "^([0-9A-Za-z\/]+)\:(\d+\:\d+\:\d+) \-"
result = re.search(pattern, string1)
dict_date_time = dict()
dict_date_time["date"] = result.group(1)
dict_date_time["time"] = result.group(2)
print(dict_date_time)

@udf(returnType=MapType(StringType(), StringType()))
def format_date_time (line):
    pattern = "^([0-9A-Za-z\/]+)\:(\d+\:\d+\:\d+) \-"
    result = re.search(pattern, str(line))
    if result is None:
        return(string1, 0)
    dict_date_time = dict()
    if not result.group(1) is None:
        dict_date_time["date"] = result.group(1)
    else:
        dict_date_time["date"] = 0
    if not result.group(2) is None:
        dict_date_time["time"] = result.group(2)
    else:
        dict_date_time["time"] = 0
    return dict_date_time
    

{'date': '01/Jul/1995', 'time': '00:00:01'}


In [78]:
dfClean_date_time = dfClean.withColumn("date_time_new", format_date_time("date_time"))
dfClean_date_time.printSchema()
dfClean_date_time.limit(10).toPandas()

root
 |-- host: string (nullable = true)
 |-- client_identd: string (nullable = true)
 |-- user_id: string (nullable = true)
 |-- date_time: string (nullable = true)
 |-- method: string (nullable = true)
 |-- endpoint: string (nullable = true)
 |-- protocol: string (nullable = true)
 |-- response_code: string (nullable = true)
 |-- content_size: string (nullable = true)
 |-- date_time_new: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



Unnamed: 0,host,client_identd,user_id,date_time,method,endpoint,protocol,response_code,content_size,date_time_new
0,199.72.81.55,-,-,01/Jul/1995:00:00:01 -0400,GET,/history/apollo/,HTTP/1.0,200,6245,"{'date': '01/Jul/1995', 'time': '00:00:01'}"
1,unicomp6.unicomp.net,-,-,01/Jul/1995:00:00:06 -0400,GET,/shuttle/countdown/,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:06'}"
2,199.120.110.21,-,-,01/Jul/1995:00:00:09 -0400,GET,/shuttle/missions/sts-73/mission-sts-73.html,HTTP/1.0,200,4085,"{'date': '01/Jul/1995', 'time': '00:00:09'}"
3,burger.letters.com,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/countdown/liftoff.html,HTTP/1.0,304,0,"{'date': '01/Jul/1995', 'time': '00:00:11'}"
4,199.120.110.21,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/missions/sts-73/sts-73-patch-small.gif,HTTP/1.0,200,4179,"{'date': '01/Jul/1995', 'time': '00:00:11'}"
5,burger.letters.com,-,-,01/Jul/1995:00:00:12 -0400,GET,/images/NASA-logosmall.gif,HTTP/1.0,304,0,"{'date': '01/Jul/1995', 'time': '00:00:12'}"
6,burger.letters.com,-,-,01/Jul/1995:00:00:12 -0400,GET,/shuttle/countdown/video/livevideo.gif,HTTP/1.0,200,0,"{'date': '01/Jul/1995', 'time': '00:00:12'}"
7,205.212.115.106,-,-,01/Jul/1995:00:00:12 -0400,GET,/shuttle/countdown/countdown.html,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:12'}"
8,d104.aa.net,-,-,01/Jul/1995:00:00:13 -0400,GET,/shuttle/countdown/,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:13'}"
9,129.94.144.152,-,-,01/Jul/1995:00:00:13 -0400,GET,/,HTTP/1.0,200,7074,"{'date': '01/Jul/1995', 'time': '00:00:13'}"


In [82]:
list1 = ['date', 'time']
new_list = list()
for item in list1:
    string1 = "date_time_new['" + item + "'] as " + item
    new_list.append(string1)
print(new_list)

["date_time_new['date'] as date", "date_time_new['time'] as time"]


In [84]:
dfClean_split_date_time = dfClean_date_time.selectExpr("*", *new_list)
dfClean_split_date_time.limit(10).toPandas()

Unnamed: 0,host,client_identd,user_id,date_time,method,endpoint,protocol,response_code,content_size,date_time_new,date,time
0,199.72.81.55,-,-,01/Jul/1995:00:00:01 -0400,GET,/history/apollo/,HTTP/1.0,200,6245,"{'date': '01/Jul/1995', 'time': '00:00:01'}",01/Jul/1995,00:00:01
1,unicomp6.unicomp.net,-,-,01/Jul/1995:00:00:06 -0400,GET,/shuttle/countdown/,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:06'}",01/Jul/1995,00:00:06
2,199.120.110.21,-,-,01/Jul/1995:00:00:09 -0400,GET,/shuttle/missions/sts-73/mission-sts-73.html,HTTP/1.0,200,4085,"{'date': '01/Jul/1995', 'time': '00:00:09'}",01/Jul/1995,00:00:09
3,burger.letters.com,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/countdown/liftoff.html,HTTP/1.0,304,0,"{'date': '01/Jul/1995', 'time': '00:00:11'}",01/Jul/1995,00:00:11
4,199.120.110.21,-,-,01/Jul/1995:00:00:11 -0400,GET,/shuttle/missions/sts-73/sts-73-patch-small.gif,HTTP/1.0,200,4179,"{'date': '01/Jul/1995', 'time': '00:00:11'}",01/Jul/1995,00:00:11
5,burger.letters.com,-,-,01/Jul/1995:00:00:12 -0400,GET,/images/NASA-logosmall.gif,HTTP/1.0,304,0,"{'date': '01/Jul/1995', 'time': '00:00:12'}",01/Jul/1995,00:00:12
6,burger.letters.com,-,-,01/Jul/1995:00:00:12 -0400,GET,/shuttle/countdown/video/livevideo.gif,HTTP/1.0,200,0,"{'date': '01/Jul/1995', 'time': '00:00:12'}",01/Jul/1995,00:00:12
7,205.212.115.106,-,-,01/Jul/1995:00:00:12 -0400,GET,/shuttle/countdown/countdown.html,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:12'}",01/Jul/1995,00:00:12
8,d104.aa.net,-,-,01/Jul/1995:00:00:13 -0400,GET,/shuttle/countdown/,HTTP/1.0,200,3985,"{'date': '01/Jul/1995', 'time': '00:00:13'}",01/Jul/1995,00:00:13
9,129.94.144.152,-,-,01/Jul/1995:00:00:13 -0400,GET,/,HTTP/1.0,200,7074,"{'date': '01/Jul/1995', 'time': '00:00:13'}",01/Jul/1995,00:00:13
