# Project 1

# Crimes in Chicago, IL from 2001 to present:
ID - Unique identifier for the record
Case Number - The Chicago Police Department RD Number (Records Division Number), which is unique to the incident.
Date - Date when the incident occurred. this is sometimes a best estimate.
Block - The partially redacted address where the incident occurred, placing it on the same block as the actual address.
IUCR - The Illinois Unifrom Crime Reporting code. This is directly linked to the Primary Type and Description.
Primary Type - The primary description of the IUCR code.
Description - The secondary description of the IUCR code, a subcategory of the primary description.
Location Description - Description of the location where the incident occurred.
Arrest - Indicates whether an arrest was made.
Domestic - Indicates whether the incident was domestic-related as defined by the Illinois Domestic Violence Act.
Beat - Indicates the beat where the incident occurred. A beat is the smallest police geographic area – each beat has a dedicated police beat car.
District - Indicates the police district where the incident occurred.
Ward - The ward (City Council district) where the incident occurred.
Community Area - Indicates the community area where the incident occurred. Chicago has 77 community areas
FBI Code - Indicates the crime classification as outlined in the FBI's National Incident-Based Reporting System (NIBRS).
X Coordinate - The x coordinate of the location where the incident occurred in State Plane Illinois East NAD 1983 projection.
Y Coordinate - The y coordinate of the location where the incident occurred in State Plane Illinois East NAD 1983 projection.
Year - Year the incident occurred.
Updated On - Date and time the record was last updated.
Latitude - The latitude of the location where the incident occurred.
Longitude - The longitude of the location where the incident occurred.
Location - The location where the incident occurred in a format that allows for creation of maps and other geographic operations on this data portal. 

In [1]:
import sys
import os

sys.path.insert(0, '/usr/hdp/current/spark2-client/python')
sys.path.insert(0, '/usr/hdp/current/spark2-client/python/lib/py4j-0.10.7-src.zip')

os.environ['SPARK_HOME'] = '/usr/hdp/current/spark2-client/'
os.environ['SPARK_CONF_DIR'] = '/etc/spark2/conf'

import pyspark
conf = pyspark.SparkConf()
conf.setMaster("yarn")
conf.set("spark.driver.memory","2g")
conf.set("spark.executor.instances","5")
conf.set("spark.executor.memory","15g")
conf.set("spark.executor.cores","8")

sc = pyspark.SparkContext(conf=conf)

In [2]:
sc

# Import data and extract header

In [3]:
crimes = sc.textFile("Crimes_-_2001_to_present2.txt")

In [4]:
crimesHeader = crimes.first() #extract header
print(crimesHeader)

ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location


In [5]:
crimesOnly = crimes.filter(lambda x: x != crimesHeader)

In [6]:
crimesOnly.take(3)

['11034701,JA366925,01/01/2001 11:00:00 AM,016XX E 86TH PL,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,false,false,0412,004,8,45,11,,,2001,08/05/2017 03:50:08 PM,,,',
 '11227287,JB147188,10/08/2017 03:00:00 AM,092XX S RACINE AVE,0281,CRIM SEXUAL ASSAULT,NON-AGGRAVATED,RESIDENCE,false,false,2222,022,21,73,02,,,2017,02/11/2018 03:57:41 PM,,,',
 '11227583,JB147595,03/28/2017 02:00:00 PM,026XX W 79TH ST,0620,BURGLARY,UNLAWFUL ENTRY,OTHER,false,false,0835,008,18,70,05,,,2017,02/11/2018 03:57:41 PM,,,']

# Map data, excluding X Coordinate, Y Coordinate, Longitude, and Latitude

In [7]:
crimesMapped = crimesOnly.map(lambda line: (line.split(",")[0],line.split(",")[1], line.split(",")[2], line.split(",")[3], line.split(",")[4], line.split(",")[5], line.split(",")[6], line.split(",")[7], line.split(",")[8], line.split(",")[9], line.split(",")[10], line.split(",")[11], line.split(",")[12], line.split(",")[13], line.split(",")[14],line.split(",")[17], line.split(",")[18], line.split(",")[21]))

crimesMapped.take(3)

[('11034701',
  'JA366925',
  '01/01/2001 11:00:00 AM',
  '016XX E 86TH PL',
  '1153',
  'DECEPTIVE PRACTICE',
  'FINANCIAL IDENTITY THEFT OVER $ 300',
  'RESIDENCE',
  'false',
  'false',
  '0412',
  '004',
  '8',
  '45',
  '11',
  '2001',
  '08/05/2017 03:50:08 PM',
  ''),
 ('11227287',
  'JB147188',
  '10/08/2017 03:00:00 AM',
  '092XX S RACINE AVE',
  '0281',
  'CRIM SEXUAL ASSAULT',
  'NON-AGGRAVATED',
  'RESIDENCE',
  'false',
  'false',
  '2222',
  '022',
  '21',
  '73',
  '02',
  '2017',
  '02/11/2018 03:57:41 PM',
  ''),
 ('11227583',
  'JB147595',
  '03/28/2017 02:00:00 PM',
  '026XX W 79TH ST',
  '0620',
  'BURGLARY',
  'UNLAWFUL ENTRY',
  'OTHER',
  'false',
  'false',
  '0835',
  '008',
  '18',
  '70',
  '05',
  '2017',
  '02/11/2018 03:57:41 PM',
  '')]

# Extract districts from data

In [8]:
districts = crimesMapped.map(lambda x: x[11])

In [9]:
def distCheck(x):
    if(len(x) == 3):
        return x
    else:
        return "000"


districts = districts.map(distCheck)
districts.take(5)

['004', '022', '008', '003', '001']

In [10]:
districtsCollection = districts.countByValue()

# 1. Districts 7, 8, and 11 have most reported crimes

In [11]:
sorted(districtsCollection.items())


[('000', 234546),
 ('001', 272390),
 ('002', 320063),
 ('003', 345623),
 ('004', 378288),
 ('005', 297620),
 ('006', 393687),
 ('007', 400956),
 ('008', 458143),
 ('009', 338534),
 ('010', 291235),
 ('011', 438337),
 ('012', 328354),
 ('014', 270459),
 ('015', 297254),
 ('016', 223876),
 ('017', 195367),
 ('018', 307761),
 ('019', 307257),
 ('020', 117318),
 ('021', 4),
 ('022', 216158),
 ('024', 205983),
 ('025', 391726),
 ('031', 196)]

# Extract Primary Type 

In [12]:
primaryType = crimesMapped.map(lambda x : x[5])
primaryType.take(5)

['DECEPTIVE PRACTICE',
 'CRIM SEXUAL ASSAULT',
 'BURGLARY',
 'THEFT',
 'CRIM SEXUAL ASSAULT']

# Zip Primary Type with Districts

In [13]:
districts.zip(primaryType).take(5)

[('004', 'DECEPTIVE PRACTICE'),
 ('022', 'CRIM SEXUAL ASSAULT'),
 ('008', 'BURGLARY'),
 ('003', 'THEFT'),
 ('001', 'CRIM SEXUAL ASSAULT')]

# 2. Number of each type of crime per district

In [14]:
distWithType = districts.zip(primaryType)
distWithTypeCollection = distWithType.countByValue()
sorted(distWithTypeCollection.items())

[(('000', 'ARSON'), 258),
 (('000', 'ASSAULT'), 31599),
 (('000', 'BATTERY'), 66276),
 (('000', 'BURGLARY'), 3178),
 (('000', 'CONCEALED CARRY LICENSE VIOLATION'), 4),
 (('000', 'CRIM SEXUAL ASSAULT'), 490),
 (('000', 'CRIMINAL DAMAGE'), 13685),
 (('000', 'CRIMINAL TRESPASS'), 7840),
 (('000', 'DECEPTIVE PRACTICE'), 7032),
 (('000', 'GAMBLING'), 232),
 (('000', 'HUMAN TRAFFICKING'), 1),
 (('000', 'INTERFERENCE WITH PUBLIC OFFICER'), 110),
 (('000', 'INTIMIDATION'), 242),
 (('000', 'KIDNAPPING'), 218),
 (('000', 'LIQUOR LAW VIOLATION'), 420),
 (('000', 'MOTOR VEHICLE THEFT'), 34948),
 (('000', 'NARCOTICS'), 12340),
 (('000', 'OBSCENITY'), 72),
 (('000', 'OFFENSE INVOLVING CHILDREN'), 1438),
 (('000', 'OTHER NARCOTIC VIOLATION'), 17),
 (('000', 'OTHER OFFENSE'), 4390),
 (('000', 'PROSTITUTION'), 16),
 (('000', 'PUBLIC INDECENCY'), 7),
 (('000', 'PUBLIC PEACE VIOLATION'), 5509),
 (('000', 'RITUALISM'), 3),
 (('000', 'ROBBERY'), 1759),
 (('000', 'SEX OFFENSE'), 1691),
 (('000', 'STALKING')

# Extract Location Description

In [15]:
locationDesc = crimesMapped.map(lambda x: x[7])
locationDesc.take(5)

['RESIDENCE', 'RESIDENCE', 'OTHER', 'RESIDENCE', 'HOTEL/MOTEL']

In [16]:
sortLocationDesc = locationDesc.countByValue()

# 3. Number of crimes per location description

In [17]:
sorted(sortLocationDesc.items(), key=lambda k_v: k_v[::-1], reverse = True) 

[('STREET', 1806413),
 ('RESIDENCE', 1189963),
 ('APARTMENT', 735277),
 ('SIDEWALK', 686894),
 ('OTHER', 265143),
 ('PARKING LOT/GARAGE(NON.RESID.)', 197666),
 ('"SCHOOL', 194052),
 ('ALLEY', 155576),
 ('RESIDENCE-GARAGE', 134818),
 ('SMALL RETAIL STORE', 126640),
 ('RESIDENCE PORCH/HALLWAY', 123220),
 ('VEHICLE NON-COMMERCIAL', 113273),
 ('RESTAURANT', 113216),
 ('GROCERY FOOD STORE', 90939),
 ('DEPARTMENT STORE', 88874),
 ('GAS STATION', 75686),
 ('RESIDENTIAL YARD (FRONT/BACK)', 73966),
 ('CHA PARKING LOT/GROUNDS', 55986),
 ('PARK PROPERTY', 54069),
 ('COMMERCIAL / BUSINESS OFFICE', 50311),
 ('BAR OR TAVERN', 38369),
 ('CTA PLATFORM', 37534),
 ('CHA APARTMENT', 36793),
 ('DRUG STORE', 31825),
 ('HOTEL/MOTEL', 29561),
 ('BANK', 28593),
 ('CTA TRAIN', 26360),
 (' BUS', 25884),
 ('CHA HALLWAY/STAIRWELL/ELEVATOR', 24998),
 ('VACANT LOT/LAND', 24319),
 ('CTA BUS', 22699),
 ('TAVERN/LIQUOR STORE', 22343),
 ('HOSPITAL BUILDING/GROUNDS', 21945),
 ('DRIVEWAY - RESIDENTIAL', 19808),
 ('POLICE

# 4. How many reported crimes lead to an arrest

In [18]:
arrest = crimesMapped.map(lambda x:x[8])


In [19]:
def arrestCheck(x):
    if(x == "true" or x =='false'):
        return x
    else:
        return " "
arrest = arrest.map(arrestCheck)

In [20]:
arrest.take(5)

['false', 'false', 'false', 'false', 'false']

In [21]:
arrestCount = arrest.countByValue()

In [22]:
sorted(arrestCount.items(), reverse = True) 

[('true', 1873009), ('false', 4923627), (' ', 234499)]

# Extract date (includes time)

In [23]:
date = crimesMapped.map(lambda x : x[2])
date.take(5)

['01/01/2001 11:00:00 AM',
 '10/08/2017 03:00:00 AM',
 '03/28/2017 02:00:00 PM',
 '09/09/2017 08:17:00 PM',
 '08/26/2017 10:00:00 AM']

# Get time only

In [24]:
def getTime(x):
    time = ""
    for element in x[11:22]: 
        time+=element
    return time


In [25]:
timeOnly = date.map(lambda x : getTime(x))
timeOnly.take(5)

['11:00:00 AM', '03:00:00 AM', '02:00:00 PM', '08:17:00 PM', '10:00:00 AM']

# Replace minutes and seconds with '00'

In [26]:
def replace_str_index(text,index=0,replacement='0'):
    return '%s%s%s'%(text[:index],replacement,text[index+1:])


In [27]:
def replaceMins(x):
    x = replace_str_index(x,3)
    y = replace_str_index(x,4)
    return y

In [28]:
timeClean = timeOnly.map(lambda x : replaceMins(x))
timeClean.take(5)

['11:00:00 AM', '03:00:00 AM', '02:00:00 PM', '08:00:00 PM', '10:00:00 AM']

In [29]:
def replaceSecs(x):
    x = replace_str_index(x,6)
    y = replace_str_index(x,7)
    return y

In [30]:
timeCleaner = timeClean.map(lambda x : replaceSecs(x))
timeCleaner.take(5)

['11:00:00 AM', '03:00:00 AM', '02:00:00 PM', '08:00:00 PM', '10:00:00 AM']

# 5. Show which times(hour + AM/PM) are most popular for committing crimes

In [31]:
timeCount = timeCleaner.countByValue()

In [32]:
sorted(timeCount.items(), key=lambda k_v: k_v[::-1], reverse = True)

[('12:00:00 PM', 401286),
 ('07:00:00 PM', 400255),
 ('08:00:00 PM', 398895),
 ('09:00:00 PM', 388660),
 ('12:00:00 AM', 388075),
 ('06:00:00 PM', 386564),
 ('10:00:00 PM', 383156),
 ('03:00:00 PM', 374305),
 ('05:00:00 PM', 359956),
 ('02:00:00 PM', 356227),
 ('04:00:00 PM', 353938),
 ('01:00:00 PM', 334557),
 ('11:00:00 PM', 316701),
 ('11:00:00 AM', 311519),
 ('09:00:00 AM', 302811),
 ('10:00:00 AM', 296212),
 ('08:00:00 AM', 238254),
 ('01:00:00 AM', 222650),
 ('02:00:00 AM', 186944),
 ('07:00:00 AM', 160078),
 ('03:00:00 AM', 150610),
 ('04:00:00 AM', 113564),
 ('06:00:00 AM', 111492),
 ('05:00:00 AM', 94426)]

# Extract year & clean

In [33]:
year = crimesMapped.map(lambda x : x[15])
year.take(5)

['2001', '2017', '2017', '2017', '2017']

In [34]:
def yearCheck(x):
    if (x[:2] == '20'):
        return x
    else:
        return ''

In [35]:
year = year.map(lambda x : yearCheck(x))

In [36]:
year.take(5)

['2001', '2017', '2017', '2017', '2017']

In [37]:
yearCount = year.countByValue()

# 6. Reproted crimes per year 

In [38]:
sorted(yearCount.items(), key=lambda k_v: k_v[::-1], reverse = True)

[('2001', 469289),
 ('2002', 469191),
 ('2003', 457559),
 ('2004', 450875),
 ('2005', 435268),
 ('2006', 430990),
 ('2007', 421537),
 ('2008', 411645),
 ('2009', 379700),
 ('2010', 357072),
 ('2011', 339670),
 ('2012', 326240),
 ('2013', 297412),
 ('2014', 267553),
 ('2016', 262766),
 ('2017', 262489),
 ('2018', 261335),
 ('2015', 257603),
 ('2019', 238442),
 ('', 234499)]

# Extract blocks

In [39]:
block = crimesMapped.map(lambda x : x[3])
block.take(5)

['016XX E 86TH PL',
 '092XX S RACINE AVE',
 '026XX W 79TH ST',
 '060XX S EBERHART AVE',
 '001XX W RANDOLPH ST']

# Get street only

In [40]:
def streetOnly(x):
    street = ""
    for element in x[6::]: 
        street+=element
    return street

In [41]:
street = block.map(lambda x : streetOnly(x))
street.take(5)

['E 86TH PL', 'S RACINE AVE', 'W 79TH ST', 'S EBERHART AVE', 'W RANDOLPH ST']

# Street with most reported crimes 

In [42]:
sorted(street.countByValue().items(), key=lambda k_v: k_v[::-1], reverse = True)

[('S STATE ST', 83497),
 ('S MICHIGAN AVE', 66030),
 ('W MADISON ST', 63642),
 ('S HALSTED ST', 63617),
 ('S DR MARTIN LUTHER KING JR DR', 57646),
 ('S ASHLAND AVE', 57614),
 ('N CLARK ST', 52921),
 ('W NORTH AVE', 51823),
 ('S COTTAGE GROVE AVE', 48368),
 ('W DIVISION ST', 41127),
 ('S WESTERN AVE', 40359),
 ('N MILWAUKEE AVE', 38541),
 ('W CHICAGO AVE', 37814),
 ('S WABASH AVE', 37585),
 ('N STATE ST', 37031),
 ('S PULASKI RD', 36318),
 ('S INDIANA AVE', 35050),
 ('W JACKSON BLVD', 34625),
 ('S KEDZIE AVE', 34401),
 ('N SHERIDAN RD', 34374),
 ('N MICHIGAN AVE', 30949),
 ('N BROADWAY', 30616),
 ('S CICERO AVE', 30236),
 ('W MONROE ST', 30100),
 ('W WASHINGTON BLVD', 29907),
 ('W ROOSEVELT RD', 29448),
 ('W BELMONT AVE', 28478),
 ('S PRAIRIE AVE', 28414),
 ('W 63RD ST', 28021),
 ('S RACINE AVE', 27846),
 ('W FULLERTON AVE', 27648),
 ('N WESTERN AVE', 26988),
 ('S WENTWORTH AVE', 26822),
 ('W LAKE ST', 26733),
 ('S STONY ISLAND AVE', 26559),
 ('S MORGAN ST', 26459),
 ('N PULASKI RD', 26

# 7. Zip districts with street, find which districts are in S State Street

In [43]:
distAndStreet = districts.zip(street)
distAndStreet.take(5)

[('004', 'E 86TH PL'),
 ('022', 'S RACINE AVE'),
 ('008', 'W 79TH ST'),
 ('003', 'S EBERHART AVE'),
 ('001', 'W RANDOLPH ST')]

In [44]:
distAndStreet.filter(lambda x: "S STATE ST" in x).countByValue()

defaultdict(int,
            {('002', 'S STATE ST'): 14162,
             ('001', 'S STATE ST'): 39979,
             ('005', 'S STATE ST'): 12790,
             ('006', 'S STATE ST'): 7596,
             ('003', 'S STATE ST'): 5053,
             ('000', 'S STATE ST'): 3451,
             ('031', 'S STATE ST'): 12,
             ('007', 'S STATE ST'): 444,
             ('018', 'S STATE ST'): 10})

Can see here: https://imgur.com/a/XYjeYnt where S State Street is and how many districts are covered by S State Street.
About half the crimes that happen on S State Street are within the 1st district.

# Get month and zip it with type

In [45]:
def getMonth(x):
    month = ""
    for element in x[:2]: 
        month+=element
    return month
    

In [46]:
month = date.map(lambda x : getMonth(x) )
month.take(5)

['01', '10', '03', '09', '08']

In [47]:
monthAndType = month.zip(primaryType)
monthAndType.take(5)

[('01', 'DECEPTIVE PRACTICE'),
 ('10', 'CRIM SEXUAL ASSAULT'),
 ('03', 'BURGLARY'),
 ('09', 'THEFT'),
 ('08', 'CRIM SEXUAL ASSAULT')]

# Get month with theft, robbery, and burglary types only

In [48]:
def typeSort(x):
    if(('THEFT' in x) or ('ROBBERY' in x) or ('BURGLARY' in x)):
        return x
    else:
        return ''
    

In [49]:
stealingTypesOnly = monthAndType.map(lambda x : typeSort(x))
stealingTypesOnly.take(5)

['', '', ('03', 'BURGLARY'), ('09', 'THEFT'), '']

# Accounts of theft, robbery, and burglary in the Spring

In [50]:
def springOnly(x):
    if(('03' in x) or ('04' in x) or ('05' in x)):
        return x
    else:
        return ''
   
   

In [51]:
springStealing =  stealingTypesOnly.map(lambda x : springOnly(x))
springStealing.take(5)

['', '', ('03', 'BURGLARY'), '', '']

In [52]:
springStealingClean = springStealing.filter(lambda x : springOnly(x) )
springStealingClean.take(5)

[('03', 'BURGLARY'),
 ('03', 'THEFT'),
 ('03', 'THEFT'),
 ('04', 'THEFT'),
 ('05', 'BURGLARY')]

In [53]:
sorted(springStealingClean.countByValue().items(), key=lambda k_v: k_v[::-1], reverse = True)

[(('05', 'THEFT'), 126783),
 (('04', 'THEFT'), 116384),
 (('03', 'THEFT'), 113281),
 (('05', 'BURGLARY'), 33663),
 (('04', 'BURGLARY'), 29868),
 (('03', 'BURGLARY'), 28532),
 (('05', 'ROBBERY'), 21935),
 (('04', 'ROBBERY'), 19385),
 (('03', 'ROBBERY'), 18621)]

In [54]:
springStealingClean.count()

508452

# Accounts of theft, robbery, and burglary in the Summer

In [55]:
def summerOnly(x):
    if(('06' in x) or ('07' in x) or ('08' in x)):
        return x
    else:
        return ''

In [56]:
summerStealing =  stealingTypesOnly.map(lambda x : summerOnly(x))
summerStealing.take(50)


['',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 ('07', 'THEFT'),
 '',
 '',
 '',
 '',
 ('07', 'THEFT'),
 '',
 '',
 '',
 '',
 '',
 ('07', 'THEFT'),
 '',
 '',
 '',
 '',
 '',
 '',
 ('07', 'THEFT'),
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '']

In [57]:
summerStealingClean =  summerStealing.filter(lambda x : summerOnly(x))

In [58]:
sorted(summerStealingClean.countByValue().items(), key=lambda k_v: k_v[::-1], reverse = True)

[(('08', 'THEFT'), 145873),
 (('07', 'THEFT'), 145672),
 (('06', 'THEFT'), 131751),
 (('08', 'BURGLARY'), 38256),
 (('07', 'BURGLARY'), 37280),
 (('06', 'BURGLARY'), 34016),
 (('08', 'ROBBERY'), 25173),
 (('07', 'ROBBERY'), 25031),
 (('06', 'ROBBERY'), 22498)]

In [59]:
summerStealingClean.count()

605550

# Accounts of theft, robbery, and burglary in the Fall

In [60]:
def fallOnly(x):
    if(('09' in x) or ('10' in x) or ('11' in x)):
        return x
    else:
        return ''

In [61]:
fallStealing =  stealingTypesOnly.map(lambda x : fallOnly(x))
fallStealing.take(5)

['', '', '', ('09', 'THEFT'), '']

In [62]:
fallStealingClean =  fallStealing.filter(lambda x : fallOnly(x))

In [63]:
sorted(fallStealingClean.countByValue().items(), key=lambda k_v: k_v[::-1], reverse = True)

[(('10', 'THEFT'), 133845),
 (('09', 'THEFT'), 132885),
 (('11', 'THEFT'), 119571),
 (('10', 'BURGLARY'), 37448),
 (('09', 'BURGLARY'), 36441),
 (('11', 'BURGLARY'), 34989),
 (('10', 'ROBBERY'), 25222),
 (('09', 'ROBBERY'), 23780),
 (('11', 'ROBBERY'), 23092)]

In [64]:
fallStealingClean.count()

567273

# Accounts of theft, robbery, and burglary in the Winter

In [65]:
def winterOnly(x):
    if(('12' in x) or ('01' in x) or ('02' in x)):
        return x
    else:
        return ''

In [66]:
winterStealing =  stealingTypesOnly.map(lambda x : winterOnly(x))
winterStealing.take(5)

['', '', '', '', '']

In [67]:
winterStealingClean =  winterStealing.filter(lambda x: winterOnly(x))


In [68]:
sorted(winterStealingClean.countByValue().items(), key=lambda k_v: k_v[::-1], reverse = True)

[(('01', 'THEFT'), 112761),
 (('12', 'THEFT'), 111916),
 (('02', 'THEFT'), 95303),
 (('12', 'BURGLARY'), 33111),
 (('01', 'BURGLARY'), 30831),
 (('02', 'BURGLARY'), 24172),
 (('12', 'ROBBERY'), 22890),
 (('01', 'ROBBERY'), 21272),
 (('02', 'ROBBERY'), 15542)]

In [69]:
winterStealingClean.count()

467798

# 8. Theft, robbery, and burglary count by season

In [70]:
springStealingClean.count()

508452

In [71]:
summerStealingClean.count()

605550

In [72]:
fallStealingClean.count()

567273

In [73]:
winterStealingClean.count()

467798

In [74]:
sc.stop()