# Mapping Crime in Vancouver

Let's just have some plotting fun!

This is loosely an HtDAP design, but we've skipped the planning stages to keep it short! We're also working from the Module 7 VPD location project for fun :)

So, edit and refactor this into something to map crime! (We've already updated the data definitions and `read` function, but not `main` or `analyze`!)

In [None]:
from cs103 import *
from typing import NamedTuple, List
from enum import Enum
import csv
import matplotlib.pyplot as pyplot

##################
# Data Definitions

CrimeData = NamedTuple('CrimeData', [('x', float),
                                     ('y', float)])
# interp. data about a single crime in Vancouver with its x and y location.
# Locations are in metres offset from a somewhat arbitrary point on the surface of
# the earth; x is plotted as the east/west axis and y as the north/south axis.
# (Caution: locations of (0, 0) are sometimes placeholders
# or intentionally inaccurate reports. Fortunately, that doesn't occur in the 
# subset of the data we're looking at.)
CD1 = CrimeData(0, 0)
CD2 = CrimeData(-3.5, 2.0)
CD3 = CrimeData(490258.683, 5458154.503)  # sample location actually pulled from our data

# template based on compound (2 fields)
@typecheck
def fn_for_crime_data(cd: CrimeData) -> ...:
    # Template only: copy it and replace every ... before use.
    # It lists both fields of cd so neither is forgotten.
    return ...(cd.x,
               cd.y)
    

CrimeDataList = List[CrimeData]
# interp. a list of crime data
LOCD0 = []          # example: no crimes
LOCD1 = [CD1, CD2]  # example: two crimes

# template based on arbitrary-sized data and reference rule
@typecheck
def fn_for_cdl(cdl: CrimeDataList) -> ...:
    # Template only: copy it and replace every ... before use.
    # description of accumulator
    acc = ... # type: ...
    
    for cd in cdl:
        # reference rule: each element is handed to the CrimeData template
        acc = ...(fn_for_crime_data(cd), acc)
        
    return ...(acc)


FloatList = List[float]
# interp. a list of floats
LOF0 = []         # example: no floats
LOF1 = [0, -3.5]  # example: two values

# template based on arbitrary-sized data
@typecheck
def fn_for_float_list(fl: FloatList) -> ...:
    # Template only: copy it and replace every ... before use.
    # description of accumulator
    acc = ... # type: ...
    
    for f in fl:
        # fold each float into the accumulator
        acc = ...(f, acc)
        
    return ...(acc)

In [None]:
@typecheck
def read(filename: str) -> CrimeDataList:
    """
    reads information from the specified file and returns a list of crime data,
    one CrimeData per row after the header line, in the order the rows appear

    the file must be in the VPD crime format, and the x and y entries (read
    from column indices 8 and 9 of each row) must be valid floats.

    a file with no lines at all (not even a header line) produces an empty list.
    """
    # Note: in future, we might want to skip (0, 0) entries, but we won't now.

    #return []  #stub
    # Template from HtDAP

    # locd contains the result so far
    locd = [] # type: List[CrimeData]

    # newline='' leaves newline handling to the csv module, which the csv
    # documentation asks for so that newlines inside quoted fields are read
    # correctly on every platform
    with open(filename, newline='') as csvfile:

        reader = csv.reader(csvfile)
        # skip header line; the None default keeps a completely empty file
        # from raising StopIteration
        next(reader, None)

        for row in reader:
            cd = CrimeData(parse_float(row[8]), parse_float(row[9]))
            locd.append(cd)

    return locd



start_testing()
# the "empty" test file is expected to yield no crime data
# (presumably it holds only a header line -- confirm against the file)
expect(read("testfile_empty.csv"), []) 
# the small test file is expected to yield exactly these two crimes
expect(read("testfile_small.csv"), [CrimeData(0, 0),
                                    CrimeData(-3.5, 2.0)]) 

summary()


## Scatterplot solution from the worksheet

Our "template" in the viz module is just to copy-and-paste from a sample of the kind of plot we want. That's not so unrealistic as a starting point as long as we understand what we're using!

Here's the scatterplot worked example body as a starting point for our template:

```python
@typecheck
def show_scatterplot(ages: IntList, salaries: IntList, counts: IntList) -> None:
    """
    display a scatterplot of salaries vs. ages. salaries are given in 1000s
    
    Assumes that the lengths of ages, salaries, and counts are all equal
    """
    #return None #stub
    # Template based on visualization
    
    # NOTE: this worked example calls matplotlib.pyplot by the name plt, while this
    # notebook imports it as pyplot; rename the calls when adapting the example.

    # marker sizes are derived from counts by a helper from the worksheet
    areas = convert_counts_to_areas(counts)

    # set the labels for the axes
    plt.xlabel('Age')
    plt.ylabel('Salary (in 1000s)')
    plt.title('Salaries by age')

    # range for the axes
    # [x-min, x-max, y-min, y-max]
    plt.axis([0,65,0,105])

    # create the scatterplot, with markers that are red (c='r') and triangular (marker='^')
    plt.scatter(ages,salaries,marker='^', c='r', s=areas)

    # show the plot
    plt.show()
    
    return None
```

In [None]:
# Here are some helper functions

@typecheck
def get_x_locations(locd: List[CrimeData]) -> List[float]:
    """
    produce the x location of every crime in locd, in the same order as locd
    (an empty locd produces an empty list)
    """
    # one x value per crime, collected in a single pass
    return [crime.x for crime in locd]

start_testing()

# no crimes means no x locations
expect(get_x_locations([]), []) 
# x locations come back in the same order as the crimes
expect(get_x_locations([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), [0, -3.5])

summary()


@typecheck
def get_y_locations(locd: List[CrimeData]) -> List[float]:
    """
    produce the y location of every crime in locd, in the same order as locd
    (an empty locd produces an empty list)
    """
    # one y value per crime, collected in a single pass
    return [crime.y for crime in locd]

start_testing()

# no crimes means no y locations
expect(get_y_locations([]), []) 
# y locations come back in the same order as the crimes
expect(get_y_locations([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), [0, 2.0])

summary()

In [None]:
@typecheck
def main(filename: str) -> ...:
    """
    Reads the file from given filename, analyzes the data,
    returns the result
    """
    # Template from HtDAP, based on composition
    # TODO: replace the ... return type above, and update this call if analyze
    # is renamed
    return analyze(read(filename))

@typecheck
# You will often want to rename this function!
# Consumed and Produced are placeholders to be replaced with real types
# (main passes in the result of read, which is a list of CrimeData).
def analyze(loc: List[Consumed]) -> Produced:
    """
    ...
    """
    return ...

# It may be easier to read your tests
# if you wrap each set in start_testing/summary.
start_testing()

# Examples and tests for main
# TODO: replace both ... placeholders with a real call and its expected value
expect(..., ...)

# Examples and tests for analyze
# TODO: replace both ... placeholders with a real call and its expected value
expect(..., ...) 

summary()

<details class="alert alert-info"><summary style="cursor:pointer; display:list-item">ℹ️ Sample solution (For later.  Don't peek if you want to learn 🙂)</summary>
   
```python
@typecheck
def main(filename: str) -> None:
    """
    Reads the crime data from given filename and plots its locations.
    There's no computation here so this wouldn't be enough for the final 
    project submission.
    
    Shows an empty plot with a title that says "Locations of Crime in Vancouver",
    an x-axis labeled with "E/W Metres" and a y-axis labeled with "N/S Metres" if 
    there is no data. Always returns None.
    """
    #return None  #stub
    
    # Template from HtDAP, based on function composition 
    return plot_data(read(filename))     

@typecheck
def plot_data(locd: List[CrimeData]) -> None:
    """
    Plots the locations in locd as a scatterplot titled
    "Locations of Crime in Vancouver", with the x-axis labeled "E/W Metres"
    and the y-axis labeled "N/S Metres", then shows the plot.
    
    Returns None.
    """
#     return None # stub

    # Template based on visualization
    
#     areas = convert_counts_to_areas(counts)

    # set the labels for the axes
    pyplot.xlabel('E/W Metres')
    pyplot.ylabel('N/S Metres')
    pyplot.title('Locations of Crime in Vancouver')

    # range for the axes
    # [x-min, x-max, y-min, y-max]
    # If you don't have a good reason for limiting your axis labels, feel
    # free to let Python auto-size your chart for you
#     pyplot.axis([0,65,0,105])

    x_values = get_x_locations(locd) # list of x-coordinates for my data points
    y_values = get_y_locations(locd) # list of y-coordinates for my data points
    
    # The bare minimum required for pyplot.scatter() is to just provide x_values and y_values
    
    # You can choose different colours by using https://www.google.com/search?q=color+picker
    
    # View different options for your markers at https://matplotlib.org/stable/api/markers_api.html
    
    # Alpha determines opacity. Change this value to make the colour more "filled in".
    # 0 means transparent and 1 means a solid colour.
    pyplot.scatter(x_values, y_values, c='#daa4db', marker="^", alpha=0.5) 

    # show the plot
    pyplot.show()
    
    return None

start_testing()
# Should produce an empty plot with title: "Locations of Crime in Vancouver",
# x-axis should have a label showing east/west metres and the y axis should have a label
# showing north/south metres
expect(main("testfile_empty.csv"), None)

# Should produce a plot with title: "Locations of Crime in Vancouver",
# x-axis should have a label showing east/west metres and the y axis should have a label
# showing north/south metres.
# In addition, there should be two data points on the plot: one at (0,0) and one at (-3.5, 2.0)
expect(main("testfile_small.csv"), None)

summary()

start_testing()

# Should produce an empty plot with title: "Locations of Crime in Vancouver",
# x-axis should have a label showing east/west metres and the y axis should have a label
# showing north/south metres
expect(plot_data([]), None)

# Should produce a plot with title: "Locations of Crime in Vancouver",
# x-axis should have a label showing east/west metres and the y axis should have a label
# showing north/south metres.
# In addition, there should be two data points on the plot: one at (0,0) and one at (-3.5, 2.0)
expect(plot_data([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), None)

summary()

```
</details>

In [None]:
# Use this to determine what arguments are REQUIRED and
# what other customization options are possible
help(pyplot.scatter)

In [None]:
# Here is our plot of crime locations!
# Compare this to the map found at https://www.google.com/maps/place/Vancouver,+BC

main("crimedata_subset_bne_theft_of_bike_veh_2018.csv")