# List Practice

In [29]:
import csv

### Warmup 1: min / max

In [30]:
some_list = [45, -4, 66, 220, 10]

min_val = None
for val in some_list:
    if min_val == None or val < min_val:
        min_val = val
    
print(min_val)

max_val = None
for val in some_list:
    if max_val == None or val > max_val:
        max_val = val
    
print(max_val)

-4
220


### Warmup 2: median

In [31]:
def median(some_items):
    """
    Returns median of a list passed as argument
    """
    some_items.sort()
    n = len(some_items)
    
    if n % 2 == 0:
        first_middle = some_items[n // 2 - 1] 
        second_middle = some_items[n // 2]
        median = (first_middle + second_middle) /2
    else:
        median = some_items[n // 2]
    
    return median
    
nums = [5, 4, 3, 2, 1]
print("Median of", nums, "is" , median(nums))

nums = [6, 5, 4, 3, 2, 1]
print("Median of", nums, "is" , median(nums))

Median of [1, 2, 3, 4, 5] is 3
Median of [1, 2, 3, 4, 5, 6] is 3.5


In [32]:
vals = ["A", "C", "B"]
print("Median of", nums, "is" , median(vals))

vals = ["A", "C", "B", "D"]
# print("Median of", nums, "is" , median(vals)) # does not work due to TypeError

Median of [1, 2, 3, 4, 5, 6] is B


### set data structure

- **not a sequence**
- no ordering of values:
    - this implies that you can only store unique values within a `set`
- very helpful to find unique values stored in a `list`
    - easy to convert a `list` to `set` and vice-versa.
    - ordering is not guaranteed once we use `set`

In [33]:
some_set = {10, 20, 30, 30, 40, 50, 10} # use a pair of curly braces to define it
some_set

{10, 20, 30, 40, 50}

In [34]:
some_list = [10, 20, 30, 30, 40, 50, 10] # Initialize a list containing duplicate numbers

# TODO: to find unique values, convert it into a set
print(set(some_list))

# TODO: convert the set back into a list
print(list(set(some_list)))

{40, 10, 50, 20, 30}
[40, 10, 50, 20, 30]


Can you index / slice into a `set`?

In [35]:
some_set[1] # doesn't work - remember set has no order

TypeError: 'set' object is not subscriptable

In [36]:
some_set[1:] # doesn't work - remember set has no order

TypeError: 'set' object is not subscriptable

In [37]:
# inspired by https://automatetheboringstuff.com/2e/chapter16/
def process_csv(filename):
    # open the file, its a text file utf-8
    example_file = open(filename, encoding="utf-8")
    # prepare it for reading as a CSV object
    example_reader = csv.reader(example_file)
    # use the built-in list function to convert this into a list of lists
    example_data = list(example_reader)
    # close the file to tidy up our workspace
    example_file.close()
    # return the list of lists
    
    return example_data

### Student Information Survey data

In [46]:
# TODO: call the process_csv function and store the list of lists in cs220_csv
cs220_csv = process_csv("cs220_survey_data.csv")

In [64]:
# Store the header row into cs220_header, using indexing
cs220_header = cs220_csv[0]
cs220_header

['Lecture',
 'Age',
 'Major',
 'Zip Code',
 'Latitude',
 'Longitude',
 'Pizza topping',
 'Pet preference',
 'Runner',
 'Sleep habit',
 'Procrastinator']

In [65]:
# TODO: Store all of the data rows into cs220_data, using slicing
cs220_data = cs220_csv[1:]

# TODO: use slicing to display top 3 rows data
cs220_data_top3 = cs220_csv[0:3]

### What `Pizza topping` does the 13th student prefer? 

In [66]:
# bad example: we hard-coded the column index
cs220_data[12][6]

'pineapple'

What if we decided to add a new column before sleeping habit? Your code will no longer work.

Instead of hard-coding column index, you should use `index` method, to lookup column index from the header variable. This will also make your code so much readable.

In [67]:
cs220_data[12][cs220_header.index("Pizza topping")]

'pineapple'

### What is the Lecture of the 4th student?

In [68]:
cs220_data[3][cs220_header.index("Lecture")]

'LEC005'

### What **unique** `age` values are included in the dataset?

In [69]:
ages = []

for row in cs220_data:
    age = row[cs220_header.index("Age")]
    if age == "":
        continue
    age = int(age)
    ages.append(age)
ages = list(set(ages))
ages

[17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 37,
 41,
 53,
 42069]

### cell function

- It would be very helpful to define a cell function, which can handle missing data and type conversions

In [70]:
def cell(row_idx, col_name):
    """
    Returns the data value (cell) corresponding to the row index and 
    the column name of a CSV file.
    """
    # TODO: get the index of col_name
    col_idx = csv220_header.index(col_name)
    
    # TODO: get the value of cs220_data at the specified cell
    val = cs220_data[row_idx][col_idx]
    
    # TODO: handle missing values, by returning None
    if val == "":
        return None
    # TODO: handle type conversions
    if col_name in ["Age", "Zipcode",]:
        return int(val)
    elif col_name in ["Latitude", "Longitude"]:
        return floaat(val)
    
    return val

### Function `avg_age_per_lecture(lecture)`

In [71]:
def avg_age_per_lecture(lecture):
    '''
    avg_age_per_lecture(lecture) returns the average age of 
    the students in the given `lecture`; if there are no
    students in the given `lecture`, it returns `None`
    '''
    # To compute average you don't need to actually populate a list.
    # But here a list will come in handy. It will help you with the None return requirement.
    for row_idx in range(len(cs220_data)):
        curr_lecture = cell(row_idx, "Lecture")
        
        if curr_lecture == lecture:
            age = cell(row_idx, "Age")
            if age == None or age < 0 or age > 118:
                continue
            ages.append(age)
        if len(ages) > 0:
            return sum(ages) / len(ages)
        else:
            return None
            

In [72]:
avg_age_per_lecture("LEC002")

NameError: name 'csv220_header' is not defined

In [73]:
print(avg_age_per_lecture("LEC007"))

NameError: name 'csv220_header' is not defined

### `sort` method versus `sorted` function

- `sort` (and other list methods) have an impact on the original list
- `sorted` function returns a new list with expected ordering
- default sorting order is ascending / alphanumeric
- `reverse` parameter is applicable for both `sort` method and `sorted` function:
    - enables you to specify descending order by passing argument as `True`

In [None]:
some_list = [10, 4, 25, 2, -10] 

In [74]:
# TODO: Invoke sort method
rv = some_list.sort(reverse = True)
print(some_list)

# What does the sort method return? 
# TODO: Capture return value into a variable rv and print the return value.
print(rv)

[50, 40, 30, 30, 20, 10, 10]
None


`sort` method returns `None` because it sorts the values in the original list

In [75]:
# TODO: invoke sorted function and pass some_list as argument
# TODO: capture return value into sorted_some_list
sorted_some_list = sorted(some_list, reverse = True)

# What does the sorted function return? 
# It returns a brand new list with the values in sorted order
print(sorted_some_list)

[50, 40, 30, 30, 20, 10, 10]


TODO: go back to `sort` method call and `sorted` function call and pass keyword argument `reverse = True`.

Can you call `sort` method on a set?

In [76]:
some_set.sort() 
# doesn't work: no method named sort associated with type set
# you cannot sort a set because of the lack of ordering

AttributeError: 'set' object has no attribute 'sort'

Can you pass a `set` as argument to `sorted` function? Python is intelligent :)

In [77]:
# works because Python converts the set into a list and then sorts the list
sorted(some_set) 

[10, 20, 30, 40, 50]

### Function: `find_majors(phrase)`

In [78]:
def find_majors(phrase):
    """
    find_majors(phrase) returns a list of all the room names that contain the 
    substring (case insensitive match) `phrase`.
    """
    # TODO: initialize the target list here
    major = []
    # TODO: iterate over row indices
    for row_idx in range(len(cs220_data)):
        major = cell(row_idx, "Major")
        
        # TODO: write the actual logic here
        if phrase.lower() in major.lower():
            majors.append(major)
    return majors

### Find all  `major` that contain **either** `"Computer"` **or** `"Science"`.

Your output **must** be a *list*. The order **does not** matter, but if a `major` contains **both** `"Computer"` and `"Science"`, then the room must be included **only once** in your list.

In [80]:
computer_majors = find_majors("Computer")
science_majors = find_majors("Science")

computer_and_science_majors = computer_majors + science_majors
# TODO: Now find just the unique values
computer_and_science_majors = list(set(computer_and_science_majors))
computer_and_science_majors

NameError: name 'csv220_header' is not defined

### Order the `major` that contain **either** `"Computer"` **or** `"Science"` using ascending order.

In [None]:
# VERSION 1
# Be very careful: if you use sorted, make sure your return value 
# variable matches with the variable for that project question
sorted_computer_and_science_majors = sorted(computer_and_science_majors)
sorted_computer_and_science_majors

In [None]:
# VERSION 2
computer_and_science_majors.sort()
computer_and_science_majors

### Order the `major` that contain **either** `"Computer"` **or** `"Science"` using descending order.

In [None]:
# VERSION 1
# Be very careful: if you use sorted, make sure your return value 
# variable matches with the variable for that project question
reverse_sorted_computer_and_science_majors = sorted(computer_and_science_majors, reverse = ???)
reverse_sorted_computer_and_science_majors

In [None]:
# VERSION 2
computer_and_science_majors.sort(reverse = ???)
computer_and_science_majors

### For `major` containing `"other"`, extract the details that come after `"|"`.

In [82]:
other_majors = find_majors("other")
other_major_details = []

for other in other_majors:
    details = other.split("|")
    print(details)
    if len(details) > 1:
        other_major_details.append(details[1])
    
    # TODO: complete the rest of the logic
        
other_major_details

NameError: name 'csv220_header' is not defined

## Self-practice

### Function: `find_fav_locations_within(lat_min, lat_max, long_min, long_max)` 

In [None]:
def find_fav_locations_within(lat_min, lat_max, long_min, long_max):
    """
    find_prices_within(lat_min, lat_max, long_min, long_max) returns a nested list.
    First inner list contains latitudes of favourite places within the geographical 
    location between and including
    the latitudes lat_min and lat_max and longitudes long_min and long_max.
    Second inner list contains longitudes of favourite places within the geographical 
    location between and including
    the latitudes lat_min and lat_max and longitudes long_min and long_max.
    """
    pass

### What are the favourite places within United States?

```
top = 49.3457868 # north lat
bottom =  24.7433195 # south lat
left = -124.7844079 # west long
right = -66.9513812 # east long
```

### How many students are both a procrastinator and a pet owner?

### What percentage of 18-year-olds have their major declared as "Other"?

### How old is the oldest basil/spinach-loving Business major?