In [40]:
# code.py

import csv
import os

# Define CSV and relative path
HOUSING_PATH = os.path.join("datasets", "housing")
csv_path = os.path.join(HOUSING_PATH, "housing.csv")

# 180 MB
def read_houses_as_tuples(filename):
    records = []
    with open(filename) as f:
        rows = csv.reader(f)
        headers = next(rows)   # Skip (not data)
        for row in rows:
            longitude = row[0]
            latitude = row[1]
            housing_median_age = row[2]
            total_rooms = row[3]
            total_bedrooms = row[4]
            population = row[5]
            households = row[6]
            median_income = float(row[7])
            median_house_value = row[8]
            ocean_proximity = row[9]

            record = (longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity)  # Tuple
            records.append(record)
    return records

In [51]:
houseTuples = read_houses_as_tuples(csv_path)
print ("Houses count" , len(houseTuples) )

#Access tuples using [] with index
islandRows = [record for record in houseTuples if record[9] == 'ISLAND']
nearBayRows = [record for record in houseTuples if record[9] == 'NEAR BAY']
print ("Island count" , len(islandRows) )
print ("Nearby Bay count" , len(nearBayRows) )

Houses count 20640
Island count 5
Nearby Bay count 2290


In [52]:
#Convert any iterator to tuples
islandRows = [record[5] for record in houseTuples if record[9] == 'ISLAND']
popultationTuple = tuple(islandRows)
popultationTuple

('744.0', '1100.0', '733.0', '341.0', '422.0')

In [50]:
#Get unique locations for income > 12 ==> Use set
maxIncomeRows = [record for record in houseTuples if record[7] >= 12]
typeOfLocationForMaxIncome = {record[9] for record in maxIncomeRows}
typeOfLocationForMaxIncome

{'<1H OCEAN', 'INLAND', 'NEAR BAY', 'NEAR OCEAN'}

In [61]:
from collections import Counter
total_distinct_latitudes = Counter() #Using counter

distinclongitudesForMaxIncome =  { (record[0],record[9]) for record in maxIncomeRows}
locationCoorForMaxIncome =  { (record[0],record[1],record[9]) for record in maxIncomeRows}

#Variable unpacking by iterating over sequences of tuples or lists
for longitude,latitude,location_type in locationCoorForMaxIncome:
    total_distinct_latitudes[longitude,location_type] += 1

print (len(total_distinct_latitudes) == len(distinclongitudesForMaxIncome))

True


In [76]:
maxIncomeRowsIsland = [record for record in houseTuples if record[7] >= 12 and record[9] == "ISLAND"]
print (len(maxIncomeRowsIsland))
maxIncomeRowsNearBay = [record for record in houseTuples if record[7] >= 12 and record[9] == "NEAR BAY"]
print (len(maxIncomeRowsNearBay))

maxIncomeRowsIslandNearBay = [] #Define List
for record in maxIncomeRowsNearBay:
  maxIncomeRowsIslandNearBay.append(record) # append or remove from list
print (len(maxIncomeRowsIslandNearBay))

maxIncomeRowsIslandNearBay = maxIncomeRowsIsland + maxIncomeRowsNearBay # Concatenating lists
print (len(maxIncomeRowsIslandNearBay))

0
18
18
18


In [77]:
#Note that list concatenation by addition is expensive operation 
#as new list must be created and the objects copied over. 
#Instead use extends
maxIncomeRowsIslandNearBay = maxIncomeRowsIsland 
print (len(maxIncomeRowsIslandNearBay))
maxIncomeRowsIslandNearBay.extend(maxIncomeRowsNearBay)
print (len(maxIncomeRowsIslandNearBay))

0
18


In [86]:
#Passing function as argument
def apply_to_list(some_list, f):    
    return [f(x) for x in some_list]

islandRows = [record[5] for record in houseTuples if record[9] == 'ISLAND']
print (islandRows)
#Passing function argument as lambda
squaredIslandRows = apply_to_list(islandRows, lambda x: x * 1 )
print (squaredIslandRows)

['744.0', '1100.0', '733.0', '341.0', '422.0']
['744.0', '1100.0', '733.0', '341.0', '422.0']


In [None]:
#TODO : sort, zipped, enumerate
#Currying
#Generators