# Setup

In [1]:
# Plotting / data-wrangling libraries used throughout the notebook
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import datetime
# Use matplotlib's dark theme for every figure
plt.style.use('dark_background')
import sys
# Make the local `mcr` helper package importable.
# NOTE(review): hard-coded absolute path -- only resolves on the author's machine.
sys.path.insert(1, '/home/mauricio/code/mcr')
from mcr.util import glimpse, plot_value_counts, plot_value_counts_timeseries, missing_report, plot_missing, plot_unique, plot_duplicates, size

# Reuse the running Spark context / session if one exists, otherwise create it
from pyspark import SparkContext
sc = SparkContext.getOrCreate()
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

# `F` is the alias used for Spark SQL column functions in later cells
from pyspark.sql import functions as F
# Wildcard import supplies StructType, StructField and the *Type classes used by the schema cell
from pyspark.sql.types import *

23/05/02 19:30:41 WARN Utils: Your hostname, rig resolves to a loopback address: 127.0.1.1; using 192.168.0.102 instead (on interface enp6s0)
23/05/02 19:30:41 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/05/02 19:30:41 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:
# Explicit schema for the CSV import: one (column name, Spark type) pair per
# column, kept in the original order. Every field is declared nullable.
schema = StructType(
    [
        StructField(column_name, spark_type(), True)
        for column_name, spark_type in (
            ("No.", LongType),
            ("MLSID", StringType),
            ("StreetNumberNumeric", LongType),
            ("streetaddress", StringType),
            ("STREETNAME", StringType),
            ("PostalCode", LongType),
            ("StateOrProvince", StringType),
            ("City", StringType),
            ("SalesClosePrice", LongType),
            ("LISTDATE", DateType),
            ("LISTPRICE", LongType),
            ("LISTTYPE", StringType),
            ("OriginalListPrice", LongType),
            ("PricePerTSFT", DoubleType),
            ("FOUNDATIONSIZE", LongType),
            ("FENCE", StringType),
            ("MapLetter", StringType),
            ("LotSizeDimensions", StringType),
            ("SchoolDistrictNumber", StringType),
            ("DAYSONMARKET", LongType),
            ("offmarketdate", DateType),
            ("Fireplaces", LongType),
            ("RoomArea4", StringType),
            ("roomtype", StringType),
            ("ROOF", StringType),
            ("RoomFloor4", StringType),
            ("PotentialShortSale", StringType),
            ("PoolDescription", StringType),
            ("PDOM", LongType),
            ("GarageDescription", StringType),
            ("SQFTABOVEGROUND", LongType),
            ("Taxes", LongType),
            ("RoomFloor1", StringType),
            ("RoomArea1", StringType),
            ("TAXWITHASSESSMENTS", DoubleType),
            ("TAXYEAR", LongType),
            ("LivingArea", LongType),
            ("UNITNUMBER", StringType),
            ("YEARBUILT", LongType),
            ("ZONING", StringType),
            ("STYLE", StringType),
            ("ACRES", DoubleType),
            ("CoolingDescription", StringType),
            ("APPLIANCES", StringType),
            ("backonmarketdate", DateType),
            ("ROOMFAMILYCHAR", StringType),
            ("RoomArea3", StringType),
            ("EXTERIOR", StringType),
            ("RoomFloor3", StringType),
            ("RoomFloor2", StringType),
            ("RoomArea2", StringType),
            ("DiningRoomDescription", StringType),
            ("BASEMENT", StringType),
            ("BathsFull", LongType),
            ("BathsHalf", LongType),
            ("BATHQUARTER", LongType),
            # Pandas turns this one into float64 because of its missing values
            ("BATHSTHREEQUARTER", LongType),
            ("Class", StringType),
            ("BATHSTOTAL", LongType),
            ("BATHDESC", StringType),
            ("RoomArea5", StringType),
            ("RoomFloor5", StringType),
            ("RoomArea6", StringType),
            ("RoomFloor6", StringType),
            ("RoomArea7", StringType),
            ("RoomFloor7", StringType),
            ("RoomArea8", StringType),
            ("RoomFloor8", StringType),
            ("Bedrooms", LongType),
            ("SQFTBELOWGROUND", LongType),
            ("AssumableMortgage", StringType),
            ("AssociationFee", LongType),
            ("ASSESSMENTPENDING", StringType),
            ("AssessedValuation", DoubleType),
        )
    ]
)

In [3]:
# Read the raw CSV using the explicit schema, which (unlike inferSchema) gets
# the date columns parsed with the given dateFormat.
# The inferSchema variant that leaves the dates unparsed, kept for reference:
# df = spark.read.format('csv').options(Header=True).options(inferSchema=True).options(dateFormat='M/d/y H:m').load('2017_StPaul_MN_Real_Estate.csv', dateFormat='M/d/y H:m')
df = (
    spark.read.format('csv')
    .option('Header', True)
    .option('dateFormat', 'M/d/y H:m')
    .schema(schema)
    .load('2017_StPaul_MN_Real_Estate.csv')
)
# Alternative: read the dates as strings and cast them after the load
# df = df.withColumn('LISTDATE', F.to_timestamp('LISTDATE', format='M/d/y H:m'))
# df = df.withColumn('LISTDATE', F.to_date('LISTDATE', format='M/d/y H:m'))
# df = df.withColumn('offmarketdate', F.to_timestamp('offmarketdate', format='M/d/y H:m'))
# df = df.withColumn('offmarketdate', F.to_date('offmarketdate', format='M/d/y H:m'))

In [4]:
# Standardize column names: strip dots and upper-case them (e.g. "No." -> "NO").
# The source name is back-quoted so that a dot in it is not read as a field accessor.
from re import sub
df = df.selectExpr(
    *(f"`{c}` as " + sub(r'\.', '', c).upper() for c in df.columns)
)

In [5]:
# Persist the imported DataFrame as Parquet (replacing any previous copy),
# reload it from disk and show the resulting schema
df.write.mode('overwrite').parquet('Real_Estate.parq')
df = spark.read.parquet('Real_Estate.parq')
df.printSchema()

23/05/02 19:30:44 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
[Stage 0:>                                                          (0 + 1) / 1]

root
 |-- NO: long (nullable = true)
 |-- MLSID: string (nullable = true)
 |-- STREETNUMBERNUMERIC: long (nullable = true)
 |-- STREETADDRESS: string (nullable = true)
 |-- STREETNAME: string (nullable = true)
 |-- POSTALCODE: long (nullable = true)
 |-- STATEORPROVINCE: string (nullable = true)
 |-- CITY: string (nullable = true)
 |-- SALESCLOSEPRICE: long (nullable = true)
 |-- LISTDATE: date (nullable = true)
 |-- LISTPRICE: long (nullable = true)
 |-- LISTTYPE: string (nullable = true)
 |-- ORIGINALLISTPRICE: long (nullable = true)
 |-- PRICEPERTSFT: double (nullable = true)
 |-- FOUNDATIONSIZE: long (nullable = true)
 |-- FENCE: string (nullable = true)
 |-- MAPLETTER: string (nullable = true)
 |-- LOTSIZEDIMENSIONS: string (nullable = true)
 |-- SCHOOLDISTRICTNUMBER: string (nullable = true)
 |-- DAYSONMARKET: long (nullable = true)
 |-- OFFMARKETDATE: date (nullable = true)
 |-- FIREPLACES: long (nullable = true)
 |-- ROOMAREA4: string (nullable = true)
 |-- ROOMTYPE: string (nu

                                                                                

In [6]:
# Column-by-column overview produced by glimpse() (imported from mcr.util).
# The pandas display limits are widened only for the duration of this block.
with pd.option_context('display.max_rows', 100, 'display.max_colwidth', 1000):
    column_summary = glimpse(df.toPandas())
    display(column_summary)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Columns: 74 entries, NO to ASSESSEDVALUATION
dtypes: float64(5), int64(22), object(47)
memory usage: 15.3 MB


Unnamed: 0,field,non-null count,missing ratio,data type,unique count,unique preview
0,ACRES,5000,0.0,float64,570,"[0.0, 0.002, 0.005, 0.007, 0.01, ..., 5.41, 6.0, 6.035, 9.27, 9.47]"
1,APPLIANCES,4669,0.0662,object,743,"[Cooktop, Dishwasher, Refrigerator, Cooktop, Dishwasher, Refrigerator, Washer, Dryer, Cooktop, Dishwasher, Refrigerator, Washer, Dryer, Disposal, Cooktop, Exhaust Fan/Hood, Dishwasher, Refrigerator, Washer, Dryer, Water Softener - Owned, Cooktop, Exhaust Fan/Hood, Refrigerator, Washer, Dryer, ..., Wall Oven, Microwave, Exhaust Fan/Hood, Dishwasher, Refrigerator, Washer, Dryer, Disposal, Wall Oven, Microwave, Exhaust Fan/Hood, Dishwasher, Refrigerator, Washer, Dryer, Water Softener - Owned, Disposal, Washer, Dryer, Water Softener - Owned, Water Softener - Owned, Disposal, Air-To-Air Exchanger, Furnace Humidifier]"
2,ASSESSEDVALUATION,5000,0.0,float64,756,"[0.0, 1.0, 100.0, 1000.0, 1001.0, ..., 930.0, 940.0, 952.56, 978.0, 980.58]"
3,ASSESSMENTPENDING,5000,0.0,object,3,"[No, Unknown, Yes]"
4,ASSOCIATIONFEE,5000,0.0,int64,332,"[0, 10, 100, 1000, 101, ..., 957, 96, 98, 983, 99]"
5,ASSUMABLEMORTGAGE,1896,0.6208,object,4,"[Information Coming, Not Assumable, Yes w/ Qualifying, Yes w/No Qualifying]"
6,BACKONMARKETDATE,0,1.0,object,0,[]
7,BASEMENT,5000,0.0,object,670,"[Concrete Block, Concrete Block, Poured Concrete, Concrete Block, Slab, Unfinished, Concrete Block, Unfinished, Crawl Space, ..., Walkout, Sump Pump, Walkout, Sump Pump, Concrete Block, Walkout, Sump Pump, Day/Lookout Windows, Walkout, Sump Pump, Poured Concrete, Unfinished, Walkout, Unfinished]"
8,BATHDESC,4634,0.0732,object,887,"[1/2 Basement, 1/2 Basement, Basement, 1/2 Basement, Rough In, Basement, 3/4 Basement, 3/4 Basement, Basement, ..., Upper Level Full Bath , Walk-In Shower, Upper Level Full Bath , Walk-In Shower, 3/4 Basement, Upper Level Full Bath , Walk-In Shower, Basement, Upper Level Full Bath , Whirlpool, Whirlpool]"
9,BATHQUARTER,5000,0.0,int64,3,"[0, 1, 2]"


# Choosing the Algorithm

## pySpark ML

    Predict Category
    
        Unlabeled data -> Clustering -> ml.clustering
        Labeled data ->  Classification -> ml.classification

    Predict Quantity
    
        Regression -> ml.regression

    Predict Similarity

        Basket Based -> Association Rules -> ml.fpm
        User Based -> Collaborative Filtering -> ml.recommendation

## PySpark Regression Methods
Methods in ml.regression :

* GeneralizedLinearRegression
* IsotonicRegression
* LinearRegression

These first methods differ mostly in how they regularize, which means how they prevent themselves from finding overly complex solutions that are likely to overfit the data. While these methods can be powerful if used correctly, they require a lot of upfront work to ensure their assumptions are met.

* DecisionTreeRegressor
* GBTRegressor
* RandomForestRegressor

`ml.regression` also contains tree-based methods, which can easily handle things like missing and categorical values right out of the box. Decision Trees are easy to interpret, but a lot of work needs to go into preventing overfitting. So now we are down to two algorithms, RandomForestRegressor and GBTRegressor, which differ in how they handle the error reduction.

Both Random Forest and Gradient Boosted Trees models are examples of ensemble models. They combine many smaller models together to create a more powerful model.

## Test and Train Splits for Time Series

If you've had some exposure to machine learning you may have seen the crucial step of splitting your data into test and training sets, **which needs to be done before applying feature transformations**.

Commonly data is split randomly. **Ours contains a time component so splitting randomly would leak information about what happens in the future.**

To **prevent** this you can **split your data sequentially and train your model on the first sequences and then test it with the last.**

The **size of your sets depends on how far out you need to forecast**. Doing **incremental testing is called step-forward optimization.**

[Avoiding Data Leakage in Machine Learning](https://www.kaggle.com/c/santander-value-prediction-challenge/discussion/61408)

Special Case: Time Series

Data leakage can be especially tricky with time series. Not only do we have to worry about the pitfalls above, we also need to make sure we are not leaking information from the future into the past. When dealing with time series, it would be a mistake to randomly split the data into train and test sets. That would lead to, for example, making predictions about 2016 using data from 2017.

**Walk-Forward optimization for time series**

![Image](https://conlanscientific.com/media/content/walk-forward-optimization.png)

The data from 2017 was obviously not available in 2016. When the final model is deployed, no data from the subsequent year will be available. Like other forms of data leakage, predicting the past with the future will cause us to overestimate the performance of the model. In time series, we use walk-forward optimization to dynamically (and chronologically) split training and test data. Walk-forward optimization mimics the appearance of training your model once each year for use in the following year, which is a realistic application.

## 10. Test and Train Splits for Time Series (II)

Here, we'll create **just one of the sequential test/train splits**; with some added logic you could build it out into the **walk-forward optimization** seen previously.

### According to the course

In [7]:
# Latest and earliest OFFMARKETDATE, each fetched with its own aggregation and
# wrapped as a Spark literal column
max_date = F.lit(df.agg({'OFFMARKETDATE': 'max'}).collect()[0][0])
min_date = F.lit(df.agg({'OFFMARKETDATE': 'min'}).collect()[0][0])
# Span of the data in days (still a lazy Column expression)
range_in_days = F.datediff(max_date, min_date)
# Offset, in whole days, that lies 80% of the way through the span
split_in_days = F.round(range_in_days * 0.8).cast('integer')
# Cut-off date separating the training rows from the test rows
split_date = F.date_add(min_date, split_in_days)

# Training set: everything that went off the market before the cut-off
train_df = df.filter(df['OFFMARKETDATE'] < split_date).select('NO')
# Test set: went off the market on/after the cut-off AND was already listed by
# then. Rows listed after the cut-off are left out of both sets ("Ignored").
test_df = df.filter(
    (df['OFFMARKETDATE'] >= split_date) & (df['LISTDATE'] <= split_date)
).select('NO')
print(f'Total: {df.count()} Train: {train_df.count()} Test: {test_df.count()} Ignored: {df.count()-train_df.count()-test_df.count()}')

Total: 5000 Train: 4585 Test: 345 Ignored: 70


### My alternative seems better

* Avoids selecting all fields to just aggregate one.
* Aggregates min() and max() in a single pass and fetches the result via first() rather than calling collect()[0][0] twice.
* Uses pre-evaluated constants rather than lazy `col()` evaluation from Spark.

Results:

* Fewer tasks (8 vs. 10)
* Same time (6s)
* Less input (8.6 MiB vs. 8.7 MiB)
* Less shuffle read (64 B vs. 118 B)
* Less storage memory (291.1 KiB vs. 338.6 KiB)
* Shorter query duration (0.21 s vs. 0.381 s)

In [8]:
# Earliest and latest OFFMARKETDATE, fetched together in one aggregation and
# brought to the driver as plain Python dates
min_date, max_date = (
    df.select('OFFMARKETDATE')
    .agg(F.min('OFFMARKETDATE'), F.max('OFFMARKETDATE'))
    .first()
)
# Span of the data in days
range_in_days = (max_date - min_date).days
# Offset, in whole days, that lies 80% of the way through the span (truncated)
split_in_days = int(range_in_days * 0.8)
# Cut-off date separating the training rows from the test rows
split_date = min_date + datetime.timedelta(days=split_in_days)
print(f'{split_date=}')

# Training set: everything that went off the market before the cut-off
train_df = df.filter(df['OFFMARKETDATE'] < split_date).select('NO')
# Test set: went off the market on/after the cut-off AND was already listed by
# then. Rows listed after the cut-off are left out of both sets ("Ignored").
test_df = df.filter(
    (df['OFFMARKETDATE'] >= split_date) & (df['LISTDATE'] <= split_date)
).select('NO')
print(f'Total: {df.count()} Train: {train_df.count()} Test: {test_df.count()} Ignored: {df.count()-train_df.count()-test_df.count()}')

split_date=datetime.date(2017, 11, 18)
Total: 5000 Train: 4585 Test: 345 Ignored: 70


## Exercises

### Creating Time Splits

In [9]:
def train_test_split_date(df, split_col, test_days):
    """Return the cut-off date separating training rows from test rows.

    The cut-off is measured backwards from the latest value of ``split_col``.

    Parameters
    ----------
    df : DataFrame
        Object exposing ``agg({column: 'max' | 'min'}).collect()``; the first
        cell of the first returned row is used as the date.
    split_col : str
        Name of the date column that drives the split.
    test_days : int or float
        * ``int``   -- size of the test window in days.
        * ``float`` -- size of the test window as a fraction of the whole
          date range of ``split_col``; the day count is truncated and one
          extra day is added.

    Returns
    -------
    datetime.date
        Latest date of ``split_col`` minus the test window.

    Raises
    ------
    TypeError
        If ``test_days`` is neither an ``int`` nor a ``float``. (Previously
        this case silently returned ``None``.)
    """
    as_fraction = isinstance(test_days, float)
    # Validate before touching Spark so a bad argument fails fast and loudly
    if not as_fraction and not isinstance(test_days, int):
        raise TypeError(
            f'test_days must be an int (days) or a float (fraction of the date range), '
            f'got {type(test_days).__name__}'
        )
    print('is float' if as_fraction else 'is int')

    # Both modes count backwards from the latest date
    max_date = df.agg({split_col: 'max'}).collect()[0][0]
    if as_fraction:
        min_date = df.agg({split_col: 'min'}).collect()[0][0]
        # Truncate the fractional window to whole days, then add one day
        window_days = int((max_date - min_date).days * test_days) + 1
    else:
        window_days = test_days
    return max_date - datetime.timedelta(days=window_days)

In [10]:
# Find the date to use for splitting test and train: an int argument holds out
# the last 45 days of OFFMARKETDATE
split_date = train_test_split_date(df, 'OFFMARKETDATE', 45)
print(f'{split_date=}')
# Sequential sets: train is everything off the market before the cut-off;
# test went off the market on/after it and was already listed by then
train_df = df.filter(df['OFFMARKETDATE'] < split_date)
test_df = df.filter(
    (df['OFFMARKETDATE'] >= split_date) & (df['LISTDATE'] <= split_date)
)
print(f'Total: {df.count()} Train: {train_df.count()} Test: {test_df.count()} Ignored: {df.count()-train_df.count()-test_df.count()}')

is int
split_date=datetime.date(2017, 12, 10)
Total: 5000 Train: 4828 Test: 154 Ignored: 18


In [11]:
# Find the date to use for splitting test and train: a float argument sizes the
# test window as a fraction (0.132) of the OFFMARKETDATE range
split_date = train_test_split_date(df, 'OFFMARKETDATE', 0.132)
print(f'{split_date=}')
# Sequential sets: train is everything off the market before the cut-off;
# test went off the market on/after it and was already listed by then
train_df = df.filter(df['OFFMARKETDATE'] < split_date)
test_df = df.filter(
    (df['OFFMARKETDATE'] >= split_date) & (df['LISTDATE'] <= split_date)
)
print(f'Total: {df.count()} Train: {train_df.count()} Test: {test_df.count()} Ignored: {df.count()-train_df.count()-test_df.count()}')

is float
split_date=datetime.date(2017, 12, 10)
Total: 5000 Train: 4828 Test: 154 Ignored: 18


### Adjusting Time Features

We have mentioned throughout this course some of the dangers of leaking information to your model during training. Data leakage will cause your model to have very optimistic metrics for accuracy but once real data is run through it the results are often very disappointing.

In this exercise, we are going to ensure that DAYSONMARKET only reflects what information we have at the time of predicting the value. I.e., if the house is still on the market, we don't know how many more days it will stay on the market. We need to adjust our test_df to reflect what information we currently have as of 2017-12-10. 

In [12]:
# Fixed cut-off date, expressed as a Spark date literal
split_date = F.to_date(F.lit('2017-12-10'))
# Sequential test set: off the market on/after the cut-off, listed on/before it
test_df = df.filter(
    (df['OFFMARKETDATE'] >= split_date) & (df['LISTDATE'] <= split_date)
)

test_df = (
    test_df
    # Keep the recorded value under a new name so it can be compared later
    .withColumn('DAYSONMARKET_Original', F.col('DAYSONMARKET'))
    # Overwrite DAYSONMARKET with the days elapsed between listing and the cut-off
    .withColumn('DAYSONMARKET', F.datediff(split_date, 'LISTDATE'))
)

# Show the original and recomputed values side by side
test_df.select('LISTDATE', 'OFFMARKETDATE', 'DAYSONMARKET_Original', 'DAYSONMARKET').show()

+----------+-------------+---------------------+------------+
|  LISTDATE|OFFMARKETDATE|DAYSONMARKET_Original|DAYSONMARKET|
+----------+-------------+---------------------+------------+
|2017-10-06|   2018-01-24|                  110|          65|
|2017-09-18|   2017-12-12|                   82|          83|
|2017-11-07|   2017-12-12|                   35|          33|
|2017-10-30|   2017-12-11|                   42|          41|
|2017-07-14|   2017-12-19|                  158|         149|
|2017-10-25|   2017-12-20|                   45|          46|
|2017-12-07|   2017-12-23|                   16|           3|
|2017-11-22|   2017-12-16|                   24|          18|
|2017-10-27|   2017-12-13|                   47|          44|
|2017-09-29|   2017-12-12|                   12|          72|
|2017-11-28|   2017-12-11|                   13|          12|
|2017-09-09|   2018-01-17|                  119|          92|
|2017-11-18|   2017-12-15|                   26|          22|
|2017-12

### Evaluating a Model
from pyspark.ml.evaluation import RegressionEvaluator
# Select columns to compute test error
evaluator = RegressionEvaluator(labelCol="SALESCLOSEPRICE",
predictionCol="Prediction_Price")
# Create evaluation metrics
rmse = evaluator.evaluate(predictions, {evaluator.metricName: "rmse"})
r2 = evaluator.evaluate(predictions, {evaluator.metricName: "r2"})
# Print Model Metrics
print('RMSE: ' + str(rmse))
print('R^2: ' + str(r2))

# Feature Engineering Assumptions for RFR

## Assumptions Needed for Features
**Random Forest Regression**

The lack of assumptions needed for Random Forest Regression makes it and its related methods some of the most popular choices for predicting continuous values! 

* Skewed/Non Normal Data? OK
* Unscaled? OK
* Missing Data? OK
* Categorical Data? OK

## Appended Features
**Economic**
* 30 Year Mortgage Rates
    * 30 year Mortgage Rate to see how much people are willing to pay depending on their rate.

**Governmental**  
City data to see how unique a house is in the area or if it is exceptionally cheap or expensive.
* Median Home Price for City
* Home Age Percentages for City
* Home Size Percentages for City

**Social**  
Transportation metrics can help us understand how much people are willing to pay for a convenient location.
* Walk Score
* Bike Score

**Seasonal**  
Bank holidays to see if that impacted how or when houses were sold.
* Bank Holidays

## Engineered Features
**Temporal Features**  
Time components like the month or the week that a holiday falls on are needed to help attribute seasonal effects.
* Limited value with one year of data
* Holiday Weeks

**Rates, Ratios, Sums**  
Valuable but often the hardest to create features are rates, ratios and other generated features that need either business or personal context to create.
* Business Context
* Personal Context

**Expanded Features**  
Lastly, choosing whether or not to expand compound fields is ultimately a judgment call and may be something to consider in the second iteration of modeling.
* Non-Free Form Text Columns
* Need to Remove Low Observations

In [13]:
# Shape of the data as (number of rows, number of columns)
shape = (df.count(), len(df.columns))
print(shape)

(5000, 74)


## Dataframe Columns to Feature Vectors
Pyspark ML algorithms require all of the features to be provided in a single column of type vector.

We will need to convert our columns for Random Forest Regression to work.

To do this we need to import the VectorAssembler transformer to use it later. Sadly, while Random Forest Regression can handle missing values, vectors cannot.

**Due to the nature of how tree-based machine learning partitions data, we can simply replace nulls with a value that lies outside the existing range of the variable, in this case -1.**

But first, we need to know which columns to convert. We can take the list of column names and remove our dependent variable so the vector contains only features.

In [14]:
from pyspark.ml.feature import VectorAssembler
# Replace missing values with -1, the out-of-range sentinel described above
df = df.fillna(-1)
# Candidate features: every column that is not a string or a date. The
# 'vector' dtype is excluded as well, so that re-running this cell after the
# assembler cell has appended its `features` column does not feed that vector
# back in as an input feature.
features_cols = [col for col, dtype in df.dtypes if dtype not in ('string', 'date', 'vector')]
# Remove the dependent variable (SALESCLOSEPRICE) and the NO /
# STREETNUMBERNUMERIC columns from the list
features_cols = [col for col in features_cols if col not in ['SALESCLOSEPRICE', 'NO', 'STREETNUMBERNUMERIC']]
print(len(features_cols))
print(features_cols)

24
['POSTALCODE', 'LISTPRICE', 'ORIGINALLISTPRICE', 'PRICEPERTSFT', 'FOUNDATIONSIZE', 'DAYSONMARKET', 'FIREPLACES', 'PDOM', 'SQFTABOVEGROUND', 'TAXES', 'TAXWITHASSESSMENTS', 'TAXYEAR', 'LIVINGAREA', 'YEARBUILT', 'ACRES', 'BATHSFULL', 'BATHSHALF', 'BATHQUARTER', 'BATHSTHREEQUARTER', 'BATHSTOTAL', 'BEDROOMS', 'SQFTBELOWGROUND', 'ASSOCIATIONFEE', 'ASSESSEDVALUATION']


In [15]:
# Create the vector assembler transformer
vec = VectorAssembler(inputCols=features_cols, outputCol='features')
# Apply the vector transformer to data.
# The assembler refuses to run when its output column already exists, which
# made this cell fail when executed a second time. Dropping the column first
# does nothing on the first run (drop ignores a missing column) and removes
# the stale vector on a re-run.
df = vec.transform(df.drop(vec.getOutputCol()))
print((df.count(), len(df.columns)))

(5000, 75)


In [16]:
# Select only the feature vectors and the dependent variable
ml_ready_df = df.select(['SALESCLOSEPRICE', 'features'])
# Inspect Results
ml_ready_df.show(5, truncate=False)

+---------------+----------------------------------------------------------------------------------------------------------------------------------------------+
|SALESCLOSEPRICE|features                                                                                                                                      |
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------+
|143000         |[55042.0,139900.0,139900.0,145.9184,980.0,10.0,0.0,10.0,980.0,1858.0,1858.0,2017.0,980.0,1950.0,1.28,1.0,1.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0]     |
|190000         |[55042.0,210000.0,210000.0,85.2783,1144.0,4.0,0.0,4.0,1268.0,1640.0,1640.0,2017.0,2228.0,1971.0,0.32,1.0,0.0,0.0,2.0,3.0,4.0,960.0,0.0,0.0]   |
|225000         |[55042.0,225000.0,225000.0,204.1742,1102.0,28.0,0.0,28.0,1102.0,2390.0,2390.0,2016.0,1102.0,1949.0,0.822,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0] |
|265000         |[55042.0,230000.0

## Exercises

* Missing values are handled by Random Forests internally where they partition on missing values. As long as you replace them with something outside of the range of normal values, they will be handled correctly.

* Likewise, categorical features only need to be mapped to numbers, they are fine to stay all in one column by using a StringIndexer as we saw in chapter 3.

* OneHot encoding which converts each possible value to its own boolean feature is not needed.

In [17]:
# Number of columns currently in df (includes the assembled `features` column)
len(df.columns)

75

### Reproducing exercise columns: 253

In [18]:
# Column names of the dataset used in the DataCamp exercise, copied verbatim
# from the course (separators and spacing are left exactly as they appear there).
# Many of these (e.g. WALKSCORE, the FENCE_* indicators) are not among the
# columns defined by this notebook's schema.
exercise_columns = [
    "SALESCLOSEPRICE",
    "LISTPRICE",
    "ORIGINALLISTPRICE",
    "FOUNDATIONSIZE",
    "DAYSONMARKET",
    "FIREPLACES",
    "SQFTABOVEGROUND",
    "TAXES",
    "TAXWITHASSESSMENTS",
    "LIVINGAREA",
    "YEARBUILT",
    "ACRES",
    "BATHSFULL",
    "BATHSHALF",
    "BATHQUARTER",
    "BATHSTHREEQUARTER",
    "BATHSTOTAL",
    "BEDROOMS",
    "SQFTBELOWGROUND",
    "ASSOCIATIONFEE",
    "ASSESSEDVALUATION",
    "LIST_YEAR",
    "LIST_MONTH",
    "LIST_WEEKOFYEAR",
    "LIST_DAYOFMONTH",
    "LIST_DAYOFYEAR",
    "WALKSCORE",
    "BIKESCORE",
    "MORTGAGE30US",
    "MORTGAGE30US-1WK",
    "MORTGAGE30US-2WK",
    "MORTGAGE30US-3WK",
    "MORTGAGE30US-4WK",
    "PERCENT_OLDER_AGE_HOMES",
    "PERCENT_SIMILAR_AGE_HOMES",
    "PERCENT_NEWER_AGE_HOMES",
    "PERCENT_BIGGER_SIZE_HOMES",
    "PERCENT_SIMILAR_SIZE_HOMES",
    "PERCENT_SMALLER_SIZE_HOMES",
    "LISTING_TO_MEDIAN_RATIO",
    "LISTING_PRICE_PER_SQFT",
    "SQFT_TOTAL",
    "ASSESSED_TO_LIST",
    "TAX_TO_LIST",
    "BED_TO_BATHS",
    "PRICE_REDUCTION_PERCENT",
    "FENCE_WIRE",
    "FENCE_ELECTRIC",
    "FENCE_NAN",
    "FENCE_PARTIAL",
    "FENCE_RAIL",
    "FENCE_OTHER",
    "FENCE_CHAIN LINK",
    "FENCE_FULL",
    "FENCE_NONE",
    "FENCE_PRIVACY",
    "FENCE_WOOD",
    "FENCE_INVISIBLE",
    "ROOF_ASPHALT SHINGLES",
    "ROOF_SHAKES",
    "ROOF_NAN",
    "ROOF_UNSPECIFIED SHINGLE",
    "ROOF_SLATE",
    "ROOF_PITCHED",
    "ROOF_FLAT",
    "ROOF_TAR/GRAVEL",
    "ROOF_OTHER",
    "ROOF_METAL",
    "ROOF_TILE",
    "ROOF_RUBBER",
    "ROOF_WOOD SHINGLES",
    "ROOF_AGE OVER 8 YEARS",
    "ROOF_AGE 8 YEARS OR LESS",
    "POOLDESCRIPTION_NAN",
    "POOLDESCRIPTION_HEATED",
    "POOLDESCRIPTION_NONE",
    "POOLDESCRIPTION_SHARED",
    "POOLDESCRIPTION_INDOOR",
    "POOLDESCRIPTION_OUTDOOR",
    "POOLDESCRIPTION_ABOVE GROUND",
    "POOLDESCRIPTION_BELOW GROUND",
    "GARAGEDESCRIPTION_ASSIGNED",
    "GARAGEDESCRIPTION_TANDEM",
    "GARAGEDESCRIPTION_UNCOVERED/OPEN",
    "GARAGEDESCRIPTION_TUCKUNDER",
    "GARAGEDESCRIPTION_DRIVEWAY - ASPHALT",
    "GARAGEDESCRIPTION_HEATED GARAGE",
    "GARAGEDESCRIPTION_UNDERGROUND GARAGE",
    "GARAGEDESCRIPTION_DRIVEWAY - SHARED",
    "GARAGEDESCRIPTION_CONTRACT PKG REQUIRED",
    "GARAGEDESCRIPTION_GARAGE DOOR OPENER",
    "GARAGEDESCRIPTION_MORE PARKING OFFSITE FOR FEE",
    "GARAGEDESCRIPTION_VALET PARKING FOR FEE",
    "GARAGEDESCRIPTION_OTHER",
    "GARAGEDESCRIPTION_MORE PARKING ONSITE FOR FEE",
    "GARAGEDESCRIPTION_DRIVEWAY - OTHER SURFACE",
    "GARAGEDESCRIPTION_DETACHED GARAGE",
    "GARAGEDESCRIPTION_SECURED",
    "GARAGEDESCRIPTION_CARPORT",
    "GARAGEDESCRIPTION_DRIVEWAY - CONCRETE",
    "GARAGEDESCRIPTION_ON-STREET PARKING ONLY",
    "GARAGEDESCRIPTION_COVERED",
    "GARAGEDESCRIPTION_INSULATED GARAGE",
    "GARAGEDESCRIPTION_UNASSIGNED",
    "GARAGEDESCRIPTION_NONE",
    "GARAGEDESCRIPTION_DRIVEWAY - GRAVEL",
    "GARAGEDESCRIPTION_NO INT ACCESS TO DWELLING",
    "GARAGEDESCRIPTION_UNITS VARY",
    "GARAGEDESCRIPTION_ATTACHED GARAGE",
    "APPLIANCES_NAN",
    "APPLIANCES_COOKTOP",
    "APPLIANCES_WALL OVEN",
    "APPLIANCES_WATER SOFTENER - OWNED",
    "APPLIANCES_DISPOSAL",
    "APPLIANCES_DISHWASHER",
    "APPLIANCES_OTHER",
    "APPLIANCES_INDOOR GRILL",
    "APPLIANCES_WASHER",
    "APPLIANCES_RANGE",
    "APPLIANCES_REFRIGERATOR",
    "APPLIANCES_FURNACE HUMIDIFIER",
    "APPLIANCES_TANKLESS WATER  HEATER",
    "APPLIANCES_ELECTRONIC AIR FILTER",
    "APPLIANCES_MICROWAVE",
    "APPLIANCES_EXHAUST FAN/HOOD",
    "APPLIANCES_NONE",
    "APPLIANCES_CENTRAL VACUUM",
    "APPLIANCES_TRASH COMPACTOR",
    "APPLIANCES_AIR-TO-AIR EXCHANGER",
    "APPLIANCES_DRYER",
    "APPLIANCES_FREEZER",
    "APPLIANCES_WATER SOFTENER - RENTED",
    "EXTERIOR_SHAKES",
    "EXTERIOR_CEMENT BOARD",
    "EXTERIOR_BLOCK",
    "EXTERIOR_VINYL",
    "EXTERIOR_FIBER BOARD",
    "EXTERIOR_OTHER",
    "EXTERIOR_METAL",
    "EXTERIOR_BRICK/STONE",
    "EXTERIOR_STUCCO",
    "EXTERIOR_ENGINEERED WOOD",
    "EXTERIOR_WOOD",
    "DININGROOMDESCRIPTION_EAT IN KITCHEN",
    "DININGROOMDESCRIPTION_NAN",
    "DININGROOMDESCRIPTION_OTHER",
    "DININGROOMDESCRIPTION_LIVING/DINING ROOM",
    "DININGROOMDESCRIPTION_SEPARATE/FORMAL DINING ROOM",
    "DININGROOMDESCRIPTION_KITCHEN/DINING ROOM",
    "DININGROOMDESCRIPTION_INFORMAL DINING ROOM",
    "DININGROOMDESCRIPTION_BREAKFAST AREA",
    "BASEMENT_FINISHED (LIVABLE)",
    "BASEMENT_PARTIAL",
    "BASEMENT_SUMP PUMP",
    "BASEMENT_INSULATING CONCRETE FORMS",
    "BASEMENT_CRAWL SPACE",
    "BASEMENT_PARTIAL FINISHED",
    "BASEMENT_CONCRETE BLOCK",
    "BASEMENT_DRAINAGE SYSTEM",
    "BASEMENT_POURED CONCRETE",
    "BASEMENT_UNFINISHED",
    "BASEMENT_DRAIN TILED",
    "BASEMENT_WOOD",
    "BASEMENT_FULL",
    "BASEMENT_EGRESS WINDOWS",
    "BASEMENT_DAY/LOOKOUT WINDOWS",
    "BASEMENT_SLAB",
    "BASEMENT_STONE",
    "BASEMENT_NONE",
    "BASEMENT_WALKOUT",
    "BATHDESC_MAIN FLOOR 1/2 BATH",
    "BATHDESC_TWO MASTER BATHS",
    "BATHDESC_MASTER WALK-THRU",
    "BATHDESC_WHIRLPOOL",
    "BATHDESC_NAN",
    "BATHDESC_3/4 BASEMENT",
    "BATHDESC_TWO BASEMENT BATHS",
    "BATHDESC_OTHER",
    "BATHDESC_3/4 MASTER",
    "BATHDESC_MAIN FLOOR 3/4 BATH",
    "BATHDESC_FULL MASTER",
    "BATHDESC_MAIN FLOOR FULL BATH",
    "BATHDESC_WALK-IN SHOWER",
    "BATHDESC_SEPARATE TUB & SHOWER",
    "BATHDESC_FULL BASEMENT",
    "BATHDESC_BASEMENT",
    "BATHDESC_WALK THRU",
    "BATHDESC_BATHROOM ENSUITE",
    "BATHDESC_PRIVATE MASTER",
    "BATHDESC_JACK & JILL 3/4",
    "BATHDESC_UPPER LEVEL 1/2 BATH",
    "BATHDESC_ROUGH IN",
    "BATHDESC_UPPER LEVEL FULL BATH",
    "BATHDESC_1/2 MASTER",
    "BATHDESC_1/2 BASEMENT",
    "BATHDESC_JACK AND JILL",
    "BATHDESC_UPPER LEVEL 3/4 BATH",
    "ZONING_INDUSTRIAL",
    "ZONING_BUSINESS/COMMERCIAL",
    "ZONING_OTHER",
    "ZONING_RESIDENTIAL-SINGLE",
    "ZONING_RESIDENTIAL-MULTI-FAMILY",
    "COOLINGDESCRIPTION_WINDOW",
    "COOLINGDESCRIPTION_WALL",
    "COOLINGDESCRIPTION_DUCTLESS MINI-SPLIT",
    "COOLINGDESCRIPTION_NONE",
    "COOLINGDESCRIPTION_GEOTHERMAL",
    "COOLINGDESCRIPTION_CENTRAL",
    "CITY:LELM - LAKE ELMO",
    "CITY:MAPW - MAPLEWOOD",
    "CITY:OAKD - OAKDALE",
    "CITY:STP - SAINT PAUL",
    "CITY:WB - WOODBURY",
    "LISTTYPE:EXCLUSIVE AGENCY",
    "LISTTYPE:EXCLUSIVE RIGHT",
    "LISTTYPE:EXCLUSIVE RIGHT WITH EXCLUSIONS",
    "LISTTYPE:OTHER",
    "LISTTYPE:SERVICE AGREEMENT",
    "SCHOOLDISTRICTNUMBER:6 - SOUTH ST. PAUL",
    "SCHOOLDISTRICTNUMBER:622 - NORTH ST PAUL-MAPLEWOOD",
    "SCHOOLDISTRICTNUMBER:623 - ROSEVILLE",
    "SCHOOLDISTRICTNUMBER:624 - WHITE BEAR LAKE",
    "SCHOOLDISTRICTNUMBER:625 - ST. PAUL",
    "SCHOOLDISTRICTNUMBER:832 - MAHTOMEDI",
    "SCHOOLDISTRICTNUMBER:833 - SOUTH WASHINGTON COUNTY",
    "SCHOOLDISTRICTNUMBER:834 - STILLWATER",
    "POTENTIALSHORTSALE:NO",
    "POTENTIALSHORTSALE:NOT DISCLOSED",
    "STYLE:(CC) CONVERTED MANSION",
    "STYLE:(CC) HIGH RISE (4+ LEVELS)",
    "STYLE:(CC) LOW RISE (3- LEVELS)",
    "STYLE:(CC) MANOR/VILLAGE",
    "STYLE:(CC) TWO UNIT",
    "STYLE:(SF) FOUR OR MORE LEVEL SPLIT",
    "STYLE:(SF) MODIFIED TWO STORY",
    "STYLE:(SF) MORE THAN TWO STORIES",
    "STYLE:(SF) ONE 1/2 STORIES",
    "STYLE:(SF) ONE STORY",
    "STYLE:(SF) OTHER",
    "STYLE:(SF) SPLIT ENTRY (BI-LEVEL)",
    "STYLE:(SF) THREE LEVEL SPLIT",
    "STYLE:(SF) TWO STORIES",
    "STYLE:(TH) DETACHED",
    "STYLE:(TH) QUAD/4 CORNERS",
    "STYLE:(TH) SIDE X SIDE",
    "STYLE:(TW) TWIN HOME",
    "ASSUMABLEMORTGAGE:INFORMATION COMING",
    "ASSUMABLEMORTGAGE:NOT ASSUMABLE",
    "ASSUMABLEMORTGAGE:YES W/ QUALIFYING",
    "ASSUMABLEMORTGAGE:YES W/NO QUALIFYING",
    "ASSESSMENTPENDING:NO",
    "ASSESSMENTPENDING:UNKNOWN",
    "ASSESSMENTPENDING:YES",
]
# Cell output: number of names in the list
len(exercise_columns)

253

In [19]:
walk_score = [25.0, 2.0, 25.0, 2.0, 40.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, 8.0, 8.0, 0.0, 5.0, 1.0, 5.0, 0.0, 2.0, 2.0, 8.0, 8.0, 1.0, 1.0, 5.0, 8.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 21.0, 6.0, 14.0, 21.0, 21.0, 18.0, 4.0, 21.0, 12.0, 4.0, 21.0, 0.0, 0.0, 6.0, 20.0, None, 0.0, 21.0, 5.0, 0.0, 21.0, 21.0, 21.0, 0.0, None, 9.0, 20.0, 0.0, 0.0, 0.0, 0.0, 5.0, 21.0, 14.0, 11.0, 14.0, 22.0, 11.0, 2.0, 21.0, 14.0, 0.0, 10.0, 7.0, 12.0, 0.0, 0.0, None, 20.0, 6.0, 20.0, 1.0, 0.0, 0.0, 16.0, 0.0, 14.0, 0.0, 12.0, 4.0, 21.0, 21.0, 0.0, 10.0, 29.0, 14.0, 21.0, None, 7.0, 4.0, 22.0, 10.0, 17.0, 0.0, 17.0, 0.0, 11.0, 0.0, 14.0, 0.0, 0.0, 16.0, 8.0, 11.0, 0.0, 17.0, 1.0, 17.0, None, 0.0, None, None, 9.0, 17.0, 6.0, 10.0, 17.0, None, 0.0, 8.0, 0.0, 9.0, 1.0, 4.0, 0.0, 1.0, 9.0, 0.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, None, 14.0, 16.0, 13.0, None, 13.0, 21.0, 17.0, 21.0, 17.0, 30.0, 16.0, 22.0, 16.0, 18.0, 13.0, 17.0, None, 21.0, 19.0, 25.0, 8.0, 39.0, 22.0, 19.0, 21.0, 23.0, 19.0, 6.0, 28.0, 3.0, 19.0, 27.0, 19.0, 20.0, 19.0, 27.0, 19.0, 42.0, 47.0, 47.0, 42.0, 6.0, 27.0, 18.0, 9.0, 21.0, 3.0, 42.0, 19.0, 22.0, 30.0, 6.0, 11.0, 6.0, 27.0, 14.0, 14.0, 14.0, 28.0, 27.0, 29.0, 4.0, 8.0, 55.0, None, 13.0, 34.0, 6.0, 16.0, 19.0, 29.0, 42.0, 18.0, 46.0, 23.0, 5.0, 4.0, 51.0, 12.0, 29.0, 5.0, 5.0, 22.0, 18.0, 32.0, 26.0, 28.0, 13.0, 29.0, 22.0, 27.0, 28.0, 26.0, 70.0, 26.0, 40.0, 7.0, 29.0, 50.0, 55.0, 19.0, 14.0, 50.0, 22.0, 50.0, 14.0, 50.0, 8.0, 35.0, 12.0, 50.0, 13.0, 23.0, 28.0, 15.0, 13.0, 7.0, 40.0, 13.0, 56.0, 50.0, 29.0, 17.0, 32.0, 50.0, 29.0, 17.0, 40.0, 26.0, 60.0, 21.0, 26.0, 11.0, 5.0, 38.0, 50.0, 50.0, 50.0, 55.0, 13.0, 55.0, 38.0, 34.0, 24.0, 13.0, 55.0, 36.0, 28.0, 35.0, 29.0, 17.0, 36.0, 24.0, 33.0, 29.0, 34.0, 21.0, 17.0, 36.0, 17.0, 30.0, 46.0, 18.0, 28.0, 13.0, 23.0, 25.0, 28.0, 55.0, 13.0, 16.0, 27.0, 38.0, 16.0, 28.0, 26.0, 22.0, 25.0, 28.0, 35.0, 15.0, 22.0, 28.0, 13.0, 5.0, 29.0, 16.0, 21.0, 31.0, 30.0, 24.0, 50.0, 24.0, 
32.0, 48.0, 5.0, 27.0, 54.0, 32.0, 13.0, None, 50.0, 34.0, 17.0, 24.0, 50.0, 13.0, 5.0, 16.0, 19.0, 4.0, 31.0, 53.0, 19.0, 39.0, 29.0, 22.0, 12.0, 40.0, 28.0, 55.0, 28.0, 16.0, 20.0, 19.0, 9.0, 30.0, 40.0, 34.0, 25.0, 5.0, 14.0, 17.0, 44.0, 4.0, 54.0, 33.0, 41.0, 24.0, 19.0, 19.0, 12.0, 26.0, 30.0, 27.0, 14.0, 52.0, 22.0, 42.0, 13.0, 19.0, 41.0, 14.0, 13.0, 47.0, 20.0, 24.0, 31.0, 25.0, 22.0, 21.0, 22.0, 48.0, 33.0, 30.0, 15.0, 11.0, 29.0, 28.0, 12.0, 36.0, 29.0, 30.0, 38.0, 24.0, 10.0, 8.0, 21.0, 27.0, 54.0, 26.0, 41.0, 50.0, 36.0, 18.0, 14.0, 12.0, 13.0, 40.0, 48.0, 45.0, 30.0, 27.0, 20.0, 15.0, 30.0, 19.0, 9.0, 41.0, 19.0, 23.0, 21.0, 8.0, 24.0, 32.0, 32.0, 24.0, 47.0, 16.0, 29.0, 30.0, 24.0, 35.0, 22.0, 2.0, 10.0, 2.0, 58.0, 10.0, 24.0, 5.0, 17.0, 29.0, 28.0, 1.0, 11.0, 36.0, 33.0, 17.0, 23.0, 34.0, 13.0, 6.0, 48.0, 2.0, 36.0, 38.0, 14.0, 18.0, 17.0, 7.0, 2.0, 5.0, 31.0, 58.0, 28.0, 41.0, 36.0, 2.0, 30.0, 34.0, 9.0, 54.0, 12.0, 5.0, 15.0, 24.0, 10.0, 24.0, 14.0, 15.0, 37.0, 30.0, 12.0, 2.0, 15.0, 7.0, None, 4.0, 14.0, 2.0, 37.0, 15.0, 38.0, 38.0, 28.0, 28.0, 30.0, 36.0, 16.0, 2.0, 8.0, 13.0, 2.0, 11.0, 1.0, 9.0, 15.0, 3.0, 26.0, 3.0, 15.0, 5.0, 17.0, 1.0, 5.0, 16.0, 5.0, 20.0, 5.0, 9.0, 6.0, 10.0, 17.0, 22.0, 3.0, 2.0, 17.0, 1.0, 12.0, 2.0, 9.0, 9.0, 12.0, 9.0, 11.0, 2.0, 22.0, 27.0, 22.0, 12.0, 2.0, 1.0, 1.0, 10.0, 16.0, 2.0, 16.0, 17.0, 13.0, 29.0, 16.0, 4.0, 15.0, 24.0, 7.0, 20.0, 1.0, 0.0, 35.0, 24.0, 19.0, 12.0, 27.0, 1.0, 12.0, 1.0, 4.0, 5.0, 8.0, 15.0, 13.0, 2.0, 3.0, 4.0, 5.0, 4.0, 0.0, 2.0, 9.0, 3.0, 7.0, 5.0, 1.0, 15.0, 11.0, 1.0, 3.0, 19.0, 5.0, 2.0, 7.0, 22.0, 22.0, 9.0, 9.0, 33.0, 14.0, 28.0, 29.0, 28.0, 9.0, 29.0, 29.0, 9.0, 34.0, 8.0, 14.0, 35.0, 3.0, None, 6.0, 3.0, None, 3.0, 24.0, 29.0, 29.0, 22.0, 3.0, 29.0, 9.0, 35.0, 26.0, 3.0, 34.0, 28.0, 26.0, 28.0, 28.0, 14.0, 3.0, 6.0, 10.0, 9.0, 35.0, 25.0, 9.0, 11.0, 12.0, 2.0, 25.0, 35.0, 9.0, 14.0, 14.0, 9.0, 10.0, 10.0, 3.0, 10.0, 1.0, 1.0, 5.0, 9.0, 33.0, 14.0, 33.0, 3.0, 12.0, 3.0, 9.0, 35.0, 
9.0, 9.0, 10.0, 33.0, 20.0, 12.0, 1.0, 25.0, 18.0, 33.0, 32.0, 1.0, 9.0, 9.0, 1.0, 36.0, 2.0, 18.0, 3.0, 22.0, 12.0, 32.0, 43.0, 20.0, 15.0, 9.0, 35.0, 26.0, None, 18.0, 8.0, 9.0, 12.0, 4.0, 1.0, 29.0, 12.0, 9.0, 41.0, 10.0, 0.0, 30.0, 10.0, 2.0, 34.0, 15.0, 9.0, 46.0, 44.0, 34.0, 2.0, 10.0, 8.0, 34.0, 11.0, 32.0, 12.0, 1.0, 37.0, 34.0, 20.0, 9.0, 40.0, 1.0, 8.0, 41.0, 35.0, 14.0, 42.0, 9.0, 18.0, 51.0, 30.0, 18.0, 43.0, 8.0, 36.0, 0.0, 21.0, 8.0, 25.0, 17.0, 26.0, 3.0, 34.0, 18.0, 22.0, 36.0, 25.0, 52.0, 25.0, 25.0, 9.0, 11.0, 3.0, 26.0, 12.0, 3.0, 3.0, 14.0, 34.0, 29.0, 31.0, 11.0, 3.0, 34.0, 34.0, 31.0, 20.0, 30.0, 20.0, 45.0, 34.0, 21.0, 18.0, 37.0, 3.0, 55.0, 37.0, 46.0, 35.0, 27.0, 1.0, 25.0, 41.0, 24.0, 44.0, 42.0, 12.0, 45.0, 7.0, 43.0, 53.0, 5.0, 43.0, 41.0, 44.0, 9.0, 3.0, 42.0, 10.0, 25.0, 29.0, 34.0, 12.0, 7.0, 3.0, 22.0, 38.0, 15.0, 24.0, 5.0, 38.0, 3.0, 1.0, 3.0, 12.0, 9.0, 3.0, 7.0, 45.0, 22.0, 2.0, 28.0, 15.0, 25.0, 42.0, 0.0, 4.0, 24.0, 9.0, 22.0, 2.0, 10.0, 41.0, 38.0, 37.0, 1.0, 53.0, 24.0, 9.0, 35.0, 4.0, 25.0, 43.0, 1.0, 1.0, 1.0, 9.0, 3.0, 37.0, 10.0, 10.0, 0.0, 37.0, 10.0, 35.0, 1.0, 15.0, 3.0, 42.0, 9.0, 14.0, 1.0, 42.0, 3.0, 1.0, 5.0, 9.0, 3.0, 1.0, 5.0, 22.0, 25.0, 18.0, 22.0, 3.0, 7.0, 4.0, 5.0, 16.0, 44.0, 35.0, 12.0, 3.0, 42.0, 16.0, 25.0, 4.0, 2.0, 2.0, 7.0, 8.0, 30.0, 8.0, 22.0, 10.0, 3.0, 14.0, 4.0, 7.0, 30.0, 2.0, 14.0, 36.0, 6.0, 0.0, 2.0, 2.0, 2.0, 3.0, 2.0, 30.0, 1.0, 3.0, 2.0, 10.0, 7.0, 37.0, 8.0, 16.0, 2.0, 1.0, 27.0, 0.0, 9.0, 12.0, 8.0, 1.0, 3.0, 29.0, 0.0, 3.0, 0.0, 5.0, 9.0, 7.0, 8.0, 24.0, 7.0, 8.0, 13.0, 8.0, 2.0, 28.0, 9.0, 25.0, 8.0, 25.0, 5.0, 39.0, 7.0, 0.0, 28.0, 1.0, 5.0, 8.0, 27.0, 0.0, 28.0, 5.0, 8.0, 1.0, 1.0, 18.0, 10.0, 38.0, 3.0, 20.0, 15.0, 27.0, 46.0, 0.0, 27.0, 0.0, 18.0, 2.0, 21.0, 27.0, 0.0, 1.0, 32.0, 29.0, 29.0, 24.0, 23.0, 29.0, 0.0, 46.0, 79.0, 73.0, None, 64.0, 77.0, 63.0, 79.0, 71.0, 12.0, 57.0, 57.0, 54.0, 49.0, 57.0, 65.0, 71.0, 54.0, 45.0, 97.0, 83.0, 53.0, 64.0, 82.0, 69.0, 57.0, 63.0, 57.0, 
63.0, 33.0, 51.0, 38.0, 79.0, 55.0, 75.0, 40.0, 57.0, 54.0, 50.0, 59.0, 38.0, 61.0, 65.0, 40.0, 85.0, 57.0, 79.0, 62.0, 38.0, 39.0, 96.0, 57.0, 79.0, 79.0, 86.0, 97.0, 86.0, 28.0, 40.0, 50.0, 79.0, 46.0, 61.0, 40.0, 45.0, 84.0, 46.0, 57.0, 82.0, 86.0, 57.0, 75.0, 62.0, 74.0, 98.0, 42.0, 38.0, 49.0, 96.0, 71.0, 55.0, 64.0, 70.0, 43.0, 60.0, 39.0, 68.0, 68.0, 70.0, 78.0, 52.0, 72.0, 77.0, 51.0, 34.0, 22.0, 43.0, 48.0, 57.0, 52.0, 96.0, 37.0, None, 13.0, 42.0, 65.0, 64.0, 57.0, 80.0, 43.0, 33.0, 26.0, 40.0, 30.0, 44.0, 96.0, 57.0, 78.0, None, 61.0, 34.0, 63.0, 64.0, 61.0, 28.0, 40.0, 57.0, 39.0, 70.0, 68.0, 30.0, 85.0, 62.0, 79.0, 57.0, 42.0, 38.0, 40.0, 76.0, 18.0, 43.0, 55.0, 60.0, 33.0, 56.0, 35.0, 88.0, 23.0, 83.0, 51.0, 52.0, 65.0, 48.0, None, 63.0, 48.0, 96.0, 71.0, 96.0, 64.0, 83.0, 83.0, 70.0, None, 22.0, 79.0, 43.0, 78.0, 96.0, 78.0, 41.0, 21.0, 72.0, 83.0, 63.0, 19.0, 50.0, 60.0, 54.0, 35.0, 55.0, 84.0, 81.0, 96.0, 98.0, 60.0, 35.0, 43.0, 73.0, 44.0, 70.0, 77.0, 68.0, 68.0, 96.0, 56.0, 60.0, 51.0, 63.0, 76.0, 36.0, 51.0, None, 49.0, 72.0, 56.0, 64.0, 86.0, 6.0, 43.0, 59.0, 83.0, 43.0, 70.0, 43.0, 75.0, 41.0, 70.0, 96.0, 83.0, 59.0, 53.0, 41.0, 52.0, None, 59.0, 59.0, 77.0, 39.0, 52.0, None, None, 5.0, 22.0, 67.0, 70.0, 96.0, None, 63.0, 57.0, 40.0, 70.0, 51.0, None, 40.0, 44.0, 77.0, 83.0, 98.0, 56.0, 31.0, 41.0, 80.0, 75.0, 96.0, 58.0, 78.0, 51.0, 42.0, 28.0, 46.0, 60.0, 38.0, 63.0, 81.0, 55.0, 33.0, 54.0, 39.0, 50.0, 39.0, 21.0, 81.0, 67.0, 46.0, 39.0, 33.0, 96.0, 40.0, 68.0, 77.0, 58.0, 60.0, 69.0, 42.0, None, 57.0, None, 47.0, 81.0, 54.0, 49.0, 70.0, 43.0, 38.0, 29.0, 37.0, 45.0, 42.0, 81.0, 69.0, 64.0, 72.0, 46.0, 66.0, 47.0, 63.0, 47.0, 40.0, 60.0, 53.0, 63.0, 50.0, 65.0, 69.0, 48.0, 42.0, None, 58.0, 76.0, 77.0, 39.0, 38.0, 77.0, 75.0, 39.0, 53.0, 70.0, 81.0, 58.0, 38.0, 52.0, None, 62.0, 74.0, 51.0, 64.0, 57.0, None, 46.0, 61.0, 70.0, 84.0, 24.0, 64.0, 60.0, 45.0, 39.0, 68.0, 77.0, 65.0, 22.0, 83.0, 51.0, 69.0, 74.0, 66.0, 40.0, 59.0, 69.0, 52.0, 
77.0, 27.0, 25.0, 58.0, 45.0, 67.0, 67.0, 40.0, 75.0, 96.0, 62.0, 63.0, 57.0, 44.0, 47.0, 77.0, 64.0, 38.0, 70.0, 62.0, 76.0, 41.0, 84.0, 52.0, 84.0, 78.0, 71.0, 69.0, 54.0, 35.0, 46.0, 42.0, 65.0, 70.0, 70.0, 79.0, 38.0, 54.0, 69.0, 98.0, 61.0, None, 83.0, 81.0, 85.0, 43.0, 34.0, 72.0, 84.0, 63.0, 41.0, 78.0, 35.0, 28.0, 46.0, None, 70.0, 18.0, 45.0, 54.0, 55.0, 49.0, 63.0, 63.0, 78.0, 62.0, 33.0, 54.0, 64.0, 50.0, 37.0, 53.0, 55.0, 45.0, 42.0, 54.0, 42.0, 42.0, 62.0, 48.0, 50.0, 51.0, 30.0, 69.0, 55.0, 41.0, 82.0, 57.0, 67.0, 75.0, 61.0, 86.0, 33.0, 35.0, 67.0, 84.0, 42.0, 41.0, 72.0, 60.0, 51.0, 52.0, 52.0, 59.0, 38.0, 66.0, 39.0, 78.0, 42.0, 44.0, 54.0, 86.0, 83.0, 52.0, None, 50.0, 43.0, 86.0, 80.0, 77.0, 42.0, 63.0, 47.0, 64.0, 47.0, 57.0, 58.0, 38.0, 34.0, 57.0, 56.0, 23.0, 96.0, 52.0, 81.0, 74.0, 67.0, 57.0, 42.0, 63.0, 34.0, 66.0, None, 74.0, 30.0, 61.0, 18.0, 68.0, 39.0, 38.0, 43.0, 44.0, 63.0, 56.0, 9.0, 45.0, 76.0, 43.0, 50.0, 33.0, 43.0, 31.0, 70.0, 79.0, 79.0, 13.0, 37.0, 59.0, 56.0, 33.0, None, 75.0, 96.0, 66.0, 65.0, 80.0, 36.0, 55.0, 81.0, 61.0, 83.0, 82.0, 19.0, 74.0, 49.0, 63.0, 64.0, 45.0, 89.0, 77.0, 82.0, 73.0, 61.0, 51.0, 61.0, 95.0, 38.0, 67.0, 41.0, 65.0, None, 38.0, 76.0, 65.0, 35.0, 97.0, 95.0, None, 42.0, 56.0, 43.0, 84.0, 61.0, 37.0, 53.0, 78.0, 40.0, 63.0, 60.0, 53.0, 68.0, 82.0, 76.0, 80.0, 76.0, 66.0, 62.0, 81.0, 78.0, 67.0, None, 45.0, 46.0, None, 22.0, 77.0, 61.0, 63.0, None, 44.0, 42.0, 39.0, None, 49.0, 17.0, 67.0, None, 63.0, 50.0, 96.0, 36.0, 43.0, 70.0, 71.0, 38.0, 36.0, 95.0, 42.0, 72.0, 50.0, None, 61.0, 83.0, 69.0, 49.0, 66.0, 38.0, 67.0, 62.0, 36.0, 70.0, 80.0, 48.0, 34.0, 46.0, 59.0, 95.0, 57.0, 40.0, 45.0, 71.0, 41.0, 62.0, 51.0, 45.0, 21.0, 35.0, 70.0, 42.0, 43.0, 55.0, 49.0, 64.0, 33.0, 71.0, 71.0, 51.0, 71.0, 26.0, 44.0, 63.0, 84.0, 66.0, 46.0, 13.0, 35.0, 67.0, 89.0, 35.0, 69.0, 45.0, 6.0, 54.0, 95.0, 36.0, 37.0, 52.0, 84.0, 97.0, 37.0, 70.0, 35.0, 39.0, 29.0, 56.0, 85.0, 75.0, 83.0, 62.0, 41.0, 28.0, 55.0, 72.0, 
75.0, 66.0, 66.0, 50.0, 66.0, 26.0, 54.0, 44.0, 80.0, 46.0, 37.0, 29.0, None, 26.0, 45.0, 63.0, 41.0, 19.0, 75.0, 62.0, 46.0, 66.0, 30.0, 61.0, 46.0, 37.0, 95.0, 80.0, 29.0, 69.0, 27.0, 95.0, 81.0, 46.0, 43.0, 46.0, 78.0, 63.0, 69.0, 47.0, 63.0, 7.0, 55.0, 74.0, 15.0, 76.0, 56.0, 76.0, 47.0, 89.0, 39.0, 47.0, 52.0, 74.0, 65.0, 42.0, 59.0, None, 43.0, 67.0, 51.0, 64.0, 66.0, None, 61.0, 62.0, 76.0, 64.0, 3.0, 56.0, 60.0, 36.0, 74.0, 78.0, 77.0, None, 36.0, 58.0, 62.0, 31.0, 37.0, 53.0, 26.0, 47.0, 40.0, 41.0, 41.0, 57.0, 51.0, 42.0, 49.0, 56.0, 62.0, 69.0, 58.0, 85.0, 79.0, 55.0, 21.0, 50.0, 50.0, 52.0, 66.0, 69.0, 40.0, 24.0, 56.0, 42.0, 59.0, 69.0, 64.0, 37.0, 45.0, 69.0, 40.0, 41.0, 65.0, 34.0, 43.0, 35.0, 76.0, 52.0, 47.0, 84.0, 79.0, 56.0, 79.0, 84.0, 22.0, 65.0, 36.0, 35.0, 75.0, 44.0, 51.0, 57.0, 39.0, 71.0, 35.0, 38.0, 46.0, 56.0, 64.0, 44.0, 43.0, 37.0, 35.0, 36.0, 43.0, 41.0, 94.0, 59.0, 64.0, 46.0, 44.0, 41.0, 62.0, 83.0, 59.0, 19.0, 48.0, 9.0, 72.0, 65.0, 56.0, 70.0, 46.0, 67.0, 71.0, 61.0, 51.0, 39.0, 65.0, 59.0, 69.0, 24.0, 77.0, None, 69.0, 43.0, 31.0, 60.0, 59.0, 84.0, 53.0, 76.0, 70.0, 35.0, 34.0, 72.0, 72.0, 17.0, 76.0, 37.0, 51.0, 38.0, 47.0, 79.0, 87.0, 50.0, 50.0, 28.0, 48.0, 22.0, 49.0, 60.0, 71.0, 77.0, 71.0, 46.0, 27.0, 61.0, 71.0, 39.0, 34.0, 51.0, 26.0, 43.0, 47.0, 36.0, 18.0, 79.0, 51.0, 44.0, 49.0, 71.0, 40.0, 75.0, 67.0, 87.0, 57.0, 65.0, 33.0, 51.0, 43.0, 67.0, 59.0, 42.0, 28.0, 19.0, 58.0, 67.0, 96.0, 41.0, 49.0, 20.0, 50.0, 44.0, 53.0, 70.0, 39.0, 72.0, 43.0, 82.0, 64.0, 63.0, 64.0, 70.0, 38.0, 21.0, 42.0, 59.0, 40.0, 41.0, 59.0, 47.0, 49.0, 37.0, 34.0, 70.0, 35.0, 80.0, 78.0, 37.0, 67.0, 65.0, 75.0, 72.0, 70.0, 56.0, 70.0, 25.0, 46.0, 62.0, 59.0, 56.0, 46.0, 12.0, 64.0, 36.0, 26.0, 83.0, 51.0, 16.0, 78.0, 33.0, 59.0, 36.0, 41.0, 95.0, 46.0, 70.0, 86.0, 62.0, 47.0, 36.0, 17.0, 78.0, 70.0, 40.0, 43.0, 64.0, 26.0, 42.0, None, 38.0, 38.0, 37.0, 50.0, 38.0, 62.0, 33.0, None, 6.0, None, 59.0, None, None, None, None, None, None, 13.0, 22.0, 
48.0, None, 42.0, 45.0, None, 43.0, None, None, 47.0, 57.0, 43.0, None, 71.0, 41.0, 56.0, None, 40.0, 47.0, 26.0, 57.0, 40.0, 47.0, 42.0, None, None, 64.0, None, None, None, None, 80.0, None, 83.0, 44.0, 79.0, 45.0, None, 67.0, 56.0, 40.0, 39.0, 47.0, None, 39.0, 40.0, 61.0, 42.0, 35.0, 28.0, None, 18.0, None, 39.0, 24.0, 26.0, 59.0, 31.0, 34.0, 47.0, 65.0, 54.0, 50.0, 62.0, 57.0, 55.0, 70.0, 68.0, 47.0, 74.0, 43.0, 47.0, 46.0, 83.0, 13.0, 95.0, 56.0, 66.0, 43.0, 69.0, 51.0, 40.0, 96.0, 42.0, 19.0, 55.0, 62.0, 60.0, 48.0, 31.0, 62.0, 78.0, 64.0, 23.0, 51.0, 18.0, 37.0, None, 52.0, 87.0, 56.0, 49.0, 57.0, 46.0, 39.0, 47.0, 57.0, 34.0, 43.0, 77.0, 49.0, None, 83.0, 67.0, 35.0, 45.0, 44.0, 47.0, 53.0, 37.0, 43.0, 77.0, 28.0, 68.0, 36.0, 65.0, 49.0, 31.0, 5.0, 12.0, 46.0, 65.0, 81.0, 46.0, 67.0, 55.0, 65.0, 42.0, 45.0, 26.0, 67.0, 72.0, 56.0, 57.0, 85.0, 55.0, 63.0, 22.0, 19.0, 42.0, 66.0, 62.0, 97.0, 58.0, 77.0, 76.0, 68.0, 51.0, 74.0, 94.0, 96.0, 73.0, 12.0, 70.0, 43.0, 34.0, 58.0, 63.0, 66.0, 36.0, 97.0, 64.0, 33.0, 22.0, 63.0, 44.0, 43.0, 51.0, 17.0, 3.0, 4.0, 56.0, 60.0, 90.0, 72.0, 47.0, 54.0, 37.0, 25.0, 44.0, 36.0, 71.0, 37.0, 51.0, 47.0, 72.0, 5.0, 4.0, 87.0, 57.0, 85.0, 82.0, 77.0, 34.0, 27.0, 96.0, 36.0, 82.0, 47.0, 38.0, 74.0, 64.0, 53.0, 71.0, 68.0, 63.0, 71.0, 39.0, 37.0, 47.0, 36.0, 42.0, 46.0, 40.0, 46.0, None, 71.0, 64.0, 42.0, 6.0, 88.0, 64.0, 63.0, 42.0, 43.0, 46.0, 40.0, 71.0, 69.0, 38.0, 47.0, 35.0, 43.0, 67.0, 19.0, 31.0, 65.0, 42.0, 70.0, 46.0, 47.0, 25.0, 78.0, 43.0, 76.0, 49.0, 68.0, 41.0, 83.0, 45.0, 45.0, 53.0, 48.0, 42.0, 70.0, 56.0, 67.0, 96.0, 59.0, 70.0, 26.0, 74.0, 9.0, 42.0, 39.0, 48.0, 59.0, 52.0, 70.0, 40.0, 40.0, 65.0, 25.0, 46.0, 94.0, None, 17.0, 62.0, 86.0, 36.0, 43.0, 60.0, 87.0, 75.0, None, 48.0, 32.0, 73.0, 54.0, 71.0, 25.0, 40.0, 56.0, 84.0, 35.0, 51.0, 17.0, 76.0, 58.0, 52.0, 67.0, 61.0, 58.0, 75.0, 49.0, 82.0, 31.0, 49.0, 57.0, 33.0, 36.0, 62.0, 54.0, 37.0, 39.0, 71.0, 49.0, 39.0, 71.0, 55.0, 84.0, 53.0, 36.0, 41.0, 52.0, 
64.0, 77.0, 35.0, None, 69.0, 41.0, 34.0, 66.0, None, 96.0, 40.0, 56.0, 63.0, 71.0, 58.0, 32.0, 48.0, 67.0, 64.0, 40.0, 54.0, 1.0, 74.0, 43.0, 43.0, 31.0, 65.0, 47.0, 57.0, 47.0, 24.0, 69.0, 49.0, None, 57.0, 56.0, 76.0, 43.0, 40.0, 33.0, 66.0, 39.0, 59.0, 94.0, 57.0, 49.0, 73.0, 48.0, 45.0, 69.0, 66.0, 55.0, 42.0, 38.0, 52.0, 52.0, 40.0, 75.0, 40.0, 43.0, 58.0, 29.0, 22.0, 95.0, 22.0, 37.0, 16.0, 67.0, 37.0, 74.0, 70.0, 49.0, 83.0, 61.0, 58.0, 39.0, 65.0, 59.0, 53.0, 43.0, 53.0, 47.0, 56.0, 43.0, 16.0, 40.0, 25.0, 27.0, 39.0, 37.0, 73.0, 62.0, 64.0, 64.0, 57.0, 34.0, 61.0, 53.0, 4.0, 83.0, 54.0, 52.0, 72.0, 35.0, 18.0, 84.0, 78.0, 82.0, 71.0, 26.0, 43.0, 53.0, None, 47.0, 49.0, 74.0, 43.0, 40.0, 58.0, 61.0, 73.0, 65.0, 81.0, 75.0, 56.0, 77.0, 98.0, 61.0, 49.0, 43.0, 96.0, 88.0, 57.0, 40.0, None, 41.0, 63.0, 53.0, 62.0, 39.0, 63.0, 16.0, 71.0, 38.0, 6.0, 65.0, 58.0, 74.0, 71.0, 49.0, 32.0, 67.0, 95.0, 69.0, None, 43.0, 69.0, 46.0, 66.0, 69.0, 65.0, 59.0, 40.0, 43.0, 63.0, 65.0, 41.0, 63.0, 20.0, 56.0, 44.0, 41.0, 85.0, 58.0, 76.0, 35.0, 61.0, 37.0, 49.0, 45.0, 44.0, 74.0, 52.0, 38.0, 50.0, 56.0, 83.0, 53.0, 63.0, 62.0, 18.0, 33.0, 20.0, 69.0, 57.0, 21.0, 36.0, 71.0, 4.0, 57.0, 43.0, 34.0, 32.0, 27.0, 33.0, 22.0, 81.0, 60.0, 49.0, 60.0, None, 43.0, 49.0, 53.0, 43.0, 51.0, 82.0, 57.0, 54.0, 64.0, 53.0, 47.0, 70.0, 42.0, 69.0, 42.0, 36.0, 87.0, 66.0, 63.0, 5.0, 9.0, 36.0, 62.0, 26.0, 82.0, 65.0, 69.0, 78.0, 43.0, 86.0, 41.0, 96.0, 57.0, 39.0, 43.0, 64.0, 45.0, 56.0, 60.0, 45.0, 45.0, 61.0, 78.0, 59.0, 48.0, 28.0, 81.0, 82.0, 65.0, 74.0, 62.0, 72.0, 32.0, 61.0, 47.0, 79.0, 78.0, 48.0, 65.0, 12.0, 55.0, 63.0, 64.0, 65.0, 70.0, 25.0, 62.0, 93.0, 81.0, 83.0, 82.0, 65.0, 25.0, 75.0, 73.0, 41.0, 22.0, 67.0, 90.0, 45.0, 53.0, 68.0, 20.0, 63.0, 73.0, 54.0, 49.0, 56.0, 87.0, 66.0, 78.0, 58.0, 60.0, 37.0, 51.0, 42.0, 71.0, 62.0, 96.0, 52.0, 63.0, 63.0, 61.0, 53.0, 84.0, 34.0, 74.0, 36.0, None, None, 78.0, 72.0, 70.0, 22.0, 59.0, 81.0, 74.0, 66.0, 51.0, 39.0, 41.0, 64.0, 22.0, 
60.0, 20.0, None, 79.0, 37.0, 39.0, 21.0, 76.0, 71.0, None, 69.0, 61.0, 33.0, 74.0, 93.0, 26.0, 28.0, 33.0, 70.0, 51.0, 40.0, 66.0, 36.0, 72.0, 53.0, 47.0, 40.0, 39.0, 78.0, 28.0, 33.0, 13.0, 61.0, 40.0, 36.0, 77.0, 15.0, 39.0, 38.0, 81.0, 45.0, 57.0, 76.0, 30.0, 56.0, 70.0, 54.0, 66.0, 45.0, 66.0, 67.0, 47.0, 71.0, 61.0, 80.0, 32.0, 41.0, 94.0, 37.0, 81.0, 42.0, None, 84.0, 81.0, 59.0, 41.0, 49.0, 47.0, 43.0, 63.0, 69.0, 63.0, 73.0, 71.0, 59.0, 47.0, 64.0, 46.0, 62.0, 52.0, 59.0, 83.0, 33.0, 65.0, 95.0, 51.0, 23.0, 40.0, 18.0, 51.0, 34.0, 14.0, 49.0, 41.0, 52.0, 63.0, 40.0, 59.0, 63.0, 33.0, 65.0, 76.0, 25.0, 80.0, 32.0, None, 61.0, 71.0, 65.0, 56.0, 56.0, 82.0, 95.0, 70.0, 68.0, 61.0, 45.0, 70.0, 21.0, 39.0, 50.0, 80.0, 74.0, 94.0, 28.0, 96.0, 41.0, 4.0, 63.0, 31.0, 56.0, 66.0, 57.0, 20.0, 18.0, 71.0, 19.0, 53.0, 78.0, 38.0, 32.0, 50.0, 73.0, 57.0, 47.0, 53.0, 60.0, 90.0, 40.0, 41.0, 46.0, 56.0, 67.0, 0.0, 33.0, 60.0, 61.0, 38.0, 75.0, 25.0, 29.0, 46.0, None, 86.0, 67.0, 61.0, 45.0, 43.0, 65.0, 64.0, 19.0, 72.0, 72.0, 76.0, 84.0, 34.0, 54.0, 48.0, 68.0, 65.0, 43.0, 57.0, None, 83.0, 56.0, 58.0, 46.0, 51.0, 31.0, 52.0, 68.0, 47.0, 53.0, 64.0, 64.0, 72.0, 60.0, 49.0, 56.0, 46.0, 12.0, 19.0, 63.0, 70.0, 79.0, 74.0, 54.0, 53.0, 56.0, 33.0, 73.0, 82.0, 71.0, 73.0, 52.0, 51.0, 56.0, 47.0, 56.0, 56.0, 16.0, 73.0, 59.0, 75.0, 67.0, 25.0, 67.0, 71.0, 69.0, 60.0, 75.0, 79.0, 31.0, 59.0, 54.0, 62.0, 39.0, 53.0, 84.0, 75.0, None, 95.0, 54.0, 72.0, 60.0, 53.0, 25.0, 75.0, 42.0, 63.0, 32.0, 45.0, None, None, 69.0, 87.0, 61.0, 62.0, 41.0, 47.0, 72.0, 77.0, 55.0, 52.0, 65.0, 86.0, 65.0, 87.0, 39.0, 69.0, 70.0, 49.0, 30.0, 34.0, 69.0, 82.0, 60.0, 24.0, 82.0, 59.0, 41.0, 45.0, 78.0, 5.0, 27.0, 57.0, 94.0, 67.0, 18.0, 74.0, 82.0, 70.0, None, 53.0, 51.0, 53.0, 48.0, 14.0, 46.0, 49.0, 54.0, 35.0, 18.0, 53.0, 16.0, 43.0, 36.0, 80.0, 12.0, 64.0, 23.0, 56.0, 66.0, 18.0, 2.0, 65.0, 67.0, 64.0, 49.0, 66.0, 58.0, 71.0, 28.0, 52.0, 55.0, 57.0, 72.0, 22.0, 55.0, None, 63.0, 42.0, 89.0, 23.0, 
66.0, 96.0, 79.0, 65.0, 80.0, 69.0, 62.0, 95.0, 96.0, 74.0, 70.0, 90.0, 60.0, 70.0, 65.0, 73.0, 2.0, 57.0, 49.0, 46.0, 59.0, 92.0, 66.0, 45.0, 12.0, 43.0, 81.0, 51.0, 59.0, 77.0, 69.0, 59.0, 73.0, 77.0, 57.0, 24.0, 48.0, 69.0, 71.0, 61.0, 37.0, 71.0, 19.0, 61.0, 61.0, None, 42.0, 52.0, 95.0, 34.0, 87.0, 19.0, 18.0, 47.0, 69.0, 79.0, 75.0, 61.0, 40.0, 45.0, 45.0, 59.0, 54.0, 73.0, 76.0, 33.0, 69.0, 70.0, 70.0, 51.0, 67.0, 45.0, 2.0, 81.0, 65.0, 84.0, 61.0, 66.0, 45.0, 81.0, 59.0, 65.0, 88.0, 84.0, 64.0, 56.0, 96.0, 47.0, 73.0, 29.0, 69.0, 46.0, 66.0, 20.0, 4.0, 63.0, 71.0, 71.0, None, 52.0, 56.0, 97.0, 62.0, 21.0, 51.0, 40.0, 83.0, 96.0, 56.0, None, 32.0, 49.0, 47.0, 12.0, 84.0, 32.0, 68.0, 25.0, 40.0, 37.0, 62.0, 51.0, 69.0, 62.0, 59.0, 22.0, 77.0, 82.0, 68.0, 43.0, 39.0, 19.0, 73.0, 55.0, 82.0, 63.0, None, 95.0, 42.0, 46.0, 67.0, 60.0, 22.0, 61.0, 89.0, 45.0, 42.0, 59.0, 82.0, 39.0, 59.0, 87.0, 66.0, None, 52.0, 60.0, 80.0, 65.0, 94.0, 66.0, 22.0, 90.0, 80.0, 1.0, 65.0, 59.0, 73.0, 48.0, 28.0, 27.0, 59.0, 51.0, 65.0, 64.0, 46.0, 56.0, 7.0, 83.0, 76.0, 71.0, 46.0, 69.0, 61.0, 64.0, 69.0, 56.0, 46.0, 56.0, None, 63.0, None, 67.0, 62.0, 61.0, 50.0, 42.0, 48.0, 77.0, 74.0, 26.0, 75.0, 67.0, 61.0, 67.0, 46.0, 53.0, 80.0, 52.0, 24.0, 56.0, 65.0, 45.0, 5.0, 70.0, 75.0, 32.0, 22.0, 87.0, 6.0, 53.0, 96.0, 52.0, 66.0, 73.0, 56.0, 45.0, 74.0, 75.0, 63.0, 44.0, 29.0, 69.0, 56.0, 87.0, 57.0, 78.0, 42.0, 34.0, 71.0, 50.0, 72.0, 76.0, 81.0, 41.0, 59.0, 83.0, 44.0, 71.0, 21.0, None, 58.0, 55.0, 52.0, 18.0, 69.0, 63.0, 72.0, 77.0, 45.0, None, 61.0, 94.0, 65.0, 29.0, 22.0, 69.0, 67.0, 94.0, 23.0, 63.0, 71.0, 72.0, 51.0, 55.0, 47.0, 54.0, 25.0, 72.0, 59.0, 87.0, None, 49.0, 46.0, 65.0, None, 71.0, 57.0, 45.0, 54.0, 41.0, 18.0, 64.0, 42.0, 83.0, 73.0, 73.0, 80.0, 75.0, 52.0, 48.0, 30.0, 53.0, 60.0, 61.0, 72.0, 44.0, 50.0, 45.0, 77.0, 26.0, 25.0, 73.0, 60.0, 89.0, 45.0, 28.0, 78.0, 68.0, 59.0, 71.0, 55.0, 87.0, 82.0, 51.0, 69.0, 67.0, 72.0, 70.0, 66.0, 71.0, 73.0, 77.0, 72.0, 72.0, 
59.0, 83.0, 29.0, 51.0, 82.0, 78.0, 71.0, 59.0, 65.0, 50.0, 71.0, 70.0, 83.0, 39.0, 75.0, 40.0, 67.0, 75.0, 50.0, 67.0, 67.0, 47.0, 76.0, 34.0, 87.0, None, 87.0, 54.0, 27.0, 82.0, 45.0, 70.0, 45.0, 72.0, 56.0, 64.0, 63.0, 73.0, 64.0, 65.0, 72.0, 59.0, 78.0, 67.0, 8.0, 75.0, 62.0, 74.0, 60.0, 56.0, 14.0, 66.0, 71.0, 80.0, 65.0, 25.0, 19.0, 63.0, 66.0, 74.0, None, None, 62.0, 32.0, 82.0, 2.0, 74.0, 61.0, 74.0, 65.0, 71.0, 72.0, 44.0, 75.0, 67.0, 59.0, 58.0, 65.0, 73.0, 34.0, 5.0, 35.0, 96.0, 42.0, 73.0, None, 56.0, 65.0, 72.0, 46.0, 41.0, 84.0, None, None, 59.0, 69.0, 64.0, 77.0, 39.0, 94.0, 65.0, 67.0, 65.0, 59.0, None, None, 64.0, None, 95.0, 7.0, 97.0, 74.0, 50.0, 75.0, None, 68.0, None, None, 74.0, None, 59.0, 52.0, None, 58.0, 50.0, 61.0, 27.0, 71.0, 65.0, 79.0, 69.0, None, None, 79.0, None, None, 68.0, 73.0, 70.0, 43.0, 73.0, 69.0, 67.0, 66.0, 58.0, 19.0, 59.0, 63.0, 86.0, 47.0, 82.0, 45.0, 81.0, 63.0, 81.0, 84.0, 66.0, 63.0, 84.0, 69.0, 56.0, 45.0, 59.0, 64.0, 72.0, 51.0, 78.0, 49.0, 83.0, 55.0, 65.0, 66.0, 66.0, 63.0, 63.0, 87.0, 65.0, 65.0, 45.0, 23.0, None, 75.0, 51.0, 59.0, 81.0, 63.0, 82.0, 70.0, 70.0, 62.0, 76.0, 14.0, 65.0, 78.0, 56.0, 61.0, 71.0, 73.0, 81.0, 82.0, 76.0, 45.0, 80.0, 56.0, 45.0, 72.0, 69.0, 46.0, 80.0, 71.0, 56.0, 56.0, 59.0, 59.0, 79.0, 63.0, 60.0, 84.0, 65.0, 78.0, 65.0, 81.0, 67.0, 44.0, 46.0, 51.0, 48.0, 56.0, 65.0, 25.0, 74.0, 60.0, 59.0, 72.0, 66.0, 72.0, 59.0, 59.0, 67.0, None, 49.0, 66.0, 65.0, 69.0, 46.0, 55.0, 72.0, 56.0, 56.0, 56.0, 41.0, 58.0, 71.0, 66.0, 51.0, 56.0, 71.0, 83.0, 61.0, 42.0, 65.0, 21.0, 70.0, 41.0, 65.0, 81.0, 73.0, 62.0, 67.0, 81.0, 71.0, 81.0, 78.0, 80.0, 72.0, 73.0, 64.0, 2.0, 41.0, 70.0, 79.0, 71.0, 61.0, 67.0, None, 79.0, 75.0, 56.0, 44.0, 43.0, 73.0, 94.0, 57.0, 66.0, 56.0, 71.0, 65.0, 55.0, 46.0, 69.0, 71.0, 69.0, 73.0, 66.0, 43.0, 31.0, 66.0, 56.0, 86.0, 71.0, 75.0, 59.0, 94.0, 30.0, 45.0, 85.0, 65.0, 65.0, 66.0, 56.0, 65.0, 66.0, 63.0, 1.0, 67.0, 70.0, 88.0, 65.0, 80.0, 68.0, 65.0, 67.0, 59.0, 75.0, 
72.0, 86.0, 73.0, 38.0, 79.0, 84.0, 73.0, 65.0, 46.0, 65.0, 76.0, 80.0, 51.0, 72.0, 61.0, 49.0, 56.0, 70.0, 56.0, 41.0, 63.0, 83.0, 43.0, 59.0, 55.0, 89.0, 57.0, 30.0, 89.0, 59.0, 66.0, 75.0, 65.0, 65.0, 69.0, 46.0, 69.0, 77.0, 87.0, 67.0, 65.0, 39.0, 74.0, 63.0, 87.0, 70.0, 62.0, 69.0, 94.0, 32.0, 45.0, 55.0, 75.0, 65.0, 97.0, 56.0, 12.0, 45.0, 59.0, 80.0, 53.0, 56.0, 40.0, 45.0, 89.0, 31.0, 69.0, 66.0, 86.0, 71.0, 57.0, 80.0, 45.0, 56.0, 45.0, 84.0, 89.0, 75.0, 75.0, 88.0, 81.0, 53.0, 46.0, 46.0, 78.0, 63.0, None, 54.0, 69.0, 62.0, 47.0, 78.0, 84.0, 76.0, 65.0, 94.0, 72.0, 72.0, 81.0, 54.0, 83.0, 82.0, 69.0, 30.0, 36.0, 59.0, 63.0, 35.0, 78.0, 14.0, 82.0, 77.0, 72.0, 65.0, 29.0, 65.0, 85.0, 86.0, 84.0, 70.0, 75.0, 88.0, 30.0, 72.0, 26.0, 65.0, 88.0, 88.0, 14.0, 27.0, 27.0, 28.0, 31.0, 28.0, 28.0, 26.0, 36.0, 53.0, 14.0, 36.0, 34.0, 35.0, 36.0, 26.0, 49.0, 27.0, 35.0, 28.0, 41.0, 53.0, 36.0, 14.0, 55.0, 28.0, 14.0, 55.0, 58.0, 53.0, 58.0, 55.0, 57.0, 44.0, 55.0, 14.0, 58.0, 34.0, 58.0, 53.0, 36.0, 17.0, 21.0, 21.0, 49.0, 6.0, 28.0, 45.0, 27.0, 55.0, 23.0, 28.0, 24.0, 31.0, 21.0, 35.0, 44.0, 24.0, 26.0, 52.0, 41.0, 23.0, 36.0, 41.0, 7.0, 24.0, 6.0, 55.0, 27.0, 35.0, 39.0, 21.0, 42.0, 21.0, 21.0, 16.0, 16.0, 23.0, 35.0, 35.0, 7.0, 30.0, 23.0, 51.0, 58.0, 3.0, 3.0, 21.0, 7.0, 6.0, 30.0, 23.0, 27.0, 55.0, 18.0, 21.0, 50.0, 23.0, 48.0, 17.0, 27.0, 52.0, 52.0, 39.0, 4.0, 27.0, 48.0, 44.0, 52.0, 28.0, 31.0, 42.0, 56.0, 17.0, 40.0, 46.0, 16.0, 58.0, 52.0, 46.0, 48.0, 56.0, 58.0, None, 16.0, 39.0, None, 16.0, 36.0, None, 3.0, 34.0, None, 52.0, 44.0, 56.0, 48.0, 7.0, 23.0, 48.0, None, 3.0, 6.0, 25.0, 16.0, 3.0, 16.0, 17.0, None, 52.0, 7.0, 21.0, 53.0, 26.0, 16.0, 53.0, 53.0, 11.0, 6.0, 6.0, 36.0, 46.0, 23.0, 53.0, 26.0, 41.0, 14.0, 7.0, 17.0, 40.0, 21.0, 17.0, 42.0, 8.0, 49.0, 52.0, 12.0, 4.0, 31.0, 35.0, 23.0, 47.0, 2.0, 40.0, 47.0, 35.0, 26.0, 18.0, 16.0, 4.0, 4.0, 14.0, 21.0, 22.0, 31.0, 11.0, 6.0, 21.0, 36.0, 49.0, 26.0, 4.0, 58.0, 21.0, 21.0, 5.0, 7.0, 3.0, 49.0, 18.0, 
7.0, 18.0, 52.0, 23.0, 4.0, 23.0, 3.0, 3.0, 21.0, 4.0, 5.0, 17.0, 21.0, 13.0, 12.0, 18.0, 5.0, 5.0, 12.0, 5.0, 11.0, 15.0, 17.0, 17.0, 17.0, 36.0, 17.0, 36.0, 44.0, 18.0, 2.0, 5.0, 1.0, 18.0, 2.0, 11.0, 53.0, 10.0, 1.0, 3.0, 3.0, 33.0, 2.0, 20.0, 3.0, 4.0, 17.0, 23.0, 23.0, 1.0, 3.0, 14.0, 11.0, 5.0, 10.0, 11.0, 20.0, 2.0, 3.0, 2.0, 45.0, 22.0, 2.0, 34.0, 4.0, 3.0, 5.0, 26.0, 9.0, 49.0, 12.0, 1.0, 3.0, 1.0, 13.0, 48.0, 17.0, 28.0, 2.0, None, 3.0, 34.0, 2.0, 36.0, 9.0, 12.0, 18.0, 2.0, 27.0, 21.0, 1.0, 0.0, 20.0, 53.0, 6.0, 2.0, 2.0, 2.0, 1.0, 20.0, 22.0, 3.0, 4.0, 15.0, 22.0, 4.0, 25.0, 11.0, 49.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 12.0, 1.0, 15.0, 4.0, 2.0, 8.0, 8.0, 1.0, 0.0, 2.0, 1.0, 10.0, 2.0, 2.0, 1.0, 20.0, 10.0, 3.0, 9.0, 4.0, 22.0, 17.0, 1.0, 9.0, 30.0, 23.0, 26.0, 22.0, 22.0, 4.0, 4.0, 2.0, 23.0, 18.0, 1.0, 13.0, 6.0, 4.0, 2.0, 8.0, 15.0, 33.0, 5.0, 9.0, 22.0, 2.0, 1.0, 22.0, 1.0, 11.0, 57.0, 5.0, 0.0, None, 11.0, 62.0, 13.0, 1.0, 6.0, 20.0, 11.0, 0.0, 39.0, 2.0, 48.0, 2.0, 2.0, 8.0, 20.0, 2.0, 53.0, 2.0, 7.0, 9.0, 30.0, 5.0, 25.0, 58.0, 14.0, 11.0, 8.0, 3.0, 7.0, 1.0, 13.0, 30.0, 21.0, 3.0, 36.0, 37.0, 5.0, 3.0, 32.0, 14.0, 43.0, 20.0, 9.0, 35.0, 20.0, 53.0, 20.0, 39.0, 8.0, 2.0, 13.0, 2.0, 38.0, 1.0, 23.0, 11.0, 23.0, 15.0, 30.0, 36.0, 22.0, 0.0, 49.0, 20.0, 53.0, 45.0, 44.0, 15.0, 23.0, 29.0, 30.0, 23.0, 30.0, 53.0, 13.0, 30.0, 10.0, 2.0, 3.0, 27.0, 1.0, 10.0, 64.0, 30.0, 21.0, 8.0, 13.0, 28.0, 19.0, 15.0, 39.0, 25.0, 30.0, 12.0, 7.0, 25.0, 11.0, 38.0, 7.0, 11.0, 64.0, 1.0, 38.0, 17.0, 2.0, 32.0, 17.0, 5.0, 64.0, 30.0, 12.0, 4.0, 53.0, 4.0, 14.0, 53.0, 8.0, 11.0, 8.0, 30.0, 36.0, 2.0, 11.0, 5.0, 1.0, 17.0, 5.0, 12.0, 11.0, 6.0, 6.0, 36.0, 36.0, 33.0, 4.0, 40.0, 47.0, 19.0, 5.0, 10.0, 20.0, 15.0, 62.0, 3.0, 6.0, 11.0, 39.0, 33.0, 10.0, 10.0, 10.0, 3.0, 5.0, 13.0, 56.0, 8.0, 15.0, 6.0, 24.0, 8.0, 5.0, 10.0, 45.0, 1.0, 6.0, 21.0, 6.0, 45.0, 22.0, 51.0, 8.0, 14.0, 8.0, 8.0, 38.0, 11.0, 8.0, 3.0, 27.0, 6.0, 6.0, 19.0, 12.0, 2.0, 9.0, 5.0, 17.0, 11.0, 26.0, 
33.0, 2.0, 7.0, 8.0, 10.0, 8.0, 2.0, 37.0, 4.0, 32.0, 6.0, 1.0, 10.0, 27.0, 3.0, 8.0, 3.0, 21.0, 6.0, 6.0, 1.0, 7.0, 17.0, 11.0, 8.0, 14.0, 38.0, 24.0, 6.0, 6.0, 59.0, 27.0, 40.0, 16.0, 2.0, 24.0, 6.0, 39.0, 14.0, 20.0, 6.0, 15.0, 10.0, 6.0, 12.0, 12.0, 38.0, 14.0, 52.0, 14.0, 38.0, 10.0, 6.0, 2.0, 38.0, 3.0, 51.0, 6.0, 31.0, 4.0, 4.0, 4.0, 28.0, 45.0, 1.0, 4.0, 2.0, 41.0, 3.0, 11.0, 56.0, 39.0, 1.0, 24.0, 6.0, 6.0, 4.0, 5.0, 3.0, 30.0, 12.0, 13.0, 5.0, 56.0, 6.0, 30.0, 21.0, 19.0, 7.0, 11.0, 32.0, 2.0, 7.0, 3.0, 18.0, 13.0, 7.0, 32.0, 47.0, 5.0, 8.0, 3.0, 59.0, 2.0, 12.0, 24.0, 10.0, 19.0, 4.0, 10.0, 9.0, 41.0, 21.0, 2.0, 3.0, 4.0, 14.0, 41.0, 7.0, 56.0, 8.0, 7.0, 39.0, 18.0, 4.0, 6.0, 6.0, 7.0, 4.0, 1.0, 5.0, 7.0, 18.0, 4.0, 4.0, 6.0, 4.0, 12.0, 30.0, 6.0, 6.0, 24.0, 4.0, 4.0, 23.0, 14.0, 14.0, 21.0, 17.0, 7.0, 7.0, 41.0, 7.0, 9.0, 13.0, 41.0, 3.0, 4.0, 16.0, 17.0, 62.0, 7.0, 39.0, 38.0, 5.0, 15.0, 8.0, 6.0, 11.0, 57.0, 9.0, 14.0, 7.0, 11.0, 9.0, 9.0, 21.0, 5.0, 5.0, 6.0, 7.0, 8.0, 5.0, 54.0, 24.0, 34.0, 8.0, 4.0, None, 7.0, 41.0, 35.0, 6.0, 10.0, 6.0, 5.0, 23.0, 24.0, 15.0, 5.0, 5.0, 3.0, 2.0, 13.0, 34.0, 5.0, 3.0, 55.0, 59.0, 4.0, 4.0, 43.0, 15.0, 8.0, 9.0, 2.0, 0.0, 6.0, 6.0, 4.0, 35.0, 2.0, 11.0, 5.0, 41.0, None, 6.0, 5.0, 7.0, 6.0, 4.0, 4.0, 5.0, 9.0, 5.0, 2.0, 7.0, None, 0.0, 42.0, 6.0, 6.0, 6.0, 16.0, 42.0, 36.0, 12.0, 7.0, 50.0, 14.0, 0.0, 0.0, 11.0, 5.0, 12.0, 9.0, 0.0, 8.0, 8.0, 6.0, 5.0, 21.0, 29.0, 2.0, 2.0, 35.0, 0.0, 7.0, 1.0, 7.0, 11.0, 2.0, 11.0, 18.0, 0.0, 4.0, 5.0, 4.0, 0.0, 4.0, 24.0, 12.0, 13.0, 21.0, 2.0, 9.0, 24.0, 4.0, 5.0, 5.0, 5.0, 31.0, 6.0, 5.0, 6.0, 4.0, 2.0, 9.0, 3.0, 4.0, 9.0, 10.0, 9.0, 0.0, 27.0, 4.0, 14.0, 9.0, 6.0, 4.0, 2.0, 0.0, 9.0, 3.0, 13.0, 36.0, 8.0, 5.0, 5.0, 1.0, 10.0, 11.0, 10.0, 38.0, 3.0, 0.0, 26.0, 9.0, 11.0, 8.0, 4.0, 3.0, 30.0, 12.0, 24.0, 29.0, 4.0, 10.0, 4.0, 11.0, 12.0, 11.0, 16.0, 0.0, 0.0, 0.0, 2.0, 8.0, 4.0, 5.0, 1.0, 6.0, 5.0, 0.0, 7.0, 3.0, 7.0, 4.0, 5.0, 4.0, 17.0, 4.0, 7.0, 1.0, 3.0, 0.0, 0.0, 9.0, 10.0, 
0.0, 17.0, 1.0, 1.0, 17.0, 9.0, 6.0, 1.0, 0.0, 1.0, 5.0, 1.0, 8.0, 2.0, 7.0, 10.0, 3.0, 14.0, 7.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 9.0, 0.0, 0.0, 0.0, 8.0, 4.0, 0.0, None, 0.0, 7.0, None, 4.0, 0.0, 2.0, None, 3.0, 1.0, 1.0, 13.0, 10.0, 0.0, 5.0, 0.0, 0.0, 37.0, 0.0, 1.0, 8.0, 6.0, 6.0, 12.0, 1.0, 6.0, 1.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 51.0, 11.0, 0.0, 11.0, 6.0, 1.0, 6.0, 0.0, 38.0, 9.0, 5.0, 10.0, 1.0, 0.0, 0.0, 3.0, 7.0, 0.0, 5.0, 0.0, 7.0, 7.0, 3.0, 9.0, 6.0, 2.0, 4.0, 6.0, 3.0, 1.0, 0.0, 7.0, 3.0, 13.0, 1.0, 10.0, 4.0, 3.0, 6.0, 2.0, 3.0, 0.0, 2.0, 11.0, 6.0, 0.0, 0.0, 13.0, 11.0, 1.0, 1.0, 5.0, 30.0, 5.0, 12.0, 3.0, 4.0, 0.0, 6.0, 3.0, 0.0, 0.0, 2.0, 8.0, 1.0, 24.0, 12.0, 4.0, 13.0, 0.0, 12.0, 3.0, 1.0, 3.0, 1.0, 1.0, 33.0, 1.0, 0.0, 11.0, 0.0, 0.0, 2.0, 0.0, 2.0, 13.0, 12.0, 13.0, 1.0, 13.0, 2.0, 0.0, 33.0, 7.0, 0.0, 3.0, 1.0, 4.0, 1.0, 11.0, 3.0, 0.0, 3.0, 8.0, 4.0, 1.0, 13.0, 1.0, 13.0, 3.0, 4.0, 1.0, 1.0, 3.0, 11.0, 6.0, 1.0, 0.0, 5.0, None, 1.0, 0.0, 3.0, 10.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, None, 1.0, 1.0, 11.0, 0.0, 3.0, 10.0, 8.0, 5.0, 7.0, 0.0, 4.0, 4.0, 0.0, 0.0, None]
bike_score = [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 54.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 27.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 53.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 69.0, 58.0, None, 54.0, 78.0, 51.0, 67.0, 70.0, None, 68.0, 68.0, 70.0, 58.0, 68.0, 80.0, 61.0, 70.0, 40.0, 59.0, 73.0, 69.0, 55.0, 77.0, 52.0, 68.0, 74.0, 68.0, None, 55.0, 43.0, 63.0, 70.0, 47.0, 64.0, 54.0, 68.0, 70.0, 68.0, 57.0, 55.0, 54.0, 56.0, 63.0, 79.0, 68.0, 56.0, 69.0, 52.0, 57.0, 60.0, 78.0, 78.0, 71.0, 83.0, 59.0, 73.0, 63.0, 54.0, 60.0, 54.0, 37.0, 54.0, 54.0, 61.0, 72.0, 67.0, 68.0, 76.0, 83.0, 68.0, 64.0, 69.0, 74.0, 60.0, 62.0, 57.0, 34.0, 60.0, 58.0, 79.0, 68.0, 65.0, 52.0, 73.0, 44.0, 64.0, 73.0, 40.0, 75.0, 72.0, 59.0, 45.0, 43.0, 46.0, 27.0, 64.0, 55.0, 39.0, 51.0, 60.0, 71.0, None, None, 64.0, 69.0, 59.0, 68.0, 74.0, 64.0, 55.0, 58.0, 53.0, 57.0, 53.0, 76.0, 70.0, 66.0, None, 66.0, 66.0, None, 68.0, 66.0, 44.0, 66.0, 68.0, 37.0, 65.0, 65.0, 68.0, 83.0, 69.0, 71.0, 62.0, 64.0, 52.0, 70.0, 74.0, 26.0, 64.0, 76.0, 73.0, 60.0, 44.0, 68.0, 84.0, 22.0, 60.0, 68.0, 62.0, 56.0, 54.0, None, 63.0, 45.0, 60.0, 73.0, 60.0, 68.0, 53.0, 53.0, 65.0, None, 27.0, 67.0, 52.0, 83.0, 60.0, 79.0, 68.0, 22.0, 73.0, 73.0, 63.0, 51.0, 71.0, 73.0, 56.0, 60.0, 50.0, 67.0, 70.0, 60.0, 60.0, 75.0, 60.0, 64.0, 56.0, 53.0, 58.0, 65.0, 55.0, 73.0, 60.0, 78.0, 51.0, 64.0, None, 75.0, 41.0, 68.0, None, 46.0, 79.0, 60.0, 72.0, 55.0, 48.0, 52.0, 54.0, 53.0, 64.0, 65.0, 54.0, 64.0, 48.0, 65.0, 60.0, 73.0, 68.0, 65.0, 37.0, 48.0, None, 55.0, 53.0, 66.0, 40.0, 62.0, None, None, 21.0, 27.0, 53.0, 65.0, 60.0, None, None, 68.0, 54.0, 48.0, 56.0, None, 70.0, 62.0, 45.0, 73.0, 60.0, 59.0, 52.0, 43.0, 46.0, 40.0, 65.0, 63.0, 74.0, 72.0, 64.0, 26.0, 66.0, 73.0, 51.0, 54.0, 70.0, 48.0, 60.0, 70.0, 61.0, 46.0, 61.0, 22.0, 70.0, 72.0, 61.0, 57.0, 55.0, 60.0, 63.0, 58.0, 45.0, 71.0, 53.0, 76.0, 57.0, None, 53.0, None, 58.0, 73.0, 56.0, 58.0, 58.0, 53.0, 52.0, 43.0, 51.0, 56.0, 61.0, 67.0, 66.0, 57.0, 59.0, 49.0, 
61.0, 36.0, 64.0, 59.0, 63.0, 71.0, 56.0, 56.0, 60.0, 62.0, 60.0, 62.0, 64.0, None, 52.0, 67.0, 54.0, 49.0, 64.0, 54.0, 64.0, 61.0, 56.0, 68.0, 56.0, 59.0, 52.0, 53.0, None, 51.0, 78.0, 62.0, 57.0, 65.0, None, 54.0, 45.0, 72.0, 79.0, 56.0, 49.0, 68.0, 57.0, 62.0, 64.0, 45.0, 56.0, 27.0, 73.0, 57.0, 50.0, 53.0, 67.0, 47.0, None, 50.0, 45.0, 54.0, 58.0, 43.0, 55.0, 58.0, 56.0, 57.0, 54.0, 74.0, 64.0, 51.0, 75.0, 54.0, 52.0, 56.0, 78.0, 58.0, 52.0, 51.0, 57.0, 75.0, 48.0, 72.0, 65.0, 74.0, 77.0, 62.0, 61.0, 60.0, 60.0, 34.0, 41.0, 49.0, 61.0, 63.0, 70.0, 63.0, 60.0, 70.0, 60.0, 61.0, None, 48.0, 53.0, 65.0, 64.0, 53.0, 60.0, 60.0, 65.0, 65.0, 64.0, 60.0, 52.0, 60.0, None, 48.0, 26.0, 47.0, 51.0, 73.0, 78.0, None, 68.0, 75.0, 55.0, 64.0, 60.0, 59.0, 47.0, 57.0, 64.0, 48.0, 64.0, 60.0, 54.0, 65.0, 64.0, 69.0, 44.0, 62.0, 60.0, 68.0, 74.0, 60.0, 63.0, 48.0, 60.0, 56.0, 64.0, 71.0, 76.0, 46.0, 60.0, 57.0, 65.0, 64.0, 57.0, 59.0, 65.0, 57.0, 57.0, 51.0, 53.0, 52.0, 69.0, 61.0, 55.0, 65.0, 65.0, 61.0, 61.0, 68.0, 57.0, None, 60.0, 64.0, 73.0, 72.0, 68.0, 65.0, 53.0, 70.0, 56.0, 60.0, 62.0, 63.0, 63.0, 57.0, 53.0, 55.0, 47.0, 60.0, 45.0, 67.0, 73.0, 53.0, 69.0, 45.0, None, 55.0, 41.0, None, 83.0, 52.0, 68.0, 38.0, 77.0, 56.0, 63.0, 64.0, 52.0, None, 64.0, 51.0, 41.0, 66.0, 64.0, 46.0, 58.0, 64.0, 52.0, 53.0, 71.0, 71.0, None, 58.0, 64.0, 58.0, 32.0, None, 64.0, 64.0, 78.0, 53.0, 71.0, 31.0, 79.0, 53.0, 72.0, 53.0, 53.0, 43.0, 83.0, 48.0, 65.0, 64.0, 68.0, 74.0, 68.0, 53.0, 74.0, 78.0, 60.0, 71.0, 53.0, 52.0, 57.0, 60.0, 77.0, None, 69.0, 70.0, 64.0, 60.0, 57.0, 53.0, None, 53.0, 55.0, 64.0, 50.0, 68.0, 55.0, 63.0, 73.0, 63.0, 74.0, 51.0, 53.0, 81.0, 76.0, 54.0, 78.0, 54.0, 69.0, 49.0, 73.0, 64.0, 57.0, None, 40.0, 47.0, None, 27.0, 80.0, 72.0, None, None, 52.0, 64.0, 61.0, None, 57.0, 50.0, 65.0, None, 53.0, 46.0, 60.0, 37.0, 64.0, 58.0, 62.0, 47.0, 36.0, 53.0, 64.0, 50.0, 60.0, None, 61.0, 53.0, 66.0, 54.0, 73.0, 52.0, 62.0, 69.0, 41.0, 58.0, 71.0, 61.0, 59.0, 34.0, 74.0, 
56.0, 59.0, 63.0, 66.0, 58.0, 61.0, 64.0, 58.0, 40.0, 54.0, 43.0, 65.0, 64.0, 64.0, 79.0, 58.0, 54.0, 58.0, 58.0, 62.0, 64.0, 70.0, 52.0, 55.0, 51.0, 73.0, 52.0, 51.0, None, 60.0, 62.0, 69.0, 60.0, 61.0, 61.0, 48.0, 51.0, 53.0, 53.0, 52.0, 52.0, 77.0, 57.0, 57.0, 76.0, 25.0, 61.0, 54.0, 55.0, 59.0, 74.0, 53.0, 61.0, 56.0, 63.0, 51.0, 75.0, 80.0, 57.0, 73.0, 42.0, 69.0, 52.0, 54.0, 34.0, 78.0, 35.0, 49.0, 56.0, None, 65.0, 68.0, 72.0, 64.0, 57.0, 44.0, 59.0, 48.0, 52.0, 54.0, 62.0, 42.0, 41.0, 53.0, 46.0, 59.0, 54.0, 44.0, 53.0, 73.0, 48.0, 53.0, 44.0, 77.0, 71.0, 53.0, 60.0, 65.0, 48.0, 65.0, 46.0, 50.0, 70.0, 72.0, 65.0, 50.0, 74.0, 61.0, 49.0, 36.0, 63.0, 48.0, 65.0, 58.0, None, 64.0, 62.0, 58.0, 63.0, 56.0, None, 77.0, 57.0, 79.0, 45.0, 46.0, 63.0, 64.0, 59.0, 55.0, 75.0, 77.0, None, 37.0, 71.0, 64.0, 49.0, 40.0, 52.0, 52.0, 71.0, 63.0, 68.0, 48.0, 64.0, 52.0, 56.0, 45.0, 53.0, 51.0, 68.0, 64.0, 75.0, 70.0, 73.0, 57.0, 49.0, 47.0, 36.0, 66.0, 52.0, 63.0, 53.0, 65.0, 58.0, 46.0, 70.0, 56.0, 58.0, 47.0, 61.0, 63.0, 51.0, 56.0, 53.0, 64.0, 60.0, 64.0, 61.0, 43.0, 65.0, 77.0, 61.0, 66.0, 65.0, 48.0, 56.0, 37.0, 55.0, 40.0, 62.0, 66.0, 47.0, 55.0, 46.0, 60.0, 63.0, 48.0, 60.0, 61.0, 44.0, 56.0, 63.0, 55.0, 41.0, 64.0, 62.0, 75.0, 50.0, 59.0, 61.0, 47.0, 60.0, 69.0, 73.0, 50.0, 59.0, 60.0, 51.0, 63.0, 73.0, 55.0, 58.0, 67.0, 67.0, 58.0, 64.0, 51.0, 62.0, 61.0, 56.0, 50.0, 50.0, 53.0, None, 70.0, 64.0, 52.0, 67.0, 53.0, 60.0, 63.0, 74.0, 57.0, 60.0, 55.0, 63.0, 75.0, 51.0, 54.0, 68.0, 44.0, 58.0, 60.0, 70.0, 74.0, 60.0, 60.0, 56.0, 61.0, 48.0, 58.0, 45.0, 71.0, 76.0, 45.0, 58.0, 58.0, 60.0, 58.0, 37.0, 58.0, 44.0, 52.0, 64.0, 50.0, 64.0, 38.0, 77.0, 44.0, 67.0, 58.0, 83.0, 44.0, 66.0, 75.0, 65.0, 74.0, 50.0, 57.0, 44.0, 64.0, 64.0, 63.0, 65.0, 59.0, 53.0, 64.0, 57.0, 64.0, 56.0, 64.0, 53.0, 54.0, 47.0, 56.0, 58.0, 61.0, 63.0, 64.0, 76.0, 68.0, 67.0, 58.0, 40.0, 42.0, 55.0, 64.0, 63.0, 70.0, 49.0, 51.0, 51.0, 58.0, 57.0, 50.0, 61.0, 60.0, 67.0, 64.0, 70.0, 65.0, 70.0, 
49.0, 63.0, 73.0, 70.0, 63.0, 55.0, 68.0, 63.0, 50.0, 60.0, 61.0, 51.0, 58.0, 47.0, 31.0, 73.0, 57.0, 49.0, 80.0, 55.0, 77.0, 40.0, 60.0, 53.0, 61.0, 74.0, 80.0, 69.0, 50.0, 59.0, 57.0, 55.0, 73.0, 65.0, 54.0, 55.0, 52.0, 57.0, None, 47.0, 52.0, 49.0, 60.0, 64.0, 69.0, 46.0, None, 47.0, None, 59.0, None, None, None, None, None, None, 50.0, None, 50.0, None, 58.0, 40.0, None, 64.0, None, None, 48.0, 46.0, 42.0, None, 71.0, 63.0, 63.0, None, 54.0, 49.0, 52.0, 55.0, 54.0, 43.0, 56.0, None, None, 55.0, None, None, None, None, 51.0, None, 73.0, 33.0, 71.0, 67.0, None, 65.0, 55.0, 63.0, 48.0, 48.0, None, 55.0, 46.0, 65.0, 47.0, 55.0, 26.0, None, 38.0, None, 61.0, 58.0, 63.0, 53.0, 43.0, 37.0, 47.0, 50.0, 62.0, 38.0, 58.0, 60.0, 65.0, 63.0, 65.0, None, 73.0, 64.0, 56.0, 51.0, 78.0, 50.0, 53.0, 61.0, 63.0, 64.0, 70.0, 69.0, 63.0, 54.0, 61.0, 32.0, 50.0, 41.0, 63.0, 46.0, 63.0, 69.0, 75.0, 49.0, 47.0, 56.0, 50.0, 57.0, None, 60.0, 72.0, 68.0, 45.0, 47.0, 47.0, 61.0, 39.0, 54.0, 36.0, 52.0, 78.0, 58.0, None, 76.0, 57.0, 43.0, 43.0, 70.0, 58.0, 63.0, 30.0, 64.0, 76.0, 60.0, 64.0, 41.0, 61.0, 61.0, 63.0, 48.0, None, 51.0, 69.0, 63.0, 48.0, 63.0, 51.0, 51.0, 64.0, 48.0, 47.0, 56.0, 71.0, 53.0, 59.0, 79.0, 68.0, 73.0, 52.0, 43.0, 58.0, 75.0, 69.0, 57.0, 70.0, 60.0, 79.0, 64.0, 43.0, 68.0, 75.0, 64.0, 80.0, 51.0, 57.0, 64.0, 53.0, 68.0, 57.0, 73.0, 72.0, 57.0, 68.0, 64.0, 53.0, 67.0, 57.0, 63.0, 57.0, 51.0, 46.0, 46.0, 52.0, 53.0, 69.0, 76.0, 41.0, 70.0, 54.0, 62.0, 33.0, 72.0, 56.0, 49.0, 67.0, 41.0, 67.0, 48.0, 27.0, 75.0, 68.0, 82.0, 82.0, 76.0, 58.0, 50.0, 64.0, 72.0, 76.0, 60.0, 57.0, 65.0, 58.0, 69.0, 60.0, 49.0, 65.0, 56.0, 40.0, 58.0, 47.0, 53.0, 65.0, 66.0, 63.0, 58.0, None, 71.0, 55.0, 58.0, 48.0, 74.0, 57.0, 65.0, 69.0, 63.0, 68.0, 63.0, 63.0, 57.0, 58.0, 48.0, 64.0, 63.0, 51.0, 39.0, 44.0, 55.0, 65.0, 74.0, 48.0, 53.0, 62.0, 67.0, 38.0, 78.0, 58.0, 50.0, 32.0, 78.0, 49.0, 67.0, 40.0, 65.0, 56.0, 58.0, 44.0, 57.0, 64.0, 64.0, 73.0, 52.0, 50.0, 48.0, 58.0, 61.0, 51.0, 
64.0, 62.0, 58.0, 54.0, 70.0, 55.0, 54.0, 50.0, 75.0, None, 50.0, 49.0, 64.0, 36.0, 64.0, 73.0, 74.0, 69.0, None, 54.0, 62.0, 80.0, 52.0, 58.0, 42.0, 63.0, 60.0, 73.0, 48.0, 58.0, 50.0, 52.0, 68.0, 52.0, 64.0, 53.0, 52.0, 64.0, 42.0, 53.0, 70.0, 70.0, 65.0, 60.0, 39.0, 73.0, 58.0, 48.0, 54.0, 70.0, 63.0, 48.0, 62.0, 67.0, 79.0, 69.0, 44.0, 74.0, 51.0, 58.0, 76.0, 71.0, None, 52.0, 68.0, 55.0, 69.0, None, 64.0, 65.0, 60.0, 69.0, 72.0, 64.0, 54.0, 50.0, 64.0, 75.0, 44.0, 56.0, 13.0, 83.0, 64.0, 64.0, 37.0, 49.0, 60.0, 71.0, 56.0, 51.0, 70.0, 60.0, None, 58.0, 60.0, 74.0, 62.0, 54.0, 64.0, 73.0, 49.0, 53.0, 75.0, 65.0, 58.0, 54.0, 54.0, 48.0, 53.0, 71.0, 54.0, 58.0, 69.0, 68.0, 40.0, 54.0, 79.0, 59.0, 64.0, 65.0, 54.0, 53.0, 56.0, 50.0, 58.0, 58.0, 65.0, 55.0, 83.0, 71.0, 64.0, 53.0, 76.0, 70.0, 48.0, 66.0, 53.0, 64.0, 64.0, 55.0, 47.0, 55.0, 62.0, 54.0, 63.0, 61.0, 59.0, None, 71.0, 52.0, 72.0, 58.0, 68.0, 54.0, 36.0, 67.0, 63.0, 46.0, 78.0, 71.0, 47.0, 75.0, 54.0, 39.0, 64.0, 81.0, 70.0, 65.0, 52.0, 64.0, 58.0, None, 46.0, 70.0, 73.0, 64.0, 59.0, 67.0, 70.0, 71.0, 69.0, 71.0, 74.0, 60.0, 72.0, 60.0, 65.0, 56.0, 41.0, 64.0, 74.0, 59.0, 54.0, None, 32.0, 63.0, 56.0, 73.0, 61.0, 59.0, 49.0, 80.0, 63.0, 48.0, 80.0, 71.0, 83.0, 85.0, 42.0, 50.0, 74.0, 53.0, 69.0, None, 64.0, 69.0, 35.0, 60.0, 66.0, 61.0, 71.0, 63.0, 64.0, 39.0, 53.0, 56.0, None, 61.0, 66.0, 53.0, 52.0, 81.0, 68.0, 74.0, 66.0, 76.0, 70.0, 64.0, 56.0, 65.0, 83.0, 53.0, 50.0, 73.0, 60.0, 73.0, 63.0, 72.0, 75.0, 56.0, 61.0, 41.0, 71.0, 62.0, 57.0, 44.0, 76.0, 34.0, 39.0, 62.0, 53.0, 42.0, 58.0, 60.0, 27.0, 71.0, 73.0, 48.0, 75.0, None, 64.0, 58.0, 40.0, 64.0, 64.0, 76.0, 56.0, 48.0, 70.0, 69.0, 49.0, 73.0, 64.0, 66.0, 65.0, 72.0, 74.0, 66.0, 73.0, 47.0, 48.0, 43.0, 72.0, 31.0, 68.0, 62.0, 70.0, 75.0, 59.0, 70.0, 56.0, 64.0, 65.0, 70.0, 62.0, 70.0, 65.0, 72.0, 62.0, 65.0, 65.0, 77.0, 80.0, 51.0, 55.0, 45.0, 69.0, 53.0, 60.0, 83.0, 41.0, 60.0, 45.0, 71.0, 43.0, 77.0, 75.0, 46.0, 43.0, None, 66.0, 76.0, 61.0, 
62.0, 68.0, 66.0, 58.0, 72.0, 63.0, 63.0, 76.0, 53.0, 66.0, 65.0, 80.0, 63.0, 27.0, 63.0, 69.0, 48.0, 59.0, 71.0, 53.0, 71.0, 80.0, 78.0, 76.0, 58.0, 74.0, 73.0, 58.0, 70.0, 67.0, 71.0, 72.0, 47.0, 45.0, 69.0, 64.0, 72.0, 65.0, 57.0, 73.0, 64.0, 79.0, 46.0, 83.0, 58.0, None, None, 75.0, 59.0, 73.0, 27.0, 77.0, 80.0, 83.0, 77.0, 72.0, 72.0, 61.0, 46.0, 48.0, 81.0, 60.0, None, 78.0, 73.0, 70.0, 42.0, 64.0, 69.0, None, 54.0, 72.0, 55.0, 83.0, 72.0, 52.0, 44.0, 46.0, 80.0, 57.0, 35.0, 69.0, 42.0, 69.0, 70.0, 60.0, 73.0, 37.0, 75.0, 26.0, 68.0, 50.0, 75.0, 42.0, 37.0, 80.0, 29.0, 54.0, 58.0, 79.0, 56.0, 69.0, 64.0, 59.0, 60.0, 77.0, 55.0, 63.0, 48.0, 73.0, 70.0, 67.0, 85.0, 78.0, 72.0, 54.0, 69.0, 75.0, 45.0, 80.0, 41.0, None, 73.0, 63.0, 69.0, 66.0, 47.0, 46.0, 64.0, 74.0, 70.0, 72.0, 65.0, 85.0, 72.0, 70.0, 68.0, 76.0, 83.0, 67.0, 53.0, 73.0, 58.0, 58.0, 53.0, 36.0, 50.0, 59.0, 56.0, 42.0, 50.0, 51.0, 70.0, 39.0, 67.0, 74.0, None, 67.0, 71.0, 64.0, 73.0, 64.0, 43.0, 75.0, 47.0, None, 60.0, 85.0, 83.0, 60.0, 60.0, 70.0, 53.0, 74.0, 76.0, 65.0, 65.0, 78.0, 46.0, 52.0, 69.0, 72.0, 71.0, 60.0, 65.0, 64.0, 52.0, 34.0, 71.0, 43.0, 71.0, 75.0, 65.0, 60.0, 60.0, 69.0, 39.0, 64.0, 75.0, 52.0, 64.0, 47.0, 77.0, 72.0, 46.0, 63.0, 37.0, 69.0, 63.0, 63.0, 60.0, 60.0, 81.0, None, 64.0, 80.0, 57.0, 71.0, 79.0, 52.0, 66.0, 60.0, None, 61.0, 77.0, 66.0, 67.0, 74.0, 73.0, 82.0, 46.0, 80.0, 56.0, 64.0, 76.0, 46.0, 78.0, 60.0, 68.0, 73.0, 42.0, 72.0, None, 63.0, 66.0, 39.0, 72.0, 65.0, 63.0, 68.0, 49.0, 59.0, 59.0, 79.0, 75.0, 63.0, 73.0, 76.0, 66.0, 35.0, None, 44.0, 64.0, 78.0, 71.0, 59.0, 70.0, 55.0, 60.0, 56.0, 80.0, 79.0, 71.0, 80.0, 77.0, 64.0, 63.0, 58.0, 63.0, 63.0, 14.0, 81.0, 64.0, 79.0, 81.0, 62.0, 71.0, 85.0, 67.0, 75.0, 64.0, 73.0, 43.0, 57.0, 65.0, 62.0, 35.0, 69.0, 81.0, 64.0, None, 53.0, 55.0, 70.0, 63.0, 65.0, 43.0, 64.0, 43.0, 68.0, 57.0, 69.0, None, None, 74.0, 67.0, 70.0, 69.0, 61.0, 36.0, 79.0, 76.0, 73.0, 72.0, 68.0, 61.0, 83.0, 83.0, 62.0, 69.0, 80.0, 64.0, 35.0, 
37.0, 70.0, 81.0, 75.0, 44.0, 79.0, 71.0, 64.0, 40.0, 58.0, 33.0, 52.0, 73.0, 60.0, 68.0, 60.0, 73.0, 67.0, 57.0, None, 67.0, 62.0, 55.0, 69.0, 12.0, 52.0, 78.0, 48.0, 58.0, 38.0, 69.0, 45.0, 63.0, 31.0, 75.0, 23.0, 67.0, 22.0, 66.0, 75.0, 38.0, 29.0, 64.0, 67.0, None, 60.0, 83.0, 52.0, 58.0, 44.0, 53.0, 68.0, 56.0, 60.0, 60.0, 62.0, None, 57.0, 43.0, 69.0, 45.0, 71.0, 65.0, 74.0, 73.0, 75.0, 79.0, 61.0, 56.0, 64.0, 82.0, 69.0, 64.0, 67.0, 58.0, 73.0, 72.0, 26.0, 40.0, 60.0, 61.0, 57.0, 73.0, 68.0, 69.0, None, 63.0, 70.0, 61.0, 71.0, 77.0, 67.0, 57.0, 58.0, 79.0, 68.0, 45.0, 67.0, 72.0, 68.0, 69.0, 71.0, 85.0, 60.0, 45.0, 68.0, None, 62.0, 65.0, 56.0, 65.0, 67.0, 41.0, 41.0, 49.0, 60.0, 78.0, 80.0, 68.0, 63.0, 69.0, 40.0, 55.0, 56.0, 68.0, 61.0, 68.0, 69.0, 69.0, 80.0, 56.0, 69.0, 44.0, 23.0, 70.0, 83.0, 80.0, 67.0, 67.0, 68.0, 77.0, 78.0, 73.0, 74.0, 65.0, 68.0, 72.0, 65.0, 60.0, 80.0, 56.0, 71.0, 61.0, 78.0, 41.0, 34.0, 66.0, 69.0, 70.0, None, 67.0, 58.0, 57.0, 58.0, 59.0, 76.0, 45.0, 73.0, 65.0, 61.0, None, 64.0, 64.0, 67.0, 33.0, 80.0, 50.0, 70.0, 42.0, 63.0, 71.0, 68.0, 56.0, 67.0, 68.0, 57.0, 42.0, 64.0, 75.0, 63.0, 67.0, 52.0, 39.0, 81.0, 67.0, 79.0, 65.0, None, 56.0, 44.0, 77.0, 70.0, 64.0, 41.0, 66.0, 69.0, 42.0, 54.0, 55.0, 69.0, 45.0, 65.0, 67.0, 63.0, None, 51.0, 76.0, 79.0, 45.0, 60.0, 78.0, 57.0, 69.0, 65.0, 27.0, 62.0, 64.0, 73.0, 59.0, 66.0, 72.0, 67.0, 76.0, 45.0, 69.0, 61.0, 66.0, 27.0, 80.0, 61.0, 83.0, 61.0, 69.0, 69.0, 83.0, 57.0, 58.0, 63.0, 60.0, None, 66.0, None, 70.0, 68.0, 59.0, 81.0, 40.0, 35.0, 77.0, 73.0, 51.0, 69.0, 58.0, 71.0, 70.0, 63.0, 51.0, 77.0, 53.0, 62.0, 55.0, 83.0, 45.0, 26.0, 74.0, 60.0, 64.0, 27.0, 71.0, 29.0, 70.0, 74.0, 67.0, 64.0, 60.0, 71.0, 52.0, 83.0, 60.0, 57.0, 46.0, 65.0, 54.0, 66.0, 65.0, 74.0, 78.0, 40.0, 57.0, 69.0, 79.0, 63.0, 77.0, 56.0, 52.0, 54.0, 80.0, 57.0, 73.0, 39.0, None, 67.0, 67.0, 71.0, 48.0, 71.0, 63.0, 82.0, 79.0, 52.0, None, 70.0, 60.0, 73.0, None, 27.0, 71.0, 67.0, 60.0, 44.0, 67.0, 49.0, 67.0, 
59.0, 57.0, 57.0, 56.0, 55.0, 72.0, 78.0, 67.0, None, 69.0, 49.0, 69.0, None, 70.0, 68.0, 52.0, 55.0, 61.0, 41.0, 68.0, 56.0, 70.0, 57.0, 71.0, 73.0, 72.0, 65.0, 69.0, 63.0, 61.0, 75.0, 70.0, 72.0, 69.0, 73.0, 44.0, 73.0, 51.0, 48.0, 73.0, 69.0, 69.0, 52.0, 66.0, 88.0, 71.0, 57.0, 68.0, 52.0, 74.0, 70.0, 75.0, 71.0, 69.0, 76.0, 63.0, 69.0, 69.0, 80.0, 64.0, 70.0, 80.0, 57.0, 63.0, 54.0, 80.0, 75.0, 72.0, 69.0, 57.0, 73.0, 50.0, 67.0, 59.0, 80.0, 59.0, 77.0, 45.0, 70.0, 60.0, 66.0, 73.0, 54.0, 67.0, 72.0, 44.0, 67.0, None, 67.0, 58.0, 64.0, 75.0, 40.0, 59.0, 52.0, 69.0, 55.0, 67.0, 57.0, 80.0, 68.0, 83.0, 72.0, 67.0, 79.0, 70.0, 37.0, 60.0, 64.0, 73.0, 72.0, 66.0, 21.0, 68.0, 78.0, 74.0, 69.0, 64.0, 32.0, 59.0, 69.0, 72.0, None, None, 64.0, 64.0, 75.0, 29.0, 80.0, 72.0, 78.0, 73.0, 72.0, 72.0, 46.0, 77.0, 70.0, 57.0, 65.0, 69.0, 67.0, 46.0, 26.0, 46.0, 64.0, 54.0, 75.0, None, 75.0, 63.0, 67.0, 61.0, 81.0, 83.0, None, None, 61.0, 68.0, 68.0, 67.0, 52.0, 75.0, 75.0, 69.0, 56.0, 57.0, None, None, 71.0, None, 56.0, 27.0, 70.0, 71.0, 49.0, 72.0, None, 70.0, None, None, 71.0, None, 57.0, 65.0, None, 72.0, 82.0, 72.0, 64.0, 67.0, 69.0, 80.0, 72.0, None, None, 78.0, None, None, 78.0, 67.0, 63.0, 50.0, 71.0, 71.0, 58.0, 64.0, 62.0, 32.0, 57.0, 68.0, 65.0, 47.0, 70.0, 52.0, 69.0, 73.0, 64.0, 67.0, 68.0, 57.0, 68.0, 71.0, 66.0, 52.0, 57.0, 59.0, 72.0, 56.0, 72.0, 64.0, 64.0, 61.0, 66.0, 67.0, 67.0, 66.0, 72.0, 72.0, 69.0, 73.0, 52.0, 62.0, None, 61.0, 57.0, 57.0, 63.0, 71.0, 75.0, 63.0, None, 63.0, 77.0, 58.0, 69.0, 78.0, 76.0, 67.0, 78.0, 72.0, 67.0, 75.0, 77.0, 47.0, 71.0, 55.0, 52.0, 72.0, 79.0, 61.0, 75.0, 69.0, 54.0, 71.0, 54.0, 51.0, 72.0, 67.0, 56.0, 76.0, 83.0, 77.0, 73.0, 69.0, 79.0, 77.0, 61.0, 79.0, 67.0, 66.0, 69.0, 42.0, 78.0, 76.0, 51.0, 80.0, 63.0, 77.0, 57.0, 51.0, 70.0, None, 69.0, 78.0, 58.0, 56.0, 61.0, 63.0, 71.0, 66.0, 67.0, 71.0, 81.0, 62.0, 69.0, 67.0, 51.0, 66.0, 71.0, 64.0, 56.0, 45.0, 80.0, 41.0, 63.0, 74.0, 73.0, 79.0, 77.0, 72.0, 57.0, 77.0, 76.0, 
64.0, 74.0, 78.0, 72.0, 72.0, 69.0, 27.0, None, 62.0, 73.0, 72.0, 69.0, 72.0, None, 80.0, 61.0, 58.0, 79.0, 31.0, 75.0, 75.0, 72.0, 78.0, 71.0, 68.0, 73.0, 56.0, 61.0, 72.0, 72.0, 72.0, 74.0, 78.0, 31.0, 33.0, 67.0, 71.0, 68.0, 79.0, 78.0, 57.0, 75.0, 68.0, 52.0, 63.0, 73.0, 69.0, 74.0, 66.0, 63.0, 78.0, 67.0, 27.0, 70.0, 75.0, 74.0, 69.0, 76.0, 73.0, 82.0, 70.0, 58.0, 72.0, 72.0, 71.0, 75.0, 67.0, 74.0, 73.0, 75.0, 69.0, 78.0, 69.0, 73.0, 79.0, 55.0, 80.0, 56.0, 60.0, 66.0, 67.0, 66.0, 81.0, 68.0, 78.0, 50.0, 70.0, 71.0, 69.0, 55.0, 68.0, 69.0, 57.0, 72.0, 61.0, 69.0, 51.0, 72.0, 61.0, 72.0, 60.0, 68.0, 70.0, 69.0, 80.0, 78.0, 68.0, 67.0, 63.0, 61.0, 77.0, 75.0, 71.0, 52.0, 54.0, 64.0, 51.0, 70.0, 66.0, None, 82.0, 58.0, 74.0, 61.0, 66.0, 71.0, 82.0, 70.0, 33.0, 72.0, 78.0, 74.0, 65.0, 72.0, 74.0, 52.0, 66.0, 82.0, 73.0, 73.0, 72.0, 64.0, 70.0, 70.0, 71.0, 61.0, 61.0, 77.0, 68.0, None, 78.0, 71.0, 56.0, 52.0, 68.0, 73.0, 75.0, 69.0, 75.0, 80.0, 80.0, 70.0, 78.0, 78.0, 70.0, 72.0, 68.0, 68.0, 58.0, 55.0, 76.0, 66.0, 58.0, 73.0, 72.0, 62.0, 69.0, 26.0, 69.0, 68.0, 76.0, 70.0, 59.0, 70.0, 72.0, 68.0, 71.0, 67.0, 69.0, 73.0, 73.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None]

In [20]:
# Attach the externally sourced walk/bike scores to each listing, keyed by NO.
# NOTE(review): walk_score / bike_score are plain lists assigned by position, so this
# relies on toPandas() returning the NO rows in the order the lists were built — confirm.
score_pdf = df.select('NO').toPandas().assign(WALKSCORE=walk_score, BIKESCORE=bike_score)
external_df = spark.createDataFrame(score_pdf)
df = df.join(external_df, on='NO', how='left')

In [21]:
# Calendar parts of the listing date, then ratio/size features.
# The chain order matters: LISTING_PRICE_PER_SQFT reads SQFT_TOTAL created one step earlier.
df = (
    df.withColumns(
        {
            "LIST_YEAR": F.year('LISTDATE'),
            "LIST_MONTH": F.month('LISTDATE'),
            "LIST_WEEKOFYEAR": F.weekofyear('LISTDATE'),
            "LIST_DAYOFMONTH": F.dayofmonth('LISTDATE'),
            "LIST_DAYOFYEAR": F.dayofyear('LISTDATE'),
        }
    )
    .withColumn('BED_TO_BATHS', F.col('BEDROOMS') / F.col('BATHSTOTAL'))
    .withColumn('SQFT_TOTAL', F.col('SQFTABOVEGROUND') + F.col('SQFTBELOWGROUND'))
    .withColumn('LISTING_PRICE_PER_SQFT', F.col('LISTPRICE') / F.col('SQFT_TOTAL'))
)
print(len(df.columns))

85


In [22]:
def split_explode_join(df, groupby, column, sep=', ', concat_sep='_'):
    """Turn a delimited multi-value string column into indicator columns and join them onto ``df``.

    The column is upper-cased, split on ``sep``, and every resulting token becomes
    a new column named ``<column><concat_sep><TOKEN>`` (token trimmed). Null inputs
    are first replaced by the string ``'NaN'``, so they end up under ``<column><concat_sep>NAN``.

    Parameters
    ----------
    df : DataFrame containing ``groupby`` and ``column``.
    groupby : name of the key column used both for the pivot and for the join back.
    column : name of the delimited string column to expand.
    sep : delimiter handed to ``F.split`` (which treats it as a regex pattern).
    concat_sep : separator placed between the column name and the token.

    Returns
    -------
    ``df`` left-joined with the pivoted indicator columns on ``groupby``.
    """
    tokens_col = f'{column}_LIST'
    label_col = f'EXPLODED_{column}_LIST'

    # One row per (key, token), labelled with the final indicator-column name
    exploded = (
        df.select(groupby, column)
        .fillna('NaN', subset=column)
        .withColumn(tokens_col, F.split(F.upper(column), sep))
        .withColumn(label_col, F.explode(tokens_col))
        .withColumn(label_col, F.concat(F.lit(f'{column}{concat_sep}'), F.trim(label_col)))
        .withColumn('ONE', F.lit(1))
    )

    # NOTE(review): coalesce with a single argument is a no-op; kept to leave behavior untouched.
    indicator = F.coalesce(F.first('ONE'))
    flags = exploded.groupBy(groupby).pivot(label_col).agg(indicator)
    return df.join(flags, on=groupby, how='left')

In [23]:
# Checkpoint files are written under the current working directory
sc.setCheckpointDir('.')
split_ohe_columns = [
    'FENCE', 'ROOF', 'POOLDESCRIPTION', 'GARAGEDESCRIPTION', 'APPLIANCES', 'EXTERIOR',
    'DININGROOMDESCRIPTION', 'BASEMENT', 'BATHDESC', 'ZONING', 'COOLINGDESCRIPTION',
]
# Expand each multi-value column into indicator columns, checkpointing after every join
for column in split_ohe_columns:
    expanded = split_explode_join(df, 'NO', column)
    df = expanded.checkpoint()
print(len(df.columns))

247


In [24]:
def prefixed_join(df, groupby, column, concat_sep=':'):
    """One-hot encode a single-valued column and join the indicator columns onto ``df``.

    Each distinct non-null value of ``column`` (trimmed, upper-cased) becomes a new
    column named ``<column><concat_sep><VALUE>`` holding 1 for the rows that carry
    that value. Rows where ``column`` is null are dropped before pivoting, so they
    contribute no indicator column.

    Parameters
    ----------
    df : DataFrame containing ``groupby`` and ``column``.
    groupby : name of the key column used both for the pivot and for the join back.
    column : name of the column to encode.
    concat_sep : separator placed between the column name and the value.

    Returns
    -------
    ``df`` left-joined with the pivoted indicator columns on ``groupby``.
    """
    # Build the helper column name once: the original created it with column.upper()
    # but pivoted on the un-uppercased spelling, which only matched when the two
    # happened to coincide (or Spark resolved names case-insensitively).
    prefixed_col = f'{column.upper()}_PREFIXED'

    labelled = (
        df.select(groupby, column)
        .dropna(subset=column)
        .withColumn(prefixed_col, F.concat(F.lit(f'{column}{concat_sep}'), F.upper(F.trim(column))))
        .withColumn('ONE', F.lit(1))
    )
    # first('ONE') is 1 wherever the (key, value) pair exists; the former
    # single-argument coalesce around it was a no-op and has been removed.
    flags = labelled.groupBy(groupby).pivot(prefixed_col).agg(F.first('ONE'))
    return df.join(flags, on=groupby, how='left')

In [25]:
ohe_columns = [
    'CITY', 'LISTTYPE', 'SCHOOLDISTRICTNUMBER', 'POTENTIALSHORTSALE',
    'STYLE', 'ASSUMABLEMORTGAGE', 'ASSESSMENTPENDING',
]
# Add one indicator column per distinct value, checkpointing after every join
for column in ohe_columns:
    encoded = prefixed_join(df, 'NO', column)
    df = encoded.checkpoint()
print(len(df.columns))

292


In [26]:
# Exercise columns that are absent from df, i.e. features this notebook did not rebuild
present_columns = df.columns
not_reproduced_columns = [name for name in exercise_columns if name not in present_columns]
print(len(not_reproduced_columns))
not_reproduced_columns

15


['MORTGAGE30US',
 'MORTGAGE30US-1WK',
 'MORTGAGE30US-2WK',
 'MORTGAGE30US-3WK',
 'MORTGAGE30US-4WK',
 'PERCENT_OLDER_AGE_HOMES',
 'PERCENT_SIMILAR_AGE_HOMES',
 'PERCENT_NEWER_AGE_HOMES',
 'PERCENT_BIGGER_SIZE_HOMES',
 'PERCENT_SIMILAR_SIZE_HOMES',
 'PERCENT_SMALLER_SIZE_HOMES',
 'LISTING_TO_MEDIAN_RATIO',
 'ASSESSED_TO_LIST',
 'TAX_TO_LIST',
 'PRICE_REDUCTION_PERCENT']

In [27]:
# Columns currently in df that the exercise's column list does not contain
unwanted_columns = list(filter(lambda name: name not in exercise_columns, df.columns))
print(len(unwanted_columns))
unwanted_columns

54


['NO',
 'MLSID',
 'STREETNUMBERNUMERIC',
 'STREETADDRESS',
 'STREETNAME',
 'POSTALCODE',
 'STATEORPROVINCE',
 'CITY',
 'LISTDATE',
 'LISTTYPE',
 'PRICEPERTSFT',
 'FENCE',
 'MAPLETTER',
 'LOTSIZEDIMENSIONS',
 'SCHOOLDISTRICTNUMBER',
 'OFFMARKETDATE',
 'ROOMAREA4',
 'ROOMTYPE',
 'ROOF',
 'ROOMFLOOR4',
 'POTENTIALSHORTSALE',
 'POOLDESCRIPTION',
 'PDOM',
 'GARAGEDESCRIPTION',
 'ROOMFLOOR1',
 'ROOMAREA1',
 'TAXYEAR',
 'UNITNUMBER',
 'ZONING',
 'STYLE',
 'COOLINGDESCRIPTION',
 'APPLIANCES',
 'BACKONMARKETDATE',
 'ROOMFAMILYCHAR',
 'ROOMAREA3',
 'EXTERIOR',
 'ROOMFLOOR3',
 'ROOMFLOOR2',
 'ROOMAREA2',
 'DININGROOMDESCRIPTION',
 'BASEMENT',
 'CLASS',
 'BATHDESC',
 'ROOMAREA5',
 'ROOMFLOOR5',
 'ROOMAREA6',
 'ROOMFLOOR6',
 'ROOMAREA7',
 'ROOMFLOOR7',
 'ROOMAREA8',
 'ROOMFLOOR8',
 'ASSUMABLEMORTGAGE',
 'ASSESSMENTPENDING',
 'features']

In [28]:
# Checksum: df columns that belong to the exercise, plus the exercise columns missing from df.
# NOTE(review): presumably this should equal len(exercise_columns) — confirm.
n_exercise_columns_in_df = len(df.columns) - len(unwanted_columns)
n_exercise_columns_in_df + len(not_reproduced_columns)

253

In [29]:
# Dropping columns not specified by the exercise (irrelevant, leakage, etc.)
# df = df.drop(*unwanted_columns)
# len(df.columns)

### Dropping Columns with Low Observations

> In this exercise, we are going to remove columns that have fewer than 30 observations. 30 is a common minimum number of observations for statistical significance. Any fewer than that and the relationships cause overfitting because of sheer coincidence!

In [30]:
# Name prefixes of the generated indicator columns: '<COL>_' for the split/explode
# columns and '<COL>:' for the plain one-hot columns
binary_prefixes = [f'{name}_' for name in split_ohe_columns] + [f'{name}:' for name in ohe_columns]
# str.startswith accepts a tuple and matches if any of its prefixes matches
binary_cols = [col for col in df.columns if col.startswith(tuple(binary_prefixes))]
print(binary_cols)

['FENCE_CHAIN LINK', 'FENCE_ELECTRIC', 'FENCE_FULL', 'FENCE_INVISIBLE', 'FENCE_NAN', 'FENCE_NONE', 'FENCE_OTHER', 'FENCE_PARTIAL', 'FENCE_PRIVACY', 'FENCE_RAIL', 'FENCE_WIRE', 'FENCE_WOOD', 'ROOF_AGE 8 YEARS OR LESS', 'ROOF_AGE OVER 8 YEARS', 'ROOF_ASPHALT SHINGLES', 'ROOF_FLAT', 'ROOF_METAL', 'ROOF_NAN', 'ROOF_OTHER', 'ROOF_PITCHED', 'ROOF_RUBBER', 'ROOF_SHAKES', 'ROOF_SLATE', 'ROOF_TAR/GRAVEL', 'ROOF_TILE', 'ROOF_UNSPECIFIED SHINGLE', 'ROOF_WOOD SHINGLES', 'POOLDESCRIPTION_ABOVE GROUND', 'POOLDESCRIPTION_BELOW GROUND', 'POOLDESCRIPTION_HEATED', 'POOLDESCRIPTION_INDOOR', 'POOLDESCRIPTION_NAN', 'POOLDESCRIPTION_NONE', 'POOLDESCRIPTION_OUTDOOR', 'POOLDESCRIPTION_SHARED', 'GARAGEDESCRIPTION_ASSIGNED', 'GARAGEDESCRIPTION_ATTACHED GARAGE', 'GARAGEDESCRIPTION_CARPORT', 'GARAGEDESCRIPTION_CONTRACT PKG REQUIRED', 'GARAGEDESCRIPTION_COVERED', 'GARAGEDESCRIPTION_DETACHED GARAGE', 'GARAGEDESCRIPTION_DRIVEWAY - ASPHALT', 'GARAGEDESCRIPTION_DRIVEWAY - CONCRETE', 'GARAGEDESCRIPTION_DRIVEWAY - GRAVE

In [31]:
def drop_low_observation_columns(df, columns, threshold=30):
    """Drop every column in `columns` whose sum is below `threshold`.

    Intended for 0/1 indicator columns, where the column sum equals the
    number of observations carrying that indicator.

    Args:
        df: Spark DataFrame to prune.
        columns: Names of the candidate columns. Names are wrapped in
            backticks, so they may contain spaces or punctuation (but not
            backticks themselves).
        threshold: Minimum sum a column needs in order to be kept.

    Returns:
        `df` without the low-observation columns. `df` itself is returned
        when `columns` is empty or nothing falls below the threshold.
    """
    columns = list(columns)
    if not columns:
        return df
    # All sums are computed in a single aggregation (one Spark job) rather
    # than one job per column; results come back in the order requested.
    totals = df.selectExpr(*[f'sum(`{col}`)' for col in columns]).collect()[0]
    # A NULL sum (column holds no non-null values) counts as zero observations.
    sparse = [col for col, total in zip(columns, totals)
              if total is None or total < threshold]
    return df.drop(*sparse) if sparse else df

# Prune the indicator columns whose sum is below the default threshold
new_df = drop_low_observation_columns(df, binary_cols)

# Shape before and after pruning
print(f'Rows: {df.count()} Columns: {len(df.columns)}')
print(f'Rows: {new_df.count()} Columns: {len(new_df.columns)}')

Rows: 5000 Columns: 292
Rows: 5000 Columns: 244


> Removing low observation features is helpful in many ways. It can improve processing speed of model training, prevent overfitting by coincidence and help interpretability by reducing the number of things to consider.

### Naively Handling Missing and Categorical Values

Random Forest Regression is robust enough to allow us to ignore many of the more time consuming and tedious data preparation steps. While some implementations of Random Forest handle missing and categorical values automatically, PySpark's does not. The math remains the same however so we can get away with some naive value replacements.

For missing values, since our data is strictly positive, we will assign `-1`. The random forest will split on this value and handle it differently than the rest of the values in the same feature.

For categorical values, we can just map the text values to numbers, and again the random forest will appropriately handle them by splitting on them. In this example, we will dust off `pipelines` from Introduction to PySpark to write our code more concisely. Please note that the exercise will start by displaying the `dtypes` of the columns in the dataframe; compare them to the results at the end of this exercise.

In [32]:
# Categorical columns that get index-encoded with StringIndexer
categorical_cols = [
    "CITY", "LISTTYPE", "SCHOOLDISTRICTNUMBER", "POTENTIALSHORTSALE",
    "STYLE", "ASSUMABLEMORTGAGE", "ASSESSMENTPENDING",
]

from pyspark.ml import Pipeline
from pyspark.ml.feature import StringIndexer

# Missing walk/bike scores are replaced by the sentinel -1
df = df.fillna(-1, subset=['WALKSCORE', 'BIKESCORE'])
# One StringIndexer per categorical column, writing to "<column>_IDX",
# each configured with handleInvalid="keep"
indexers = [
    StringIndexer(inputCol=name, outputCol=name + "_IDX", handleInvalid="keep")
    for name in categorical_cols
]
# Bundle the indexers as the stages of a single pipeline
indexer_pipeline = Pipeline(stages=indexers)
# Fit on df, then transform the same df to append the *_IDX columns
df_indexed = (
    indexer_pipeline
    .fit(df)
    .transform(df)
)

In [33]:
# Remove the original categorical columns now that each has a *_IDX counterpart
df_indexed = df_indexed.drop(*categorical_cols)
# Print the (column name, dtype) pairs of the transformed frame
print(df_indexed.dtypes)

[('NO', 'bigint'), ('MLSID', 'string'), ('STREETNUMBERNUMERIC', 'bigint'), ('STREETADDRESS', 'string'), ('STREETNAME', 'string'), ('POSTALCODE', 'bigint'), ('STATEORPROVINCE', 'string'), ('SALESCLOSEPRICE', 'bigint'), ('LISTDATE', 'date'), ('LISTPRICE', 'bigint'), ('ORIGINALLISTPRICE', 'bigint'), ('PRICEPERTSFT', 'double'), ('FOUNDATIONSIZE', 'bigint'), ('FENCE', 'string'), ('MAPLETTER', 'string'), ('LOTSIZEDIMENSIONS', 'string'), ('DAYSONMARKET', 'bigint'), ('OFFMARKETDATE', 'date'), ('FIREPLACES', 'bigint'), ('ROOMAREA4', 'string'), ('ROOMTYPE', 'string'), ('ROOF', 'string'), ('ROOMFLOOR4', 'string'), ('POOLDESCRIPTION', 'string'), ('PDOM', 'bigint'), ('GARAGEDESCRIPTION', 'string'), ('SQFTABOVEGROUND', 'bigint'), ('TAXES', 'bigint'), ('ROOMFLOOR1', 'string'), ('ROOMAREA1', 'string'), ('TAXWITHASSESSMENTS', 'double'), ('TAXYEAR', 'bigint'), ('LIVINGAREA', 'bigint'), ('UNITNUMBER', 'string'), ('YEARBUILT', 'bigint'), ('ZONING', 'string'), ('ACRES', 'double'), ('COOLINGDESCRIPTION', 's

> As you can hopefully see, handling missing and categorical values for Random Forest Regression is fairly painless compared to some of the other things we would have had to do if we chose a different algorithm!

# Building a Model

## RandomForestRegressor
**Basic Model Parameters**
* featuresCol="features"
* labelCol="label"
* predictionCol="prediction"  
* seed=None

**Our Model Parameter values**
* featuresCol="features"
* labelCol="SALESCLOSEPRICE"
* predictionCol="Prediction_Price"
* seed=42

## Training a Random Forest

In [34]:
# Find the date to use in splitting test and train
split_date = train_test_split_date(df, 'OFFMARKETDATE', 0.132)
print(f'{split_date=}')
# Create sequential test and training sets:
# train = rows that went off market before the split date
train_df = df.where(df['OFFMARKETDATE'] < split_date)
# test = rows that went off market on/after the split date but were listed on or before it
test_df = df.where(df['OFFMARKETDATE'] >= split_date).where(df['LISTDATE'] <= split_date)
# Each count() is a Spark action, so count every frame once and reuse the numbers
total_count = df.count()
train_count = train_df.count()
test_count = test_df.count()
print(f'Total: {total_count} Train: {train_count} Test: {test_count} Ignored: {total_count - train_count - test_count}')

is float
split_date=datetime.date(2017, 12, 10)
Total: 5000 Train: 4828 Test: 154 Ignored: 18


In [35]:
from pyspark.ml.regression import RandomForestRegressor

# Configure the random forest and fit it on the training set in one expression;
# rf_model ends up bound to the fitted model
rf_model = RandomForestRegressor(
    seed=42,
    predictionCol="Prediction_Price",
    labelCol="SALESCLOSEPRICE",
    featuresCol="features",
).fit(train_df)

## Predicting with a Model

In [36]:
# Score the held-out set with the fitted random forest
rfr_predictions = rf_model.transform(test_df)
# Show the first five predictions next to the actual sale prices
rfr_predictions.select(["Prediction_Price", "SALESCLOSEPRICE"]).show(n=5)

+------------------+---------------+
|  Prediction_Price|SALESCLOSEPRICE|
+------------------+---------------+
| 171639.4528586283|         165000|
| 260900.1474134571|         247000|
|142444.87873712316|         152500|
| 205748.0335092715|         197000|
|260026.97684505762|         243000|
+------------------+---------------+
only showing top 5 rows





### Evaluating a Model

In [37]:
from pyspark.ml.evaluation import RegressionEvaluator
# Evaluator that compares the actual sale price with the predicted price
evaluator = RegressionEvaluator(
    predictionCol="Prediction_Price",
    labelCol="SALESCLOSEPRICE",
)
# Evaluate the random forest predictions once per metric
rmse, r2 = (
    evaluator.evaluate(rfr_predictions, {evaluator.metricName: metric})
    for metric in ("rmse", "r2")
)
# Report both metrics
print(f'RMSE: {rmse}')
print(f'R^2: {r2}')

RMSE: 19917.033228039953
R^2: 0.974771400412479


## Exercises

### Building a Gradient Boosted Trees Regression Model

In [38]:
from pyspark.ml.regression import GBTRegressor

# Configure a Gradient Boosted Trees (GBT) regressor with the same columns and
# seed as the random forest, and fit it on the training set;
# gbt_model ends up bound to the fitted model
gbt_model = GBTRegressor(
    seed=42,
    predictionCol="Prediction_Price",
    labelCol='SALESCLOSEPRICE',
    featuresCol='features',
).fit(train_df)

In [39]:
# Score the held-out set with the fitted GBT model
gbt_predictions = gbt_model.transform(test_df)
# Show the first five predictions next to the actual sale prices
gbt_predictions.select(["Prediction_Price", "SALESCLOSEPRICE"]).show(n=5)

+------------------+---------------+
|  Prediction_Price|SALESCLOSEPRICE|
+------------------+---------------+
|163912.95890436717|         165000|
| 258152.4635674858|         247000|
|148179.98836945827|         152500|
|202149.65878374162|         197000|
|259771.60385521356|         243000|
+------------------+---------------+
only showing top 5 rows



### Evaluating & Comparing Algorithms

In [40]:
from pyspark.ml.evaluation import RegressionEvaluator

# Evaluator that compares the actual sale price with the predicted price
evaluator = RegressionEvaluator(
    predictionCol="Prediction_Price",
    labelCol="SALESCLOSEPRICE",
)
# Maps a display label to the predictions produced by that model
models = {
    'Gradient Boosted Trees': gbt_predictions,
    'Random Forest Regression': rfr_predictions,
}
for key, preds in models.items():
    # Same two metrics for every model so the numbers are comparable
    rmse = evaluator.evaluate(preds, {evaluator.metricName: "rmse"})
    r2 = evaluator.evaluate(preds, {evaluator.metricName: "r2"})

    # Report both metrics, prefixed with the model label
    print(f'{key} RMSE: {rmse}')
    print(f'{key} R^2: {r2}')

Gradient Boosted Trees RMSE: 21797.462201608305
Gradient Boosted Trees R^2: 0.9697826959431237
Random Forest Regression RMSE: 19917.033228039953
Random Forest Regression R^2: 0.974771400412479


# Interpreting, Saving & Loading

## Interpreting a Model

In [41]:
import pandas as pd
# Build the importance table in one chain:
#   1. importances from the fitted random forest become the 'importance' column
#   2. feature names are attached as a Series (aligned on the row index)
#   3. rows are ordered from most to least important
fi_df = (
    pd.DataFrame(rf_model.featureImportances.toArray(), columns=['importance'])
    .assign(feature=pd.Series(features_cols))
    .sort_values(by='importance', ascending=False)
)

In [42]:
# Interpret results: fi_df is sorted by importance (descending), so these are
# the nine most important features
fi_df.head(9)

Unnamed: 0,importance,feature
1,0.343282,LISTPRICE
2,0.219893,ORIGINALLISTPRICE
12,0.090295,LIVINGAREA
19,0.079788,BATHSTOTAL
9,0.075346,TAXES
8,0.067342,SQFTABOVEGROUND
3,0.041688,PRICEPERTSFT
10,0.032333,TAXWITHASSESSMENTS
6,0.016859,FIREPLACES


## Saving & Loading Models

In [44]:
# Save model. overwrite() replaces a copy left at this path by a previous run,
# so no shell `rm -rf` is needed and re-running the cell does not fail.
rf_model.write().overwrite().save('rfr_real_estate_model')
from pyspark.ml.regression import RandomForestRegressionModel
# Load the saved model back from the same path
model2 = RandomForestRegressionModel.load('rfr_real_estate_model')

## Exercises

### Interpreting Results

In [45]:
# Build the importance table in one chain:
#   1. importances from the fitted random forest become the 'importance' column
#   2. feature names are attached as a Series (aligned on the row index)
#   3. rows are ordered from most to least important
fi_df = (
    pd.DataFrame(rf_model.featureImportances.toArray(), columns=['importance'])
    .assign(feature=pd.Series(features_cols))
    .sort_values(by='importance', ascending=False)
)

# Inspect the ten most important features
fi_df.head(10)

Unnamed: 0,importance,feature
1,0.343282,LISTPRICE
2,0.219893,ORIGINALLISTPRICE
12,0.090295,LIVINGAREA
19,0.079788,BATHSTOTAL
9,0.075346,TAXES
8,0.067342,SQFTABOVEGROUND
3,0.041688,PRICEPERTSFT
10,0.032333,TAXWITHASSESSMENTS
6,0.016859,FIREPLACES
13,0.005787,YEARBUILT


### Saving & Loading Models

In [47]:
from pyspark.ml.regression import RandomForestRegressionModel

# Save model. overwrite() replaces a copy left at this path by a previous run,
# so no shell `rm -rf` is needed and re-running the cell does not fail.
# NOTE(review): the importance table in this notebook ranks LISTPRICE first, so
# the model saved under 'rfr_no_listprice' appears to include list price;
# confirm the path name is intended.
rf_model.write().overwrite().save('rfr_no_listprice')

# Load the saved model back from the same path
loaded_model = RandomForestRegressionModel.load('rfr_no_listprice')