In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import skew
from collections import Counter
from scipy import stats
import json
import time
import re

<p style="font-size:14pt"><b>Reading dataset</b></p>

In [2]:
data = pd.read_csv("cars.csv")

<p style="font-size:14pt"><b>Dropping irrelevant features</b></p>

In [3]:
data.drop('seller_name', axis=1, inplace=True)
data.drop('price_drop', axis=1, inplace=True)

<p style="font-size:14pt"><b>Drop sample when more than 25% of the data is missing</b></p>

In [4]:
print("Before dropping irrelevant rows:", data.shape)

data.dropna(thresh=13, inplace=True)
print("After dropping irrelevant rows:", data.shape)

Before dropping irrelevant rows: (762091, 18)
After dropping irrelevant rows: (755203, 18)


<p style="font-size:14pt"><b>Feature processing</b></p>

In [5]:
# 1- manufacturer

print("Unique values:", len(data['manufacturer'].unique()))
print("Unique values of manufacturer:", data['manufacturer'].unique())

one_hot_encoded = pd.get_dummies(data['manufacturer'], prefix='manufacturer')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('manufacturer', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

data['manufacturer_Other'] = 0

Unique values: 30
Unique values of manufacturer: ['Acura' 'Audi' 'BMW' 'Buick' 'Cadillac' 'Chevrolet' 'Chrysler' 'Dodge'
 'Ford' 'GMC' 'Honda' 'Hyundai' 'INFINITI' 'Jaguar' 'Jeep' 'Kia'
 'Land Rover' 'Lexus' 'Lincoln' 'Mazda' 'Mercedes-Benz' 'Mitsubishi'
 'Nissan' 'Porsche' 'RAM' 'Subaru' 'Tesla' 'Toyota' 'Volkswagen' 'Volvo']


In [6]:
# 2- model

model_names = [m.split()[0] for m in data['model']]
data['model'] = model_names

print("Unique values:", len(data['model'].unique()))

one_hot_encoded = pd.get_dummies(data['model'], prefix='model')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('model', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

data['model_Other'] = 0

Unique values: 741


In [7]:
# 3- year

year_values = data['year'].values

# calculate missing values

missing_count = np.isnan(year_values).sum()
total = len(year_values);
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

# calculate unique values

unique_values = len(np.unique(year_values))
print("Unique Values:", unique_values)

# One-hot-encoding

one_hot_encoded = pd.get_dummies(data['year'], prefix='year')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('year', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

data['year_Other'] = 0

Missing values: 0 ( 0.0 %)
Unique Values: 98


In [8]:
# 4- mileage

mileage_values = data['mileage'].values

missing_count = np.isnan(mileage_values).sum()
total = len(mileage_values);
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

mileage_values_without_nan = mileage_values[~np.isnan(mileage_values)]

skewness = skew(mileage_values_without_nan)
print("Skewness of 'mileage':", skewness)

# Since distribution is heavily skewed towards the left (skewness < -1) => replace missing values with median

median = np.nanmedian(mileage_values)

data['mileage'].fillna(median, inplace=True)

mean = np.mean(mileage_values)
std = np.std(mileage_values)

print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std)

scaled_mileage_values = (mileage_values - mean) / std

data['mileage'] = scaled_mileage_values

stats = {}

stats['mileage'] = [mean, std]

Missing values: 483 ( 0.06395631373286388 %)
Skewness of 'mileage': 1.4545791397787897
Mean: 55993.40694223937
Median: 45912.0
Standard Deviation: 43546.652793744746


In [9]:
# engine

def extract_horsepower(engine_type):
    if pd.isna(engine_type):
        return np.nan
    horsepower = re.search(r'(\d+HP)', engine_type)
    if horsepower:
        return int(horsepower.group(1)[:-2])
    else:
        return np.nan

def extract_liters(engine_type):
    if pd.isna(engine_type):
        return np.nan
    liters = re.search(r'(\d+\.\d+)L\b', engine_type)
    if liters:
        return float(liters.group(1))
    else:
        return np.nan

def is_turbo(engine_type):
    if pd.isna(engine_type):
        return 0
    if "Turbo" in engine_type:
        return 1
    else:
        return 0

def extract_cylinders(engine_type):
    if pd.isna(engine_type):
        return np.nan
    cylinders = re.search(r'\b(I-\d+|V\d+)\b', engine_type)
    if cylinders:
        cylinders_number = cylinders.group(0)[2:]
        if cylinders_number.isdigit():  # Check if it's a valid integer
            return int(cylinders_number)
        else:
            return np.nan  # Return NaN if it's not a valid integer
    else:
        return np.nan  # Return NaN if the pattern is not found


def extract_injection_type(engine_type):
    if pd.isna(engine_type):
        return np.nan
    injection_type = re.search(r'(MPFI|GDI|SPFI|PGM-FI|DI|SIDI|TFSI|FSI)', engine_type)
    if injection_type:
        return injection_type.group(0)
    else:
        return np.nan



def handle_missing_values(data):
    for column in data.columns:
        if data[column].dtype == 'object':  # Check if the column is non-numeric
            mode_value = data[column].mode().iloc[0]  # Calculate mode
            data[column].fillna(mode_value, inplace=True)  # Impute missing values with mode
        else:
            median_value = data[column].median()
            data[column].fillna(median_value, inplace=True)
    return data


data['horsepower'] = data['engine'].apply(extract_horsepower)
data['turbo'] = data['engine'].apply(is_turbo)
data['cylinders'] = data['engine'].apply(extract_cylinders)
data['injection_type'] = data['engine'].apply(extract_injection_type)

data = handle_missing_values(data)

In [10]:
# 5- injection_type

one_hot_encoded = pd.get_dummies(data['injection_type'], prefix='injection_type')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('injection_type', axis=1, inplace=True)
data.drop('engine', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

data['injection_type_Other'] = 0  

In [11]:
# 6- cylinders

one_hot_encoded = pd.get_dummies(data['cylinders'], prefix='cylinders')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('cylinders', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

data['cylinders_Other'] = 0  

In [12]:
# 7- turbo

one_hot_encoded = pd.get_dummies(data['turbo'], prefix='turbo')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('turbo', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

In [13]:
# 8- horsepower

horsepower_values = data['horsepower'].values

median = np.nanmedian(horsepower_values)

data['horsepower'].fillna(median, inplace=True)

mean = np.mean(horsepower_values)
std = np.std(horsepower_values)

print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std)

scaled_horsepower_values = (horsepower_values - mean) / std

data['horsepower'] = scaled_horsepower_values

stats['horsepower'] = [mean, std]

Mean: 154.02551366983448
Median: 154.0
Standard Deviation: 23.652139099171883


In [14]:
# 9- transmission

def get_transmission_info(transmission):
    transmission_type = "Other"
    number_of_speeds = "Unknown"
    
    if isinstance(transmission, str):
        transmission = transmission.lower()
        
      # Use a regular expression to find the number of speeds
       
        if "cvt" in transmission or "shiftronic" in transmission or "continuously" in transmission:
            transmission_type = "Automatic Continuously Variable"
        
        elif "dual" in transmission or "dct" in transmission:
            transmission_type = "Dual Clutch"
    
        elif "semi" in transmission or "autostick" in transmission or "paddle" in transmission:
            transmission_type = "Semi-Automatic"
        
        elif "manual" in transmission or " m " in transmission:
            transmission_type = "Manual"
        
        elif "automatic" in transmission or "auto" in transmission or "a/t" in transmission or " a " in transmission or " at" in transmission :
            transmission_type = "Automatic"
            
       
    
    
        speeds = re.search(r'(\d+)\s*-?\s*(spd|speed)', transmission)
        if speeds:
            number_of_speeds = speeds.group(1) + "-speed"
            
    return (transmission_type, number_of_speeds)

data["transmission"], data["number_of_speeds"] = zip(*data["transmission"].apply(get_transmission_info))

print("Transmission unique values:", len(data['transmission'].unique()))


one_hot_encoded = pd.get_dummies(data['transmission'], prefix='transmission')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('transmission', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

print("Number of speeds unique values:", len(data['number_of_speeds'].unique()))

one_hot_encoded = pd.get_dummies(data['number_of_speeds'], prefix='number_of_speeds')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('number_of_speeds', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Transmission unique values: 6
Number of speeds unique values: 11


In [15]:
# 10- drivetrain

def process_drivetrain(drivetrain):    
    if isinstance(drivetrain, str):
        drivetrain = drivetrain.lower()
        if "front" in drivetrain or "fwd" in drivetrain:
            return "Front-Wheel Drive"
        elif "all" in drivetrain or "awd" in drivetrain:
            return "All-Wheel Drive"
        elif "four" in drivetrain or "4" in drivetrain:
            return "Four-Wheel Drive"
        elif "rear" in drivetrain or "rwd" in drivetrain:
            return "Rear-Wheel Drive"
        else:
            return 'Other'
    else:
        return 'Other'
        

# calculate missing values
missing_count = data['drivetrain'].isnull().sum()
total = len(data['drivetrain'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

data['drivetrain'] = data['drivetrain'].apply(process_drivetrain)

# calculate 'not' missing values which will be added to others 
other_count = (data['drivetrain'] == 'other').sum()
print("'Other' count excluding missing:", other_count - missing_count)

# calculate unique values
unique_values = len(data['drivetrain'].unique())
print("Unique Values:", unique_values)

# One-hot-encoding
one_hot_encoded = pd.get_dummies(data['drivetrain'], prefix='drivetrain')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('drivetrain', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)
'Other' count excluding missing: 0
Unique Values: 5


In [16]:
# 11- fuel_type

def process_fuel_type(fuel):
        if isinstance(fuel, str):
            fuel = fuel.lower()
        else:
            fuel = 'other'
        if fuel in fuel_types:
            return fuel
        else:
            return 'other'

        
# calculate missing values
missing_count = data['fuel_type'].isnull().sum()
total = len(data['fuel_type'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

fuel_types = ['gasoline', 'hybrid', 'diesel','electric']

data['fuel_type'] = data['fuel_type'].apply(process_fuel_type)

# calculate 'not' missing values which will be added to others 
other_count = (data['fuel_type'] == 'other').sum()
print("'Other' count excluding missing:", other_count - missing_count)

# calculate unique values
unique_values = len(data['fuel_type'].unique())
print("Unique Values:", unique_values)

# One-hot-encoding
one_hot_encoded = pd.get_dummies(data['fuel_type'], prefix='fuel_type')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('fuel_type', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)
'Other' count excluding missing: 21147
Unique Values: 5


In [17]:
# 12- mpg

def calculate_middle_value(mpg_values):
    updated_values = []
    for value in mpg_values:
        if isinstance(value, str) and '-' in value:
            start, end = map(float, value.split('-'))
            updated_values.append((start + end) / 2)
        else:
            updated_values.append(value)
    return pd.Series(updated_values)


mpg_values = data['mpg'].values

# Update range with middle value
mpg_values = calculate_middle_value(data['mpg'])

# Convert to numeric, handle errors by coercing to NaN
mpg_values = pd.to_numeric(mpg_values, errors='coerce')

# Replace missing values with median
mean = np.nanmean(mpg_values)
mpg_values.fillna(mean, inplace=True)

# Calculate skewness
skewness = skew(mpg_values)
print("Skewness of 'mpg':", skewness)

# Calculate mean and standard deviation
mean = np.nanmean(mpg_values)
std = np.nanstd(mpg_values)

# Z-Scaling
scaled_mpg_values = (mpg_values - mean) / std

data['mpg'] = scaled_mpg_values

stats['mpg'] = [mean, std]

# Ensure all missing values are replaced with mean
missing_count = np.isnan(data['mpg']).sum()
if missing_count > 0:
    mean_mpg = data['mpg'].mean()
    print("Replacing remaining missing values with mean:", mean_mpg)
    data['mpg'].fillna(mean_mpg, inplace=True)

print("Number of missing values in 'mpg' column:", data['mpg'].isnull().sum())

Skewness of 'mpg': 0.6806944025369247
Replacing remaining missing values with mean: 0.00045740170694761624
Number of missing values in 'mpg' column: 0


In [18]:
# 13- exterior_color

# Create a list of words from the 'exterior_color' column
words = [word.lower() for item in data['exterior_color'] for word in str(item).split()]

# Count the frequency of each word
word_freq = Counter(words)

# Sort the Counter dictionary by frequency in descending order and limit to top 10
sorted_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:10]

top_10_colors = [word for word, _ in sorted_freq]

def replace_words(item):
    words = str(item).split()
    for word in words:
        if word.lower() in top_10_colors:
            return word.lower()  
    return 'other'


# calculate missing values
missing_count = data['exterior_color'].isnull().sum()
total = len(data['exterior_color'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

data['exterior_color'] = data['exterior_color'].apply(replace_words)

# calculate 'not' missing values which will be added to others 
other_count = (data['exterior_color'] == 'Other').sum()
print("'Other' count excluding missing:", other_count - missing_count)

# calculate unique values
unique_values = len(data['exterior_color'].unique())
print("Unique Values:", unique_values)
print(data['exterior_color'].unique())

# One-hot-encoding
one_hot_encoded = pd.get_dummies(data['exterior_color'], prefix='exterior_color')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('exterior_color', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)
'Other' count excluding missing: 0
Unique Values: 11
['black' 'gray' 'white' 'metallic' 'silver' 'red' 'blue' 'pearl' 'other'
 'crystal' 'clearcoat']


In [19]:
# 14- interior_color

# Create a list of words from the 'interior_color' column
words = [word.lower() for item in data['interior_color'] for word in str(item).split()]

# Count the frequency of each word
word_freq = Counter(words)

# Sort the Counter dictionary by frequency in descending order and limit to top 10
sorted_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:10]

top_10_colors = [word for word, _ in sorted_freq]

def replace_words(item):
    words = str(item).split()
    for word in words:
        if word.lower() in top_10_colors:
            return word.lower()  
    return 'other'


# calculate missing values
missing_count = data['interior_color'].isnull().sum()
total = len(data['interior_color'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100, "%)")

data['interior_color'] = data['interior_color'].apply(replace_words)

# calculate 'not' missing values which will be added to others 
other_count = (data['interior_color'] == 'Other').sum()
print("'Other' count excluding missing:", other_count - missing_count)

# calculate unique values
unique_values = len(data['interior_color'].unique())
print("Unique Values:", unique_values)
print(data['interior_color'].unique())

# One-hot-encoding
one_hot_encoded = pd.get_dummies(data['interior_color'], prefix='interior_color')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('interior_color', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)
'Other' count excluding missing: 0
Unique Values: 11
['other' 'ebony' 'black' 'beige' 'gray' '/' 'graphite' 'charcoal' 'light'
 'dark' 'jet']


In [20]:
# 15- accidents_or_damage

# calculate missing values
missing_count = data['accidents_or_damage'].isnull().sum()
total = len(data['accidents_or_damage'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100,"%)")

# change nan to a value so it could be categorized
data['accidents_or_damage'] = data['accidents_or_damage'].fillna('missing')

# one-hot-encode
one_hot_encoded = pd.get_dummies(data['accidents_or_damage'], prefix='accidents_or_damage')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('accidents_or_damage', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)


In [21]:
# 16- one_owner

# calculate missing values
missing_count = data['one_owner'].isnull().sum()
total = len(data['one_owner'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100,"%)")

# change nan to a value so it could be categorized
data['one_owner'] = data['one_owner'].fillna('missing')

# one-hot-encode
one_hot_encoded = pd.get_dummies(data['one_owner'], prefix='one_owner')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('one_owner', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)


In [22]:
# 17- personal_use_only

# calculate missing values
missing_count = data['personal_use_only'].isnull().sum()
total = len(data['personal_use_only'])
print("Missing values:", missing_count, "(" , (missing_count / total) * 100,"%)")

# change nan to a value so it could be categorized
data['personal_use_only'] = data['personal_use_only'].fillna('missing')

# one-hot-encode
one_hot_encoded = pd.get_dummies(data['personal_use_only'], prefix='personal_use_only')
one_hot_encoded = one_hot_encoded.astype(int)
data.drop('personal_use_only', axis=1, inplace=True)
data = pd.concat([data, one_hot_encoded], axis=1)

Missing values: 0 ( 0.0 %)


In [23]:
# 18- seller_rating

seller_rating_values = data['seller_rating'].values

# Calculate missing values rate
missing_count = np.isnan(seller_rating_values).sum()
total = len(seller_rating_values)
print("Missing values:", missing_count, "(", (missing_count / total) * 100, "%)")

# Fill missing values with 0 (not calculating skewness as we're not using it for imputation)
data['seller_rating'].fillna(0, inplace=True)

# Calculate mean and standard deviation
mean = np.mean(seller_rating_values)
std = np.std(seller_rating_values)

print("Mean:", mean)
print("Standard Deviation:", std)

# Perform z-scaling
scaled_seller_rating_values = (seller_rating_values - mean) / std

data['seller_rating'] = scaled_seller_rating_values

stats['seller_rating'] = [mean, std]

Missing values: 0 ( 0.0 %)
Mean: 4.253239857362854
Standard Deviation: 0.7012164495296226


In [24]:
# 19- driver_rating

driver_rating_values = data['driver_rating'].values

# calculate missing values rate
missing_count = np.isnan(driver_rating_values).sum()
total = len(driver_rating_values);
print("Missing values:", missing_count, "(" , (missing_count / total) * 100,"%)")

driver_rating_values_without_nan = driver_rating_values[~np.isnan(driver_rating_values)]

# calculate skewness
skewness = skew(driver_rating_values_without_nan)
print("Skewness of 'driver_rating_values':", skewness)

# Since distribution is heavily skewed towards the left (skewness < -1) => replace missing values with median

# calculate median
median = np.nanmedian(driver_rating_values)

# fill missing values with median
data['driver_rating'].fillna(median, inplace=True)

# calculate mean and std
mean = np.mean(driver_rating_values)
std = np.std(driver_rating_values)

print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std)

# (x - mean) / std
scaled_driver_rating_values = (driver_rating_values - mean) / std

data['driver_rating'] = scaled_driver_rating_values

stats['driver_rating'] = [mean, std]

Missing values: 0 ( 0.0 %)
Skewness of 'driver_rating_values': -2.8786982197031428
Mean: 4.626389196017497
Median: 4.7
Standard Deviation: 0.2708873041222213


In [25]:
# 20- driver_reviews_num

driver_reviews_values = data['driver_reviews_num'].values

print(len(np.unique(driver_reviews_values)))

# calculate missing values rate
missing_count = np.isnan(driver_reviews_values).sum()
total = len(driver_reviews_values);
print("Missing values:", missing_count, "(" , (missing_count/total) * 100,"%)")

# calculate mean and std
mean = np.mean(driver_reviews_values)
std = np.std(driver_reviews_values)

print("Mean:", mean)
print("Standard Deviation:", std)

# (x - mean) / std
scaled_driver_reviews_values = (driver_reviews_values - mean) / std

data['driver_reviews_num'] = scaled_driver_reviews_values

stats['driver_reviews_num'] = [mean, std]

369
Missing values: 0 ( 0.0 %)
Mean: 90.07572268648298
Standard Deviation: 115.36723955047654


In [26]:
data['price'] = data['price'].clip(3000, 120000)

In [27]:
for column in data.columns:
    if data[column].dtype in ['int64', 'float64']:  # Check if column contains numeric values
        if data[column].nunique() > 2:  # Check if column has more than 2 unique values (excluding 0's and 1's)
            # Apply transformations
            if data[column].dtype == 'float64' and column != 'price':
                data[column] = data[column].clip(lower=-2, upper=2)


In [28]:
data.reset_index(inplace=True, drop=True) 

In [29]:
print("Missing values in dataset:", data.isna().sum().sum())

Missing values in dataset: 0


In [30]:
print("Column names:")
for i, column_name in enumerate(data.columns, 1):
    print(f"{i}. {column_name}")

Column names:
1. mileage
2. mpg
3. seller_rating
4. driver_rating
5. driver_reviews_num
6. price
7. manufacturer_Acura
8. manufacturer_Audi
9. manufacturer_BMW
10. manufacturer_Buick
11. manufacturer_Cadillac
12. manufacturer_Chevrolet
13. manufacturer_Chrysler
14. manufacturer_Dodge
15. manufacturer_Ford
16. manufacturer_GMC
17. manufacturer_Honda
18. manufacturer_Hyundai
19. manufacturer_INFINITI
20. manufacturer_Jaguar
21. manufacturer_Jeep
22. manufacturer_Kia
23. manufacturer_Land Rover
24. manufacturer_Lexus
25. manufacturer_Lincoln
26. manufacturer_Mazda
27. manufacturer_Mercedes-Benz
28. manufacturer_Mitsubishi
29. manufacturer_Nissan
30. manufacturer_Porsche
31. manufacturer_RAM
32. manufacturer_Subaru
33. manufacturer_Tesla
34. manufacturer_Toyota
35. manufacturer_Volkswagen
36. manufacturer_Volvo
37. manufacturer_Other
38. model_128
39. model_135
40. model_150
41. model_1500
42. model_1600
43. model_190SL
44. model_200
45. model_2002
46. model_200SX
47. model_228
48. model_2

<p style="font-size:14pt"><b>Saving files needed for preprocessing</b></p>

In [31]:
with open("stats.json", 'w') as f:
    json.dump(stats, f)

In [32]:
column_names = list(data.columns)

with open("features.json", 'w') as f:
    json.dump(column_names, f)

<p style="font-size:14pt"><b>Saving cleaned dataset</b></p>

In [33]:
data.to_csv('clean_data.csv', index=False)