# Purpose
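The purpose of this notebook is two-fold. In it, I aim to:
1. Produce the dataset for the increased price of large vehicles scenario. This dataset should have all columns necessary to support the original MNL and mixed logit models.
2. Store a version of the original (non-forecast) dataset, with the same columns, for use with the mixed logit model in R.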


In [1]:
import pandas as pd

%matplotlib inline

# Load the car data

In [2]:
# Read the car data from its CSV file into a DataFrame
car_df = pd.read_csv(filepath_or_buffer="../data/car_long_format.csv")

# Correct data errors

Note that the data from the R package 'mlogit' does not match the raw data taken from the Journal of Applied Econometrics data archive. There was likely some error when converting from the format of the data in the archive (which is admittedly a strange format) to the format of the data in mlogit.

Basically, 'electric' fuel type and 'methanol' fuel type appear to be switched.

In [3]:
# Swap the 'methanol' and 'electric' labels; 'cng' and 'gasoline' map to
# themselves so that they survive the .map() call unchanged (any label that
# is missing from this dictionary would be turned into NaN).
exchange_dict = dict(methanol='electric',
                     electric='methanol',
                     cng='cng',
                     gasoline='gasoline')

# NOTE: this overwrites 'fuel_type' in place, so re-running this cell without
# reloading car_df swaps the two labels back again.
car_df['fuel_type'] = car_df['fuel_type'].map(exchange_dict)

# Create the necessary variables

In [4]:
# Show the first six rows, transposed so that each column of car_df is
# displayed as a row
car_df.head(n=6).transpose()

Unnamed: 0,0,1,2,3,4,5
obs_id,1,1,1,1,1,1
alt_id,1,2,3,4,5,6
choice,1,0,0,0,0,0
college,0,0,0,0,0,0
hsg2,0,0,0,0,0,0
coml5,0,0,0,0,0,0
vehicle_size,3,3,2,2,3,3
acceleration,4,4,6,6,2.5,2.5
price_over_log_income,4.17534,4.17534,4.81771,4.81771,5.13889,5.13889
top_speed,95,95,110,110,140,140


In [5]:
# 'big_enough' is 1 only on rows where 'hsg2' is set and the vehicle size
# equals 3; it is 0 everywhere else.
is_size_three = car_df['vehicle_size'] == 3
car_df['big_enough'] = (car_df['hsg2'] & is_size_three).astype(int)

# Determine the type of car: one 0/1 indicator column per listed body type.
# Each pair is (new indicator column, label in the 'body_type' column).
body_type_indicators = [('sports_utility_vehicle', 'sportuv'),
                        ('sports_car', 'sportcar'),
                        ('station_wagon', 'stwagon'),
                        ('truck', 'truck'),
                        ('van', 'van')]

for indicator_col, body_label in body_type_indicators:
    car_df[indicator_col] = (car_df['body_type'] == body_label).astype(int)

# Determine the car's fuel type: one 0/1 indicator column per listed fuel.
# Each pair is (new indicator column, label in the 'fuel_type' column).
fuel_type_indicators = [('electric', 'electric'),
                        ('compressed_natural_gas', 'cng'),
                        ('methanol', 'methanol')]

for indicator_col, fuel_label in fuel_type_indicators:
    car_df[indicator_col] = (car_df['fuel_type'] == fuel_label).astype(int)

# Interaction terms: each new column is 1 only where both source columns
# are set. In order, they flag
#   - an electric vehicle with a small commute ('coml5'),
#   - an electric vehicle for a college educated person,
#   - a methanol vehicle for a college educated person.
interaction_specs = [('electric_commute_lte_5mi', 'electric', 'coml5'),
                     ('electric_and_college', 'electric', 'college'),
                     ('methanol_and_college', 'methanol', 'college')]

for interaction_col, fuel_col, person_col in interaction_specs:
    car_df[interaction_col] =\
        (car_df[fuel_col] & car_df[person_col]).astype(int)
    
# Rescale several numeric columns by a constant divisor.
# Each triple is (new column, source column, divisor).
scaled_columns = [('range_over_100', 'range', 100.0),
                  ('acceleration_over_10', 'acceleration', 10.0),
                  ('top_speed_over_100', 'top_speed', 100.0),
                  ('vehicle_size_over_10', 'vehicle_size', 10.0),
                  ('tens_of_cents_per_mile', 'cents_per_mile', 10.0)]

for scaled_col, source_col, divisor in scaled_columns:
    car_df[scaled_col] = car_df[source_col] / divisor

# Create variables for the mixed logit models: 0/1 complements of the
# 'electric' and 'compressed_natural_gas' indicators.
#
# NOTE: both source columns are stored as integers (they were created with
# .astype(int)), so the `~` operator would perform a *bitwise* NOT on them
# (0 -> -1 and 1 -> -2) instead of a logical negation. Comparing against
# zero yields the intended indicator: 1 where the source is 0, else 0.
car_df['non_ev'] = (car_df['electric'] == 0).astype(int)
car_df['non_cng'] = (car_df['compressed_natural_gas'] == 0).astype(int)

In [6]:
# Store a version of the dataset for the mixed logit model in R.
# For every column listed below, add a sign-flipped copy whose name is the
# original name with 'neg_' in front of it.
neg_variables = ['price_over_log_income', 'acceleration_over_10',
                 'pollution', 'tens_of_cents_per_mile']
prefix = 'neg_'

for col in neg_variables:
    new_col = prefix + col
    car_df[new_col] = car_df[col].mul(-1)

# Write the augmented data to disk without the DataFrame index
car_df.to_csv("../data/mixl_ready_car_data.csv", index=False)

# Create the forecast dataset

In [7]:
# Build the forecast scenario on a copy, so that car_df itself is not
# modified by the price change below.
forecast_df = car_df.copy()

# Select the rows whose body type is 'regcar', whose vehicle size is 3 and
# whose fuel type is 'gasoline'.
is_regular_car = forecast_df['body_type'] == 'regcar'
is_size_three = forecast_df['vehicle_size'] == 3
is_gasoline = forecast_df['fuel_type'] == 'gasoline'
large_gas_car_idx = is_regular_car & is_size_three & is_gasoline

# Multiply the price variable by 1.2 on those rows, then recompute its
# negated counterpart so that the two columns stay consistent.
forecast_df.loc[large_gas_car_idx, 'price_over_log_income'] *= 1.2
forecast_df['neg_price_over_log_income'] =\
    forecast_df['price_over_log_income'].mul(-1)

# Write the forecast data to disk without the DataFrame index
forecast_df.to_csv("../data/mixl_ready_forecast_data.csv", index=False)