In [363]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [364]:
# Locate the cleaned pre-launch survey export relative to the notebook folder
base_path = '../'
data_folder_name = 'data/raw/fresh-sip/'
survey_filename = 'Pre_Launch_Survey_Cleaned.csv'
survey_csv_path = os.path.join(base_path, data_folder_name, survey_filename)

# Load the survey, using the 'Serial No' column as the row index
survey_df = pd.read_csv(survey_csv_path, index_col='Serial No')
survey_df

Unnamed: 0_level_0,What combination of ingredients present in a beverage are you more likely to buy?,What volume of beverage are you most likely to buy?,At which price point are you most likely to buy a beverage?,Which beverage type appeals to you the most?,Which is your most preferred location to buy beverages?,"Do you prefer to have juice during winters? If not, what is your preferred choice of drink during winters?"
Serial No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,"Seasonal fruits such as mango, orange, grapes,...",500 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,
2,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Healthy detox juice,Equal preference,
3,"Seasonal fruits such as mango, orange, grapes,...",Less than 300 ml,Less than ₹150,Cold pressed juice,Food Court in malls,
4,"Seasonal fruits such as mango, orange, grapes,...",Greater than 750 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,
5,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Less than ₹150,Cold pressed juice,Equal preference,
...,...,...,...,...,...,...
106,"Exotic Fruits such as kiwi, dragon fruit, blue...",Less than 300 ml,Between ₹150 - ₹250,Fusion of two or more fruits and nuts,Equal preference,"No, coffee"
107,"Non seasonal fruits such as pineapple, apple, ...",Less than 300 ml,Between ₹150 - ₹250,Fusion of two or more fruits and nuts,Stand-alone outlets,
108,"Non seasonal fruits such as pineapple, apple, ...",350 ml,Less than ₹150,Cold pressed juice,Stand-alone outlets,
109,"Seasonal fruits such as mango, orange, grapes,...",500 ml,Less than ₹150,Fusion of two or more fruits and nuts,Stand-alone outlets,


In [365]:
# Swap the long question texts for short attribute names. The assignment is
# positional, so the list must stay in the same order as the CSV columns.
short_column_names = ['ingredients', 'volume', 'price', 'beverage_type', 'location', 'response']
survey_df.columns = short_column_names

# The last column (the winter-drink question) is overwritten with a constant 1:
# every original survey row is flagged as a positive response.
survey_df['response'] = 1

In [366]:
# Map every column name to the list of distinct values found in that column
# (order of first appearance, as returned by Series.unique()).
columns = survey_df.columns
unique_vals = {
    column_name: survey_df[column_name].unique().tolist()
    for column_name in columns
}

In [367]:
# Convert to a choice based conjoint survey from a frequency based survey
def expand_survey(df: pd.DataFrame = None):
    """Expand one-row-per-respondent answers into choice-style rows.

    Every row of ``df`` is emitted unchanged, followed by one "alternative"
    row for each other level of each attribute column. An alternative is a
    copy of the respondent's row with exactly one attribute swapped to a
    different level and with the last column set to 0.

    Parameters
    ----------
    df : pd.DataFrame, optional
        Frame whose last column is the response indicator and whose other
        columns are the attributes. The levels of each attribute are the
        distinct values found in ``df`` itself. When omitted, the
        notebook-level ``survey_df`` is looked up at call time.

    Returns
    -------
    pd.DataFrame
        A new frame with the same column labels as ``df`` and a fresh
        default integer index. ``df`` is not modified.
    """
    if df is None:
        # Resolved at call time so the function can be defined before the
        # survey has been loaded.
        df = survey_df

    # Everything except the trailing response column is an attribute.
    attribute_columns = list(df.columns[:-1])
    levels_by_column = {
        column: df[column].unique().tolist() for column in attribute_columns
    }

    response_rows = []
    for chosen_row in df.values.tolist():
        response_rows.append(chosen_row)
        for position, column in enumerate(attribute_columns):
            alternatives = levels_by_column[column].copy()
            try:
                alternatives.remove(chosen_row[position])
            except ValueError:
                # The row's value is not among the known levels (this can
                # happen for a NaN, which does not compare equal to itself):
                # no alternatives are generated for this attribute.
                continue

            for level in alternatives:
                alternative_row = chosen_row.copy()
                # set response equal to zero
                alternative_row[-1] = 0
                # set new value to relevant column
                alternative_row[position] = level
                response_rows.append(alternative_row)

    return pd.DataFrame(response_rows, columns=df.columns)

In [368]:
# Expanded survey: each original answer row plus its generated
# zero-response alternatives
expand_survey_df = expand_survey(survey_df)
expand_survey_df

Unnamed: 0,ingredients,volume,price,beverage_type,location,response
0,"Seasonal fruits such as mango, orange, grapes,...",500 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,1
1,"Exotic Fruits such as kiwi, dragon fruit, blue...",500 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,0
2,"Non seasonal fruits such as pineapple, apple, ...",500 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,0
3,"Veggies such as carrot, cucumber, tomato",500 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,0
4,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,0
...,...,...,...,...,...,...
1755,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Mocktails,Food Court in malls,0
1756,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Dairy based lassi or smoothies,Food Court in malls,0
1757,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Fusion of two or more fruits and nuts,Food Court in malls,0
1758,"Seasonal fruits such as mango, orange, grapes,...",350 ml,Between ₹150 - ₹250,Cold pressed juice,Equal preference,0


In [369]:
# Final conjoint dataset: one-hot encode the attribute columns. Every column
# name except the trailing 'response' is passed as a prefix, so the dummy
# columns are named '<attribute>_<level>'.
attribute_prefixes = expand_survey_df.columns[:-1]
conjoint_df = pd.get_dummies(data=expand_survey_df, prefix=attribute_prefixes)
conjoint_df

Unnamed: 0,response,"ingredients_Exotic Fruits such as kiwi, dragon fruit, blue berry","ingredients_Non seasonal fruits such as pineapple, apple, banana, papaya","ingredients_Seasonal fruits such as mango, orange, grapes, watermelon","ingredients_Veggies such as carrot, cucumber, tomato",volume_350 ml,volume_500 ml,volume_Greater than 750 ml,volume_Less than 300 ml,price_Between ₹150 - ₹250,...,price_Less than ₹150,price_More than ₹400,beverage_type_Cold pressed juice,beverage_type_Dairy based lassi or smoothies,beverage_type_Fusion of two or more fruits and nuts,beverage_type_Healthy detox juice,beverage_type_Mocktails,location_Equal preference,location_Food Court in malls,location_Stand-alone outlets
0,1,False,False,True,False,False,True,False,False,True,...,False,False,True,False,False,False,False,True,False,False
1,0,True,False,False,False,False,True,False,False,True,...,False,False,True,False,False,False,False,True,False,False
2,0,False,True,False,False,False,True,False,False,True,...,False,False,True,False,False,False,False,True,False,False
3,0,False,False,False,True,False,True,False,False,True,...,False,False,True,False,False,False,False,True,False,False
4,0,False,False,True,False,True,False,False,False,True,...,False,False,True,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1755,0,False,False,True,False,True,False,False,False,True,...,False,False,False,False,False,False,True,False,True,False
1756,0,False,False,True,False,True,False,False,False,True,...,False,False,False,True,False,False,False,False,True,False
1757,0,False,False,True,False,True,False,False,False,True,...,False,False,False,False,True,False,False,False,True,False
1758,0,False,False,True,False,True,False,False,False,True,...,False,False,True,False,False,False,False,True,False,False


In [370]:
# Save the encoded dataset as a CSV file; the row index is not written
base_write_path = '../'
write_data_folder_name = 'src/datasets/'
write_filename = 'pre_launch_conjoint.csv'
conjoint_csv_path = os.path.join(base_write_path, write_data_folder_name, write_filename)
conjoint_df.to_csv(conjoint_csv_path, index=False)