In [3]:
# import library
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [4]:
# Load the burrito review data from a CSV expected in the notebook's working directory.
b_df = pd.read_csv('burritos_01022018.csv')

In [5]:
# Overview of the frame: number of rows, and each column's non-null count and dtype.
b_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 385 entries, 0 to 384
Data columns (total 66 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Location        385 non-null    object 
 1   Burrito         385 non-null    object 
 2   Date            385 non-null    object 
 3   Neighborhood    87 non-null     object 
 4   Address         83 non-null     object 
 5   URL             82 non-null     object 
 6   Yelp            82 non-null     float64
 7   Google          82 non-null     float64
 8   Chips           24 non-null     object 
 9   Cost            378 non-null    float64
 10  Hunger          382 non-null    float64
 11  Mass (g)        22 non-null     float64
 12  Density (g/mL)  22 non-null     float64
 13  Length          251 non-null    float64
 14  Circum          249 non-null    float64
 15  Volume          249 non-null    float64
 16  Tortilla        385 non-null    float64
 17  Temp            365 non-null    flo

In [6]:
# Count the missing (null) values in each column.
b_df.isnull().sum()

Location          0
Burrito           0
Date              0
Neighborhood    298
Address         302
               ... 
Bacon           382
Sushi           383
Avocado         372
Corn            382
Zucchini        384
Length: 66, dtype: int64

In [7]:
# List the raw column names; some contain units in parentheses or a ":" (e.g. 'Mass (g)', 'Meat:filling').
b_df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass (g)', 'Density (g/mL)',
       'Length', 'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat:filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini'],
      dtype='object')

In [8]:
# importing the RegEx library 
import re

In [9]:
# Clean the column names in a single pass:
#   * drop any bracketed suffix, e.g. 'Mass (g)' -> 'Mass', 'Density (g/mL)' -> 'Density'
#   * replace ":" with "_", e.g. 'Meat:filling' -> 'Meat_filling'
#   * strip leftover leading/trailing whitespace
# The pattern is a raw string so that "\(" and "\[" reach the regex engine unchanged;
# in a normal string they are invalid escape sequences and trigger a warning on
# recent Python versions.
b_df.columns = [
    re.sub(r"([\(\[]).*?([\)\]])", "", x).replace(":", "_").strip()
    for x in b_df.columns
]

In [10]:
# Confirm the cleaned column names (no parenthesised units, ":" replaced by "_").
b_df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass', 'Density', 'Length',
       'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat_filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini'],
      dtype='object')

In [11]:
import atoti as tt

Welcome to atoti 0.6.2!

By using this community edition, you agree with the license available at https://docs.atoti.io/latest/eula.html.
Browse the official documentation at https://docs.atoti.io.
Join the community at https://www.atoti.io/register.

atoti collects telemetry data, which is used to help understand how to improve the product.
If you don't wish to send usage data, set the ATOTI_DISABLE_TELEMETRY environment variable to True.

You can hide this message by setting the ATOTI_HIDE_EULA_MESSAGE environment variable to True.


In [21]:
# Start an atoti session with an explicit configuration.
# NOTE(review): presumably "user_content_storage" is where atoti persists saved
# content and "port" is the port the session is served on — confirm against the
# atoti documentation for the installed version.
session_config = {
    "user_content_storage": "./content",
    "port": 9000,
}
session = tt.create_session(config=session_config)

Deleting existing "Unnamed" session to create the new one.


In [13]:
# Create the min-max scaler used to rescale the circumference, volume, length,
# mass, and cost columns (the columns are selected where the scaler is applied).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [14]:
# Columns to rescale, named once so the source and target column lists stay in sync.
size_cost_cols = ['Circum', 'Volume', 'Length', 'Mass', 'Cost']
burrito_vars_norm = b_df.loc[:, size_cost_cols]

# Min-max scale the selected columns, then multiply by 10 so the values are on a
# 0-10 scale instead of 0-1, for readability.
scaled_unit = scaler.fit_transform(burrito_vars_norm)
bnorms = scaled_unit * 10

# Store the scaled values in the DataFrame as new '<column>_norm' columns.
norm_cols = [col + '_norm' for col in size_cost_cols]
b_df[norm_cols] = bnorms

In [15]:
# Confirm the five '*_norm' columns were appended to the DataFrame.
b_df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass', 'Density', 'Length',
       'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat_filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini',
       'Circum_norm', 'Volume_norm', 'Length_norm', 'Mass_norm', 'Cost_norm'],
      dtype='object')

In [16]:
# Build a long-format DataFrame of the scaled variables for use in a visualization platform.
# melt collapses the five '*_norm' columns into two: 'variable' (which measurement)
# and 'value' (its scaled value), keeping 'Location' and 'Burrito' as identifiers.
# The result therefore has 5x as many rows as b_df.
# NOTE(review): reset_index() looks redundant here because melt keeps only the
# id/value columns — confirm before removing.
melt_id_cols = ['Location', 'Burrito']
melt_value_cols = ['Circum_norm', 'Volume_norm', 'Length_norm', 'Mass_norm', 'Cost_norm']
burrito_variables = b_df.reset_index().melt(
    id_vars=melt_id_cols,
    value_vars=melt_value_cols,
)
burrito_variables

Unnamed: 0,Location,Burrito,variable,value
0,Donato's taco shop,California,Circum_norm,
1,Oscar's Mexican food,California,Circum_norm,
2,Oscar's Mexican food,Carnitas,Circum_norm,
3,Oscar's Mexican food,Carne asada,Circum_norm,
4,Pollos Maria,California,Circum_norm,
...,...,...,...,...
1920,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1921,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1922,Burrito Box,Steak with guacamole,Cost_norm,3.866424
1923,Taco Stand,California,Cost_norm,2.226261


In [17]:
# Import Flask's `session` under an alias: importing it as plain `session` would
# overwrite the atoti `session` object that the following cells call
# (`session.read_pandas`, `session.create_cube`, `session.visualize`) when the
# notebook is run top to bottom.
from flask import Flask, session as flask_session

In [23]:
# Load the pandas DataFrame into the atoti session as a table named 'burritos',
# then preview its first rows.
burrit_table = session.read_pandas(
    b_df,
    table_name='burritos',
)
burrit_table.head()

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Bacon,Sushi,Avocado,Corn,Zucchini,Circum_norm,Volume_norm,Length_norm,Mass_norm,Cost_norm
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,1.590186
1,Oscar's Mexican food,California,1/24/2016,San Marcos,225 S Rancho Santa Fe Rd,http://www.yelp.com/biz/oscars-mexican-food-sa...,3.5,3.3,,5.45,...,,,,,,,,,,1.117674
2,Oscar's Mexican food,Carnitas,1/24/2016,,,,,,,4.85,...,,,,,,,,,,0.84507
3,Oscar's Mexican food,Carne asada,1/24/2016,,,,,,,5.25,...,,,,,,,,,,1.026806
4,Pollos Maria,California,1/27/2016,Carlsbad,3055 Harding St,http://pollosmaria.com/,4.0,3.8,x,6.59,...,,,,,,,,,,1.63562


In [24]:
# Create an atoti cube on top of the 'burritos' table.
cube  = session.create_cube(burrit_table)

In [26]:
# Short aliases for the cube's hierarchies (h), levels (l) and measures (m).
h, l, m = cube.hierarchies, cube.levels, cube.measures

In [27]:
# Store the constant 5 under the key 'five' in the cube's measures.
# NOTE(review): presumably this defines a constant-valued measure — confirm against the atoti docs.
m['five'] = 5

In [29]:
# Request an atoti visualization widget, passing 'Exploration_1' (presumably its name — confirm).
# As the cell output notes, the widget is only built in JupyterLab with the atoti extension enabled.
session.visualize('Exploration_1')

Open the notebook in JupyterLab with the atoti extension enabled to build this widget.