# Burrito Dashboard

#### Author: *Taimur Shahzad Gill*

### *Importing the Libraries*

In [17]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import re 
from sklearn.preprocessing import MinMaxScaler

### *Reading the Dataset*

In [3]:
# Load the raw burrito reviews from the CSV file that sits next to this notebook.
df = pd.read_csv('burritos.csv')

### *Data Exploration*

In [4]:
# Peek at the first five rows to get a feel for the raw columns.
df.head(n=5)

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Nopales,Lobster,Queso,Egg,Mushroom,Bacon,Sushi,Avocado,Corn,Zucchini
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,
1,Oscar's Mexican food,California,1/24/2016,San Marcos,225 S Rancho Santa Fe Rd,http://www.yelp.com/biz/oscars-mexican-food-sa...,3.5,3.3,,5.45,...,,,,,,,,,,
2,Oscar's Mexican food,Carnitas,1/24/2016,,,,,,,4.85,...,,,,,,,,,,
3,Oscar's Mexican food,Carne asada,1/24/2016,,,,,,,5.25,...,,,,,,,,,,
4,Pollos Maria,California,1/27/2016,Carlsbad,3055 Harding St,http://pollosmaria.com/,4.0,3.8,x,6.59,...,,,,,,,,,,


In [5]:
# List every column label; note the raw names still contain units and ':' (e.g. 'Mass (g)',
# 'Meat:filling'), which the data-cleaning step rewrites.
df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass (g)', 'Density (g/mL)',
       'Length', 'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat:filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini'],
      dtype='object')

In [6]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()

Unnamed: 0,Yelp,Google,Cost,Hunger,Mass (g),Density (g/mL),Length,Circum,Volume,Tortilla,Temp,Meat,Fillings,Meat:filling,Uniformity,Salsa,Synergy,Wrap,overall,Queso
count,82.0,82.0,378.0,382.0,22.0,22.0,251.0,249.0,249.0,385.0,365.0,373.0,383.0,377.0,383.0,363.0,383.0,383.0,383.0,0.0
mean,3.89878,4.17439,7.04828,3.499895,546.181818,0.675277,20.072988,22.098996,0.785462,3.486104,3.741096,3.596247,3.527546,3.564403,3.422324,3.348485,3.576371,3.995561,3.604813,
std,0.470748,0.377389,1.517983,0.808791,144.445619,0.080468,2.060584,1.79501,0.153465,0.787282,0.975079,0.835896,0.812342,0.987858,1.061032,0.927714,0.896275,1.107876,0.761901,
min,2.5,2.9,2.99,0.5,350.0,0.56,15.0,17.0,0.4,1.0,1.0,1.0,1.0,0.5,0.0,0.0,1.0,0.0,1.0,
25%,3.5,4.0,6.25,3.0,450.0,0.619485,18.5,21.0,0.68,3.0,3.0,3.0,3.0,3.0,2.5,3.0,3.0,3.5,3.0,
50%,4.0,4.2,6.95,3.5,540.0,0.658099,20.0,22.0,0.77,3.5,4.0,3.75,3.5,4.0,3.5,3.5,3.8,4.0,3.75,
75%,4.0,4.4,7.75,4.0,595.0,0.721726,21.5,23.0,0.88,4.0,4.5,4.0,4.0,4.0,4.0,4.0,4.0,5.0,4.1,
max,4.5,5.0,25.0,5.0,925.0,0.865672,26.0,29.0,1.54,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,


### *Data Cleaning*

In [7]:
# Normalise the column labels in a single pass:
#   1. drop any parenthesised/bracketed text such as units ('Mass (g)' -> 'Mass');
#   2. trim the whitespace left behind;
#   3. replace ':' with '_' ('Meat:filling' -> 'Meat_filling').
# The pattern is a raw string so that `\(`, `\[`, `\)` and `\]` reach the regex engine verbatim
# instead of being parsed as invalid string escapes, which triggers a warning on recent Pythons.
bracketed_text = re.compile(r"([\(\[]).*?([\)\]])")
df.columns = [bracketed_text.sub("", label).strip().replace(':', '_') for label in df.columns]

In [8]:
# Re-inspect the first five rows after the column labels were cleaned.
df.head(n=5)

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Nopales,Lobster,Queso,Egg,Mushroom,Bacon,Sushi,Avocado,Corn,Zucchini
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,
1,Oscar's Mexican food,California,1/24/2016,San Marcos,225 S Rancho Santa Fe Rd,http://www.yelp.com/biz/oscars-mexican-food-sa...,3.5,3.3,,5.45,...,,,,,,,,,,
2,Oscar's Mexican food,Carnitas,1/24/2016,,,,,,,4.85,...,,,,,,,,,,
3,Oscar's Mexican food,Carne asada,1/24/2016,,,,,,,5.25,...,,,,,,,,,,
4,Pollos Maria,California,1/27/2016,Carlsbad,3055 Harding St,http://pollosmaria.com/,4.0,3.8,x,6.59,...,,,,,,,,,,


In [9]:
# Flag, per column, whether at least one value is missing.
df.isna().any()

Location        False
Burrito         False
Date            False
Neighborhood     True
Address          True
                ...  
Bacon            True
Sushi            True
Avocado          True
Corn             True
Zucchini         True
Length: 66, dtype: bool

In [10]:
# Fraction of missing values per column: missing count divided by the number of rows.
df.isna().sum().div(len(df))

Location        0.000000
Burrito         0.000000
Date            0.000000
Neighborhood    0.774026
Address         0.784416
                  ...   
Bacon           0.992208
Sushi           0.994805
Avocado         0.966234
Corn            0.992208
Zucchini        0.997403
Length: 66, dtype: float64

In [13]:
# Start the session that the table, cube and visualisation cells below are built on.
# NOTE(review): `tt` is not imported in the import cell at the top of this notebook, so this
# line raises NameError on a fresh kernel. Presumably `import atoti as tt` is intended;
# confirm and add it to the import cell.
session = tt.Session()

Deleting existing "Unnamed" session to create the new one.


### *Data Scaling*

In [14]:
# Size and price columns that should share a common scale.
burrito_vars_norm = df[['Circum', 'Volume', 'Length', 'Mass', 'Cost']]

# Min-max scale each column, then stretch the result by a factor of 10 for readability.
scaler = MinMaxScaler()
bnorms = 10 * scaler.fit_transform(burrito_vars_norm)

# Write the scaled values back onto the dataframe (df) as new "*_norm" columns.
df[[
    'Circum_norm',
    'Volume_norm',
    'Length_norm',
    'Mass_norm',
    'Cost_norm',
]] = bnorms

### *Exploratory Data Analysis*

In [15]:
# Reshape the normalised columns into long format: one row per (Location, Burrito, variable),
# with the normalised number in the `value` column.
burrito_variables = df.reset_index().melt(
    id_vars=['Location', 'Burrito'],
    value_vars=['Circum_norm', 'Volume_norm', 'Length_norm', 'Mass_norm', 'Cost_norm'],
)
burrito_variables

Unnamed: 0,Location,Burrito,variable,value
0,Donato's taco shop,California,Circum_norm,
1,Oscar's Mexican food,California,Circum_norm,
2,Oscar's Mexican food,Carnitas,Circum_norm,
3,Oscar's Mexican food,Carne asada,Circum_norm,
4,Pollos Maria,California,Circum_norm,
...,...,...,...,...
1920,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1921,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1922,Burrito Box,Steak with guacamole,Cost_norm,3.866424
1923,Taco Stand,California,Cost_norm,2.226261


In [16]:
# Load the cleaned dataframe (including the *_norm columns) into the session as the
# 'burritos' table.
burrito_table = session.read_pandas(df, table_name = 'burritos')

In [17]:
# Preview the first rows of the loaded table to confirm the *_norm columns came through.
burrito_table.head()

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Bacon,Sushi,Avocado,Corn,Zucchini,Circum_norm,Volume_norm,Length_norm,Mass_norm,Cost_norm
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,1.590186
1,Lola's 7 Up Market & Deli,Carne asada,2/29/2016,,,,,,,7.0,...,,,,,,,,,,1.821899
2,Rigoberto's Taco Shop,Carne asada,3/8/2016,Miramar,7094 Miramar Rd,https://www.yelp.com/biz/rigobertos-taco-shop-...,4.0,4.4,,6.6,...,,,,,,,,,,1.640164
3,California Burritos,Chile relleno and carnitas,4/15/2016,,,,,,,6.5,...,,,,,,,,,,1.59473
4,Senor Grubby's,California,4/24/2016,Carlsbad,377 Carlsbad Village Dr,eatgrubbys.com,4.0,4.1,,9.0,...,,,,,,3.75,2.631579,3.636364,,2.730577


In [18]:
# Build a cube on top of the burritos table.
# NOTE(review): the '<column>.MEAN' measures used further down are presumably created
# automatically by this call — confirm against the installed library version.
cube = session.create_cube(burrito_table)

In [19]:
# create hierarchies, levels, and measures 
h = cube.hierarchies
l = cube.levels
m = cube.measures

In [20]:
# Display the cube so its structure can be inspected.
cube

In [21]:
# create new measures (examples)
m['five'] = 5
m['lenXwrap'] = m['Length.MEAN'] * m['Wrap.MEAN']

In [30]:
# Open the visualisation named 'exploration'.
# NOTE(review): the resulting chart is configured interactively, so it is not reproducible
# from this code alone.
session.visualize('exploration')

In [23]:
# Load the long-format normalised values as a second table keyed on
# (Location, Burrito, variable).
# NOTE(review): the melted frame contains repeated (Location, Burrito, variable) combinations
# (e.g. several "Rigoberto's Taco Shop / California" rows); presumably rows sharing a key
# overwrite each other on load — confirm this is intended.
burrito_var_table = session.read_pandas(burrito_variables, table_name = 'burrito_variables', keys=['Location','Burrito','variable'])

In [24]:
# Link the long-format table to the base burritos table.
# NOTE(review): no mapping is given, so the join presumably matches on the columns the two
# tables share by name (Location, Burrito) — confirm.
burrito_table.join(burrito_var_table)

In [25]:
# 'value'    - mean of the `value` column of the long-format table.
# 'aggvalue' - mean of the 'value' measure, aggregated from the Location, Burrito and
#              variable levels.
# NOTE(review): this cell's output shows a deprecation warning; presumably it comes from
# `tt.scope.origin` — check the installed version's docs for the replacement.
m['value'] = tt.agg.mean(burrito_var_table["value"])
m['aggvalue'] = tt.agg.mean(m['value'], scope = tt.scope.origin(l['Location'],l['Burrito'],l['variable']))

  deprecated(


In [31]:
# Open the visualisation named 'radar chart'.
# NOTE(review): presumably fed by the 'value'/'aggvalue' measures per variable — confirm in
# the saved widget configuration.
session.visualize('radar chart')

In [27]:
# Constant measure with value 4.
# NOTE(review): presumably a reference/target value for a gauge widget — TODO confirm.
m['four'] = 4

In [32]:
# Constant measure with value 4.25, used as data for the Google-rating gauge.
m['four_google'] = 4.25

In [33]:
# Open the visualisation named 'scatter plot neighborhood'.
session.visualize('scatter plot neighborhood')