# PCA
Cannot interpret any of the components past the 2nd component. Components:
1. All features of the burrito are positively correlated
2. With increasing volume, there is a decrease in the wrap integrity, ingredient uniformity, and meat-to-filling ratio (and possibly temperature).

In [8]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA

In [2]:
import sys
import os.path
sys.path.append(os.path.abspath(os.path.dirname(os.getcwd())))

import util

### Load data

In [3]:
df, df_shops, df_ingredients = util.load_burritos()
df.tail()

Unnamed: 0,Location,Burrito,Date,Cost,Hunger,Mass (g),Density (g/mL),Length,Circum,Volume,...,Fillings,Meat:filling,Uniformity,Salsa,Synergy,Wrap,overall,Rec,Reviewer,Notes
328,caliente mexican food,california,7/6/2017,6.47,4.0,,,23.0,22.0,0.89,...,2.0,3.0,3.0,1.0,2.0,3.0,2.0,,erik k,
329,taco villa,carne asada,7/12/2017,6.99,3.0,,,,,,...,3.0,2.5,4.0,2.0,2.0,4.0,2.0,,richard,
330,taco villa,california,7/19/2017,6.99,4.0,,,20.5,21.5,0.75,...,3.5,3.5,3.5,3.5,3.5,5.0,3.7,,scott,
331,los tacos 2,local,7/22/2017,8.5,3.8,,,23.0,22.0,0.89,...,4.0,3.0,2.5,3.5,3.5,3.0,4.0,,scott,
332,los tacos 2,local,7/22/2017,8.5,4.0,,,22.0,21.0,0.77,...,4.0,3.5,3.5,4.0,5.0,3.5,4.0,,emily,


# Compute principal components

In [30]:
# Define features of interest
foi = ['Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings', 'Meat:filling', 'Uniformity',
       'Salsa', 'Synergy', 'Wrap', 'overall']
df_feat = df[foi]

# Normalize all features
df_feat = (df_feat - df_feat.mean()) / df_feat.std()

# Compute principal components
n_components=len(df_feat.keys())
pca = PCA(n_components=n_components)
pca.fit(df_feat.dropna())

PCA(copy=True, iterated_power='auto', n_components=11, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [31]:
# Print the explained variance of each component and the component weights
exp_vars = pca.explained_variance_ratio_
pcs = pca.components_
for i in range(n_components):
    print('Component {}, explained variance = {:.3f}'.format(i, exp_vars[i]))
    for j in range(n_components):
        print('\t{:<14} weight = {:.1f}'.format(foi[j], pcs[i][j]))

Component 0, explained variance = 0.352
	Volume         weight = -0.1
	Tortilla       weight = -0.3
	Temp           weight = -0.1
	Meat           weight = -0.4
	Fillings       weight = -0.4
	Meat:filling   weight = -0.3
	Uniformity     weight = -0.2
	Salsa          weight = -0.3
	Synergy        weight = -0.4
	Wrap           weight = -0.1
	overall        weight = -0.5
Component 1, explained variance = 0.133
	Volume         weight = 0.6
	Tortilla       weight = 0.1
	Temp           weight = -0.2
	Meat           weight = 0.2
	Fillings       weight = 0.2
	Meat:filling   weight = -0.4
	Uniformity     weight = -0.4
	Salsa          weight = -0.0
	Synergy        weight = 0.1
	Wrap           weight = -0.5
	overall        weight = 0.0
Component 2, explained variance = 0.094
	Volume         weight = 0.2
	Tortilla       weight = -0.5
	Temp           weight = -0.4
	Meat           weight = -0.0
	Fillings       weight = -0.0
	Meat:filling   weight = 0.4
	Uniformity     weight = 0.4
	Salsa          wei

array([[-0.00380911, -0.21252799, -0.14496335, -0.32953149, -0.31464821,
        -0.39706052, -0.36713343, -0.29553099, -0.42372709, -0.156688  ,
        -0.37726664],
       [ 0.03932121,  0.10676118, -0.09189453,  0.26129552,  0.30034465,
        -0.27334798, -0.49585841,  0.15831094,  0.23954332, -0.63145646,
         0.13547599],
       [ 0.00788537, -0.21133442, -0.36696599, -0.0735244 , -0.08309876,
         0.33615349,  0.5045302 , -0.13702723, -0.02513429, -0.64741912,
        -0.04680498],
       [ 0.00308436,  0.03500898,  0.85502435,  0.03739103, -0.13186556,
         0.12136395,  0.02194268, -0.34814987, -0.04225053, -0.33061536,
         0.03742953],
       [ 0.00773794, -0.19746565, -0.20840259,  0.08737027,  0.22176645,
         0.46641463, -0.41923808, -0.63229733,  0.09103249,  0.17803146,
         0.16618268],
       [-0.00877743,  0.3982936 , -0.00125633, -0.36064213, -0.21699769,
         0.59374375, -0.37893434,  0.35966908, -0.06178124, -0.11046754,
        -0.150