In [1]:
import numpy as np
import pylab as plt
import numpy.linalg as lin

In [2]:
# Read the CSV, skipping its single header row and keeping only the columns at
# 0-based indices 1..5 (column 0 is dropped). Values are parsed with
# genfromtxt's default float dtype.
food_data = np.genfromtxt(
    'food-texture.csv',
    delimiter=',',
    skip_header=1,
    usecols=(1, 2, 3, 4, 5),
)

In [3]:
# One histogram per raw data column, stacked vertically in a single figure
# that is written to disk and then closed (nothing is shown inline).
fig1, ax1 = plt.subplots(nrows=5)
for idx, panel in enumerate(ax1):
    panel.hist(food_data[:, idx])
fig1.savefig('Food_histograms.pdf')
plt.close(fig1)

In [4]:
# Standardize every column to zero mean and unit sample variance (z-scores).
# The computation is vectorized over all columns, so it no longer depends on a
# hard-coded column count.
# ddof=1 selects the N-1 (sample) normalization, which is the same one np.cov
# uses by default; the covariance matrix of the result therefore has ones on
# its diagonal.
column_means = food_data.mean(axis=0)
column_stds = food_data.std(axis=0, ddof=1)
food_normalized_data = (food_data - column_means) / column_stds

In [5]:
# Same layout as the raw-data histograms, but drawn from the standardized
# columns; the figure is saved to disk and then closed.
fig2, ax2 = plt.subplots(nrows=5)
for idx, panel in enumerate(ax2):
    panel.hist(food_normalized_data[:, idx])
fig2.savefig('Food_normalized_histograms.pdf')
plt.close(fig2)

In [6]:
# Covariance between the standardized variables (np.cov expects one variable
# per row, hence the transpose).
cov_matrix = np.cov(food_normalized_data.T)

# A covariance matrix is symmetric, so use eigh: it returns real eigenvalues
# and orthonormal eigenvectors. Neither eig nor eigh returns the eigenvalues
# largest-first (eig gives no ordering guarantee, eigh returns them ascending),
# so sort explicitly in descending order and reorder the eigenvector columns
# to match. After this, index 0 is the first principal component, index 1 the
# second, and so on.
eigenval, eigenvec = lin.eigh(cov_matrix)
descending = np.argsort(eigenval)[::-1]
eigenval = eigenval[descending]
eigenvec = eigenvec[:, descending]

# Share of the total variance (in percent) explained by each of the two
# leading components.
eigenval_var = np.sum(eigenval)
comp_1_weight = eigenval[0]/eigenval_var*100.0
comp_2_weight = eigenval[1]/eigenval_var*100.0
print('El peso del primer componente es: %.3f' % comp_1_weight)
print('El peso del segundo componente es: %.3f' % comp_2_weight)

El peso del primer componente es: 60.624
El peso del segundo componente es: 25.914


In [7]:
# Directions used for the overlay lines: the first two columns of eigenvec.
# NOTE(review): treating these as the two leading principal components assumes
# the eigenvector columns are ordered by decreasing eigenvalue - confirm.
comp_1, comp_2 = eigenvec[:, 0], eigenvec[:, 1]

# Number of sample points along each plotted component line.
Npuntos = 200

In [8]:
# Straight lines through the origin (the data are mean-centred) that show the
# direction of the two components inside each 2-D scatter panel.
#
# A panel puts column i on the x-axis and column i+1 on the y-axis. Projected
# onto that plane a component v points along (v[i], v[i+1]), so the slope of
# its line is rise over run: v[i+1] / v[i]. (The reciprocal, v[i] / v[i+1],
# would draw the line mirrored about the diagonal.)
# NOTE: if v[i] is exactly 0 the slope is infinite (a vertical line), which
# this y = slope * x representation cannot draw.

# Panel: column 0 (x) vs column 1 (y)
slope_1_12 = comp_1[1]/comp_1[0]
slope_2_12 = comp_2[1]/comp_2[0]
min_12 = food_normalized_data[:,0].min()
max_12 = food_normalized_data[:,0].max()
# x grid spans the data range of the x column plus a 0.1 margin on each side
axis_1 = np.linspace(min_12-0.1,max_12+0.1,Npuntos)
comp_1_line_12 = slope_1_12*axis_1
comp_2_line_12 = slope_2_12*axis_1

# Panel: column 1 (x) vs column 2 (y)
slope_1_23 = comp_1[2]/comp_1[1]
slope_2_23 = comp_2[2]/comp_2[1]
min_23 = food_normalized_data[:,1].min()
max_23 = food_normalized_data[:,1].max()
axis_2 = np.linspace(min_23-0.1,max_23+0.1,Npuntos)
comp_1_line_23 = slope_1_23*axis_2
comp_2_line_23 = slope_2_23*axis_2

# Panel: column 2 (x) vs column 3 (y)
slope_1_34 = comp_1[3]/comp_1[2]
slope_2_34 = comp_2[3]/comp_2[2]
min_34 = food_normalized_data[:,2].min()
max_34 = food_normalized_data[:,2].max()
axis_3 = np.linspace(min_34-0.1,max_34+0.1,Npuntos)
comp_1_line_34 = slope_1_34*axis_3
comp_2_line_34 = slope_2_34*axis_3

# Panel: column 3 (x) vs column 4 (y)
slope_1_45 = comp_1[4]/comp_1[3]
slope_2_45 = comp_2[4]/comp_2[3]
min_45 = food_normalized_data[:,3].min()
max_45 = food_normalized_data[:,3].max()
axis_4 = np.linspace(min_45-0.1,max_45+0.1,Npuntos)
comp_1_line_45 = slope_1_45*axis_4
comp_2_line_45 = slope_2_45*axis_4

In [9]:
#food_projected_data = np.zeros((food_data.shape[0],2))
#food_projected_data[:,0] = np.dot(food_normalized_data,comp_1)
#food_projected_data[:,1] = np.dot(food_normalized_data,comp_2)

In [10]:
# 2x2 grid of scatter plots for consecutive column pairs (0-1, 1-2, 2-3, 3-4)
# of the standardized data. Each panel is overlaid with the first component
# line in red and the second in yellow, and its x-range is clamped to the
# data range of the x column plus a 0.1 margin.
fig3, ax3 = plt.subplots(2, 2)

# (axes, x-column index, x grid, first line, second line, x min, x max)
panels = (
    (ax3[0, 0], 0, axis_1, comp_1_line_12, comp_2_line_12, min_12, max_12),
    (ax3[0, 1], 1, axis_2, comp_1_line_23, comp_2_line_23, min_23, max_23),
    (ax3[1, 0], 2, axis_3, comp_1_line_34, comp_2_line_34, min_34, max_34),
    (ax3[1, 1], 3, axis_4, comp_1_line_45, comp_2_line_45, min_45, max_45),
)
for panel, x_col, x_grid, first_line, second_line, x_low, x_high in panels:
    panel.scatter(food_normalized_data[:, x_col],
                  food_normalized_data[:, x_col + 1])
    panel.plot(x_grid, first_line, 'r')
    panel.plot(x_grid, second_line, 'y')
    panel.set_xlim([x_low - 0.1, x_high + 0.1])

fig3.savefig('Food_scatter.pdf')
plt.close(fig3)