Skip to content
Browse files

new week of material committed, not solved

  • Loading branch information...
1 parent fe15dc0 commit 2f52a1715eb12bc5076c4536ef7177662836ab41 @schneems committed Dec 11, 2011
View
BIN mlclass-ex5/mlclass-ex5/octave-core
Binary file not shown.
View
21 mlclass-ex6/mlclass-ex6/dataset3Params.m
@@ -25,7 +25,28 @@
+% Grid search: try every (C, sigma) pair from `values`, train on (X, y) and
+% keep the pair with the lowest misclassification rate on (Xval, yval).
+values = [0.01 0.03 0.1 0.3 1 3 10 30];
+error_min = inf;
+
+
+fprintf('chill hommie i am looking for C and sigma, yo values\n');
+for C = values
+    for sigma = values
+        fprintf('.');
+        model = svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
+        % Fraction of validation examples that are predicted incorrectly
+        err = mean(double(svmPredict(model, Xval) ~= yval));
+        if( err <= error_min )
+            C_final = C;
+            sigma_final = sigma;
+            error_min = err;
+            % Newlines keep this message separate from the progress dots
+            fprintf('\nnew min found C, sigma = %f, %f with error = %f\n', C_final, sigma_final, error_min)
+        end
+    end
+end
+% The loop variables shadowed the outputs; restore the best pair found
+C = C_final;
+sigma = sigma_final;
+fprintf('Best value C, sigma = [%f %f] with prediction error = %f\n', C, sigma, error_min);
View
7 mlclass-ex6/mlclass-ex6/emailFeatures.m
@@ -49,10 +49,9 @@
%
-
-
-
-
+% Mark every vocabulary word that occurs in the email. Indexed assignment
+% works for row and column index vectors alike, whereas `for i = v`
+% iterates over the columns of v.
+x(word_indices) = 1;
% =========================================================================
View
3 mlclass-ex6/mlclass-ex6/ex6.m
@@ -72,6 +72,9 @@
fprintf('Program paused. Press enter to continue.\n');
pause;
+
+
+
%% =============== Part 4: Visualizing Dataset 2 ================
% The following code will load the next dataset into your environment and
% plot the data.
View
5 mlclass-ex6/mlclass-ex6/gaussianKernel.m
@@ -16,9 +16,8 @@
%
%
-
-
-
+% Squared Euclidean distance between the two inputs
+magnitude = sum((x1-x2).^2);
+% exp() is used instead of the constant `e`, which is only predefined in
+% Octave and is undefined in MATLAB
+sim = exp(-magnitude/(2*sigma^2));
% =============================================================
View
BIN mlclass-ex6/mlclass-ex6/octave-core
Binary file not shown.
View
19 mlclass-ex6/mlclass-ex6/processEmail.m
@@ -97,25 +97,24 @@
% str2). It will return 1 only if the two strings are equivalent.
%
-
-
-
-
-
-
-
-
+    % Append the position of every vocabulary entry equal to the token
+    matches = find(strcmp(str, vocabList));
+    word_indices = [ word_indices ; matches(:) ];
% =============================================================
- % Print to screen, ensuring that the output lines are not too long
+ % Print out to the screen, make sure the output lines are not crazy long
if (l + length(str) + 1) > 78
fprintf('\n');
l = 0;
end
fprintf('%s ', str);
+ % Advance the running column count; without it l never changes here and
+ % the wrap check above can never fire
l = l + length(str) + 1;
+ % =============================================================
+
end
View
BIN mlclass-ex7/ex7.pdf
Binary file not shown.
View
BIN mlclass-ex7/mlclass-ex7/bird_small.mat
Binary file not shown.
View
BIN mlclass-ex7/mlclass-ex7/bird_small.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
40 mlclass-ex7/mlclass-ex7/computeCentroids.m
@@ -0,0 +1,40 @@
+function centroids = computeCentroids(X, idx, K)
+%COMPUTECENTROIDS returns the new centroids by computing the means of the
+%data points assigned to each centroid.
+% centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by
+% computing the means of the data points assigned to each centroid. It is
+% given a dataset X where each row is a single data point, a vector
+% idx of centroid assignments (i.e. each entry in range [1..K]) for each
+% example, and K, the number of centroids. You should return a matrix
+% centroids, where each row of centroids is the mean of the data points
+% assigned to it.
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns the K x n zero matrix it is initialized with.
+%
+
+% Useful variables
+[m n] = size(X);
+
+% You need to return the following variables correctly.
+centroids = zeros(K, n);
+
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Go over every centroid and compute mean of all points that
+% belong to it. Concretely, the row vector centroids(i, :)
+% should contain the mean of the data points assigned to
+% centroid i.
+%
+% Note: You can use a for-loop over the centroids to compute this.
+%
+
+
+
+
+
+
+
+
+% =============================================================
+
+
+end
+
View
59 mlclass-ex7/mlclass-ex7/displayData.m
@@ -0,0 +1,59 @@
+function [h, display_array] = displayData(X, example_width)
+%DISPLAYDATA Display 2D data in a nice grid
+%   [h, display_array] = DISPLAYDATA(X, example_width) shows the rows of X
+%   as image patches tiled in a grid. Each row is reshaped to
+%   example_height x example_width, where example_height = size(X, 2) /
+%   example_width, and is divided by its largest absolute value before it
+%   is copied into the grid. When example_width is omitted or empty it
+%   defaults to round(sqrt(size(X, 2))).
+%
+%   Returns h, the handle produced by imagesc, and display_array, the
+%   assembled grid (padding cells hold -1).
+
+% Set example_width automatically if not passed in
+if ~exist('example_width', 'var') || isempty(example_width)
+    example_width = round(sqrt(size(X, 2)));
+end
+
+% Gray Image
+colormap(gray);
+
+% Compute rows, cols
+[m n] = size(X);
+example_height = (n / example_width);
+
+% Compute number of items to display
+display_rows = floor(sqrt(m));
+display_cols = ceil(m / display_rows);
+
+% Between images padding
+pad = 1;
+
+% Setup blank display
+display_array = - ones(pad + display_rows * (example_height + pad), ...
+                       pad + display_cols * (example_width + pad));
+
+% Copy each example into a patch on the display array
+curr_ex = 1;
+for j = 1:display_rows
+    for i = 1:display_cols
+        if curr_ex > m,
+            break;
+        end
+
+        % Scale the patch by its largest absolute value
+        max_val = max(abs(X(curr_ex, :)));
+        if max_val == 0
+            % All-zero example: dividing by 0 would fill the patch with NaN
+            max_val = 1;
+        end
+        display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
+                      pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
+                        reshape(X(curr_ex, :), example_height, example_width) / max_val;
+        curr_ex = curr_ex + 1;
+    end
+    if curr_ex > m,
+        break;
+    end
+end
+
+% Display Image
+h = imagesc(display_array, [-1 1]);
+
+% Do not show axis
+axis image off
+
+drawnow;
+
+end
View
8 mlclass-ex7/mlclass-ex7/drawLine.m
@@ -0,0 +1,8 @@
+function drawLine(p1, p2, varargin)
+%DRAWLINE Plots the segment joining point p1 to point p2
+%   DRAWLINE(p1, p2, ...) plots a line from (p1(1), p1(2)) to
+%   (p2(1), p2(2)). Any additional arguments are forwarded unchanged to
+%   PLOT (e.g. line style or 'LineWidth').
+
+xs = [p1(1) p2(1)];
+ys = [p1(2) p2(2)];
+plot(xs, ys, varargin{:});
+
+end
View
174 mlclass-ex7/mlclass-ex7/ex7.m
@@ -0,0 +1,174 @@
+%% Machine Learning Online Class
+% Exercise 7 | Principal Component Analysis and K-Means Clustering
+%
+% Instructions
+% ------------
+%
+% This file contains code that helps you get started on the
+% exercise. You will need to complete the following functions:
+%
+% pca.m
+% projectData.m
+% recoverData.m
+% computeCentroids.m
+% findClosestCentroids.m
+% kMeansInitCentroids.m
+%
+% For this exercise, you will not need to change any code in this file,
+% or any other files other than those mentioned above.
+%
+
+%% Initialization
+clear ; close all; clc
+
+%% ================= Part 1: Find Closest Centroids ====================
+% To help you implement K-Means, we have divided the learning algorithm
+% into two functions -- findClosestCentroids and computeCentroids. In this
+% part, you should complete the code in the findClosestCentroids function.
+%
+fprintf('Finding closest centroids.\n\n');
+
+% Load an example dataset that we will be using
+load('ex7data2.mat');
+
+% Select an initial set of centroids
+K = 3; % 3 Centroids
+initial_centroids = [3 3; 6 2; 8 5];
+
+% Find the closest centroids for the examples using the
+% initial_centroids
+idx = findClosestCentroids(X, initial_centroids);
+
+fprintf('Closest centroids for the first 3 examples: \n')
+fprintf(' %d', idx(1:3));
+fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ===================== Part 2: Compute Means =========================
+% After implementing the closest centroids function, you should now
+% complete the computeCentroids function.
+%
+fprintf('\nComputing centroids means.\n\n');
+
+% Compute means based on the closest centroids found in the previous part.
+centroids = computeCentroids(X, idx, K);
+
+fprintf('Centroids computed after initial finding of closest centroids: \n')
+fprintf(' %f %f \n' , centroids');
+fprintf('\n(the centroids should be\n');
+fprintf(' [ 2.428301 3.157924 ]\n');
+fprintf(' [ 5.813503 2.633656 ]\n');
+fprintf(' [ 7.119387 3.616684 ]\n\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% =================== Part 3: K-Means Clustering ======================
+% After you have completed the two functions computeCentroids and
+% findClosestCentroids, you have all the necessary pieces to run the
+% kMeans algorithm. In this part, you will run the K-Means algorithm on
+% the example dataset we have provided.
+%
+fprintf('\nRunning K-Means clustering on example dataset.\n\n');
+
+% Load an example dataset
+load('ex7data2.mat');
+
+% Settings for running K-Means
+K = 3;
+max_iters = 10;
+
+% For consistency, here we set centroids to specific values
+% but in practice you want to generate them automatically, such as by
+% settings them to be random examples (as can be seen in
+% kMeansInitCentroids).
+initial_centroids = [3 3; 6 2; 8 5];
+
+% Run K-Means algorithm. The 'true' at the end tells our function to plot
+% the progress of K-Means
+[centroids, idx] = runkMeans(X, initial_centroids, max_iters, true);
+fprintf('\nK-Means Done.\n\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ============= Part 4: K-Means Clustering on Pixels ===============
+% In this exercise, you will use K-Means to compress an image. To do this,
+% you will first run K-Means on the colors of the pixels in the image and
+% then you will map each pixel on to its closest centroid.
+%
+% You should now complete the code in kMeansInitCentroids.m
+%
+
+fprintf('\nRunning K-Means clustering on pixels from an image.\n\n');
+
+% Load an image of a bird
+A = double(imread('bird_small.png'));
+
+% If imread does not work for you, you can try instead
+% load ('bird_small.mat');
+
+A = A / 255; % Divide by 255 so that all values are in the range 0 - 1
+
+% Size of the image
+img_size = size(A);
+
+% Reshape the image into an Nx3 matrix where N = number of pixels.
+% Each row will contain the Red, Green and Blue pixel values
+% This gives us our dataset matrix X that we will use K-Means on.
+X = reshape(A, img_size(1) * img_size(2), 3);
+
+% Run your K-Means algorithm on this data
+% You should try different values of K and max_iters here
+K = 16;
+max_iters = 10;
+
+% When using K-Means, it is important to initialize the centroids
+% randomly.
+% You should complete the code in kMeansInitCentroids.m before proceeding
+initial_centroids = kMeansInitCentroids(X, K);
+
+% Run K-Means
+[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% ================= Part 5: Image Compression ======================
+% In this part of the exercise, you will use the clusters of K-Means to
+% compress an image. To do this, we first find the closest clusters for
+% each example. After that, we replace each pixel with the value of its
+% assigned centroid.
+
+fprintf('\nApplying K-Means to compress an image.\n\n');
+
+% Find closest cluster members
+idx = findClosestCentroids(X, centroids);
+
+% Essentially, now we have represented the image X in terms of the
+% indices in idx.
+
+% We can now recover the image from the indices (idx) by mapping each pixel
+% (specified by its index in idx) to the centroid value
+X_recovered = centroids(idx,:);
+
+% Reshape the recovered image into proper dimensions
+X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3);
+
+% Display the original image
+subplot(1, 2, 1);
+imagesc(A);
+title('Original');
+
+% Display compressed image side by side
+subplot(1, 2, 2);
+imagesc(X_recovered)
+title(sprintf('Compressed, with %d colors.', K));
+
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
View
235 mlclass-ex7/mlclass-ex7/ex7_pca.m
@@ -0,0 +1,235 @@
+%% Machine Learning Online Class
+% Exercise 7 | Principal Component Analysis and K-Means Clustering
+%
+% Instructions
+% ------------
+%
+% This file contains code that helps you get started on the
+% exercise. You will need to complete the following functions:
+%
+% pca.m
+% projectData.m
+% recoverData.m
+% computeCentroids.m
+% findClosestCentroids.m
+% kMeansInitCentroids.m
+%
+% For this exercise, you will not need to change any code in this file,
+% or any other files other than those mentioned above.
+%
+
+%% Initialization
+clear ; close all; clc
+
+%% ================== Part 1: Load Example Dataset ===================
+% We start this exercise by using a small dataset that is easy to
+% visualize
+%
+fprintf('Visualizing example dataset for PCA.\n\n');
+
+% The following command loads the dataset. You should now have the
+% variable X in your environment
+load ('ex7data1.mat');
+
+% Visualize the example dataset
+plot(X(:, 1), X(:, 2), 'bo');
+axis([0.5 6.5 2 8]); axis square;
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% =============== Part 2: Principal Component Analysis ===============
+% You should now implement PCA, a dimension reduction technique. You
+% should complete the code in pca.m
+%
+fprintf('\nRunning PCA on example dataset.\n\n');
+
+% Before running PCA, it is important to first normalize X
+[X_norm, mu, sigma] = featureNormalize(X);
+
+% Run PCA
+[U, S] = pca(X_norm);
+
+% mu, the mean of each feature, was already returned by featureNormalize above
+
+% Draw the eigenvectors centered at mean of data. These lines show the
+% directions of maximum variations in the dataset.
+hold on;
+drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2);
+drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2);
+hold off;
+
+fprintf('Top eigenvector: \n');
+fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1));
+fprintf('\n(you should expect to see -0.707107 -0.707107)\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% =================== Part 3: Dimension Reduction ===================
+% You should now implement the projection step to map the data onto the
+% first k eigenvectors. The code will then plot the data in this reduced
+% dimensional space. This will show you what the data looks like when
+% using only the corresponding eigenvectors to reconstruct it.
+%
+% You should complete the code in projectData.m
+%
+fprintf('\nDimension reduction on example dataset.\n\n');
+
+% Plot the normalized dataset (returned from pca)
+plot(X_norm(:, 1), X_norm(:, 2), 'bo');
+axis([-4 3 -4 3]); axis square
+
+% Project the data onto K = 1 dimension
+K = 1;
+Z = projectData(X_norm, U, K);
+fprintf('Projection of the first example: %f\n', Z(1));
+fprintf('\n(this value should be about 1.481274)\n\n');
+
+X_rec = recoverData(Z, U, K);
+fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2));
+fprintf('\n(this value should be about -1.047419 -1.047419)\n\n');
+
+% Draw lines connecting the projected points to the original points
+hold on;
+plot(X_rec(:, 1), X_rec(:, 2), 'ro');
+for i = 1:size(X_norm, 1)
+ drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1);
+end
+hold off
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =============== Part 4: Loading and Visualizing Face Data =============
+% We start the exercise by first loading and visualizing the dataset.
+% The following code will load the dataset into your environment
+%
+fprintf('\nLoading face dataset.\n\n');
+
+% Load Face dataset
+load ('ex7faces.mat')
+
+% Display the first 100 faces in the dataset
+displayData(X(1:100, :));
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =========== Part 5: PCA on Face Data: Eigenfaces ===================
+% Run PCA and visualize the eigenvectors which are in this case eigenfaces
+% We display the first 36 eigenfaces.
+%
+fprintf(['\nRunning PCA on face dataset.\n' ...
+ '(this mght take a minute or two ...)\n\n']);
+
+% Before running PCA, it is important to first normalize X by subtracting
+% the mean value from each feature
+[X_norm, mu, sigma] = featureNormalize(X);
+
+% Run PCA
+[U, S] = pca(X_norm);
+
+% Visualize the top 36 eigenvectors found
+displayData(U(:, 1:36)');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% ============= Part 6: Dimension Reduction for Faces =================
+% Project images to the eigen space using the top k eigenvectors
+% (for example, as a preprocessing step before applying a machine learning
+% algorithm to the lower-dimensional data)
+fprintf('\nDimension reduction for face dataset.\n\n');
+
+K = 100;
+Z = projectData(X_norm, U, K);
+
+fprintf('The projected data Z has a size of: ')
+fprintf('%d ', size(Z));
+
+fprintf('\n\nProgram paused. Press enter to continue.\n');
+pause;
+
+%% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
+% Project images to the eigen space using the top K eigen vectors and
+% visualize only using those K dimensions
+% Compare to the original input, which is also displayed
+
+fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n');
+
+K = 100;
+X_rec = recoverData(Z, U, K);
+
+% Display normalized data
+subplot(1, 2, 1);
+displayData(X_norm(1:100,:));
+title('Original faces');
+axis square;
+
+% Display reconstructed data from only k eigenfaces
+subplot(1, 2, 2);
+displayData(X_rec(1:100,:));
+title('Recovered faces');
+axis square;
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
+% One useful application of PCA is to use it to visualize high-dimensional
+% data. In the last K-Means exercise you ran K-Means on 3-dimensional
+% pixel colors of an image. We first visualize this output in 3D, and then
+% apply PCA to obtain a visualization in 2D.
+
+close all; close all; clc
+
+% Re-load the image from the previous exercise and run K-Means on it
+% For this to work, you need to complete the K-Means assignment first
+A = double(imread('bird_small.png'));
+
+% If imread does not work for you, you can try instead
+% load ('bird_small.mat');
+
+A = A / 255;
+img_size = size(A);
+X = reshape(A, img_size(1) * img_size(2), 3);
+K = 16;
+max_iters = 10;
+initial_centroids = kMeansInitCentroids(X, K);
+[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
+
+% Sample 1000 random indexes (since working with all the data is
+% too expensive). If you have a fast computer, you may increase this.
+sel = floor(rand(1000, 1) * size(X, 1)) + 1;
+
+% Setup Color Palette
+palette = hsv(K);
+colors = palette(idx(sel), :);
+
+% Visualize the data and centroid memberships in 3D
+figure;
+scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors);
+title('Pixel dataset plotted in 3D. Color shows centroid memberships');
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
+% Use PCA to project this cloud to 2D for visualization
+
+% Subtract the mean to use PCA
+[X_norm, mu, sigma] = featureNormalize(X);
+
+% PCA and project the data to 2D
+[U, S] = pca(X_norm);
+Z = projectData(X_norm, U, 2);
+
+% Plot in 2D
+figure;
+plotDataPoints(Z(sel, :), idx(sel), K);
+title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction');
+fprintf('Program paused. Press enter to continue.\n');
+pause;
View
BIN mlclass-ex7/mlclass-ex7/ex7data1.mat
Binary file not shown.
View
BIN mlclass-ex7/mlclass-ex7/ex7data2.mat
Binary file not shown.
View
BIN mlclass-ex7/mlclass-ex7/ex7faces.mat
Binary file not shown.
View
17 mlclass-ex7/mlclass-ex7/featureNormalize.m
@@ -0,0 +1,17 @@
+function [X_norm, mu, sigma] = featureNormalize(X)
+%FEATURENORMALIZE Normalizes the features in X
+%   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a normalized version
+%   of X where the mean value of each feature is 0 and the standard
+%   deviation is 1. This is often a good preprocessing step to do when
+%   working with learning algorithms.
+%
+%   mu is the per-column mean of X and sigma the per-column standard
+%   deviation (as computed by std). A constant column has sigma == 0; it is
+%   left at 0 in X_norm instead of being turned into NaN by a division by
+%   zero. The returned sigma is not modified.
+
+mu = mean(X);
+X_norm = bsxfun(@minus, X, mu);
+
+sigma = std(X_norm);
+
+% Divide by 1 wherever sigma is 0 so constant features stay finite
+scale = sigma;
+scale(scale == 0) = 1;
+X_norm = bsxfun(@rdivide, X_norm, scale);
+
+end
View
33 mlclass-ex7/mlclass-ex7/findClosestCentroids.m
@@ -0,0 +1,33 @@
+function idx = findClosestCentroids(X, centroids)
+%FINDCLOSESTCENTROIDS computes the centroid memberships for every example
+% idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids
+% in idx for a dataset X where each row is a single example. idx = m x 1
+% vector of centroid assignments (i.e. each entry in range [1..K])
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns the all-zero vector idx is initialized with.
+%
+
+% Set K
+K = size(centroids, 1);
+
+% You need to return the following variables correctly.
+idx = zeros(size(X,1), 1);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Go over every example, find its closest centroid, and store
+% the index inside idx at the appropriate location.
+% Concretely, idx(i) should contain the index of the centroid
+% closest to example i. Hence, it should be a value in the
+% range 1..K
+%
+% Note: You can use a for-loop over the examples to compute this.
+%
+
+
+
+
+
+
+
+% =============================================================
+
+end
+
View
26 mlclass-ex7/mlclass-ex7/kMeansInitCentroids.m
@@ -0,0 +1,26 @@
+function centroids = kMeansInitCentroids(X, K)
+%KMEANSINITCENTROIDS This function initializes K centroids that are to be
+%used in K-Means on the dataset X
+% centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be
+% used with the K-Means on the dataset X
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns a K x size(X, 2) matrix of zeros.
+%
+
+% You should return these values correctly
+centroids = zeros(K, size(X, 2));
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: You should set centroids to randomly chosen examples from
+% the dataset X
+%
+
+
+
+
+
+
+
+
+% =============================================================
+
+end
+
View
31 mlclass-ex7/mlclass-ex7/pca.m
@@ -0,0 +1,31 @@
+function [U, S] = pca(X)
+%PCA Run principal component analysis on the dataset X
+% [U, S] = pca(X) computes eigenvectors of the covariance matrix of X
+% Returns the eigenvectors U, the eigenvalues (on diagonal) in S
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns the n x n zero matrices U and S are initialized with.
+%
+
+% Useful values
+[m, n] = size(X);
+
+% You need to return the following variables correctly.
+U = zeros(n);
+S = zeros(n);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: You should first compute the covariance matrix. Then, you
+% should use the "svd" function to compute the eigenvectors
+% and eigenvalues of the covariance matrix.
+%
+% Note: When computing the covariance matrix, remember to divide by m (the
+% number of examples).
+%
+
+
+
+
+
+
+
+% =========================================================================
+
+end
View
14 mlclass-ex7/mlclass-ex7/plotDataPoints.m
@@ -0,0 +1,14 @@
+function plotDataPoints(X, idx, K)
+%PLOTDATAPOINTS Scatter-plots the rows of X, coloured by their assignment
+%   PLOTDATAPOINTS(X, idx, K) draws the first two columns of X as a scatter
+%   plot. Points that share the same entry in idx are drawn in the same
+%   colour, taken from an HSV palette with K + 1 entries.
+
+% Look up one palette row per data point
+cmap = hsv(K + 1);
+point_colors = cmap(idx, :);
+
+% Draw the points with marker size 15
+scatter(X(:,1), X(:,2), 15, point_colors);
+
+end
View
27 mlclass-ex7/mlclass-ex7/plotProgresskMeans.m
@@ -0,0 +1,27 @@
+function plotProgresskMeans(X, centroids, previous, idx, K, i)
+%PLOTPROGRESSKMEANS Visualises one iteration of k-Means on 2D data
+%   PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data
+%   points coloured by their assignment idx, marks the current centroids
+%   with black x's, and connects every centroid to its location in
+%   previous with a line. i is the iteration number shown in the title.
+
+% Data points, coloured by assignment
+plotDataPoints(X, idx, K);
+
+% Current centroids as black x markers
+plot(centroids(:,1), centroids(:,2), 'x', ...
+     'MarkerEdgeColor','k', ...
+     'MarkerSize', 10, 'LineWidth', 3);
+
+% Segment from each current centroid back to its previous position
+num_centroids = size(centroids, 1);
+for c = 1:num_centroids
+    drawLine(centroids(c, :), previous(c, :));
+end
+
+title(sprintf('Iteration number %d', i))
+
+end
+
View
26 mlclass-ex7/mlclass-ex7/projectData.m
@@ -0,0 +1,26 @@
+function Z = projectData(X, U, K)
+%PROJECTDATA Computes the reduced data representation when projecting only
+%on to the top k eigenvectors
+% Z = projectData(X, U, K) computes the projection of
+% the normalized inputs X into the reduced dimensional space spanned by
+% the first K columns of U. It returns the projected examples in Z.
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns a size(X, 1) x K matrix of zeros.
+%
+
+% You need to return the following variables correctly.
+Z = zeros(size(X, 1), K);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Compute the projection of the data using only the top K
+% eigenvectors in U (first K columns).
+% For the i-th example X(i,:), the projection on to the k-th
+% eigenvector is given as follows:
+% x = X(i, :)';
+% projection_k = x' * U(:, k);
+%
+
+
+
+
+% =============================================================
+
+end
View
28 mlclass-ex7/mlclass-ex7/recoverData.m
@@ -0,0 +1,28 @@
+function X_rec = recoverData(Z, U, K)
+%RECOVERDATA Recovers an approximation of the original data when using the
+%projected data
+% X_rec = RECOVERDATA(Z, U, K) recovers an approximation of the
+% original data that has been reduced to K dimensions. It returns the
+% approximate reconstruction in X_rec.
+%
+% NOTE: the exercise section below is still empty, so this function
+% currently returns a size(Z, 1) x size(U, 1) matrix of zeros.
+%
+
+% You need to return the following variables correctly.
+X_rec = zeros(size(Z, 1), size(U, 1));
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Compute the approximation of the data by projecting back
+% onto the original space using the top K eigenvectors in U.
+%
+% For the i-th example Z(i,:), the (approximate)
+% recovered data for dimension j is given as follows:
+% v = Z(i, :)';
+% recovered_j = v' * U(j, 1:K)';
+%
+% Notice that U(j, 1:K) is a row vector.
+%
+
+
+
+% =============================================================
+
+end
View
64 mlclass-ex7/mlclass-ex7/runkMeans.m
@@ -0,0 +1,64 @@
+function [centroids, idx] = runkMeans(X, initial_centroids, ...
+                                      max_iters, plot_progress)
+%RUNKMEANS Runs the K-Means algorithm on the rows of data matrix X
+%   [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ...
+%   plot_progress) starts from initial_centroids and performs max_iters
+%   iterations of K-Means, each consisting of an assignment step
+%   (findClosestCentroids) followed by an update step (computeCentroids).
+%   plot_progress is an optional true/false flag (default false); when set,
+%   every iteration is plotted and the function waits for the user to
+%   press enter. Returns centroids, the K x n matrix of final centroids,
+%   and idx, the m x 1 vector of centroid assignments from the last
+%   assignment step.
+
+% Plotting is off unless the caller explicitly asks for it
+if nargin < 4 || isempty(plot_progress)
+    plot_progress = false;
+end
+
+% Open a figure that accumulates the per-iteration plots
+if plot_progress
+    figure;
+    hold on;
+end
+
+% Initial state
+num_examples = size(X, 1);
+K = size(initial_centroids, 1);
+centroids = initial_centroids;
+previous_centroids = centroids;
+idx = zeros(num_examples, 1);
+
+for iter = 1:max_iters
+
+    % Report progress (flushed right away when running under Octave)
+    fprintf('K-Means iteration %d/%d...\n', iter, max_iters);
+    if exist('OCTAVE_VERSION')
+        fflush(stdout);
+    end
+
+    % Assignment step: closest centroid for every example
+    idx = findClosestCentroids(X, centroids);
+
+    % Show the current state and remember it for the next plot
+    if plot_progress
+        plotProgresskMeans(X, centroids, previous_centroids, idx, K, iter);
+        previous_centroids = centroids;
+        fprintf('Press enter to continue.\n');
+        pause;
+    end
+
+    % Update step: recompute the centroids from the assignments
+    centroids = computeCentroids(X, idx, K);
+end
+
+% Release the figure again
+if plot_progress
+    hold off;
+end
+
+end
+
View
336 mlclass-ex7/mlclass-ex7/submit.m
@@ -0,0 +1,336 @@
+function submit(partId)
+%SUBMIT Submit your code and output to the ml-class servers
+% SUBMIT() will connect to the ml-class server and submit your solution
+% SUBMIT(partId) submits the given part without prompting for it; the id
+% numel(validParts()) + 1 submits every part in turn.
+
+ fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+ homework_id());
+ % Ask interactively when no part was passed in
+ if ~exist('partId', 'var') || isempty(partId)
+ partId = promptPart();
+ end
+
+ % Check valid partId
+ partNames = validParts();
+ if ~isValidPartId(partId)
+ fprintf('!! Invalid homework part selected.\n');
+ fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
+ fprintf('!! Submission Cancelled\n');
+ return
+ end
+
+ % loginPrompt returns empty values when either field was left blank
+ [login password] = loginPrompt();
+ if isempty(login)
+ fprintf('!! Submission Cancelled\n');
+ return
+ end
+
+ fprintf('\n== Connecting to ml-class ... ');
+ if exist('OCTAVE_VERSION')
+ fflush(stdout);
+ end
+
+ % Setup submit list
+ % (the id one past the last part means "all of the above")
+ if partId == numel(partNames) + 1
+ submitParts = 1:numel(partNames);
+ else
+ submitParts = [partId];
+ end
+
+ for s = 1:numel(submitParts)
+ % Submit this part
+ partId = submitParts(s);
+
+ % Get Challenge
+ [login, ch, signature] = getChallenge(login);
+ if isempty(login) || isempty(ch) || isempty(signature)
+ % Some error occured, error string in first return element.
+ fprintf('\n!! Error: %s\n\n', login);
+ return
+ end
+
+ % Attempt Submission with Challenge
+ ch_resp = challengeResponse(login, password, ch);
+ [result, str] = submitSolution(login, ch_resp, partId, output(partId), ...
+ source(partId), signature);
+
+ fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ...
+ homework_id(), partId, partNames{partId});
+ fprintf('== %s\n', strtrim(str));
+ if exist('OCTAVE_VERSION')
+ fflush(stdout);
+ end
+ end
+
+end
+
+% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
+
+function id = homework_id()
+ % Identifier of this programming exercise, returned as a string
+ id = '7';
+end
+
+function [partNames] = validParts()
+    % Display names of the submittable parts, in part-id order
+    partNames = { 'Find Closest Centroids (k-Means)', ...
+                  'Compute Centroid Means (k-Means)', ...
+                  'PCA', ...
+                  'Project Data (PCA)', ...
+                  'Recover Data (PCA)' };
+end
+
+function srcs = sources()
+    % Source files that make up each part, one inner cell array per part
+    part_files = { 'findClosestCentroids.m', ...
+                   'computeCentroids.m', ...
+                   'pca.m', ...
+                   'projectData.m', ...
+                   'recoverData.m' };
+    srcs = cell(1, numel(part_files));
+    for p = 1:numel(part_files)
+        srcs{p} = { part_files{p} };
+    end
+end
+
+function out = output(partId)
+    % Runs the function under test for the given part on fixed inputs and
+    % returns its result formatted as a string of '%0.5f ' fields.
+
+    % Deterministic test inputs shared by all parts
+    X = reshape(sin(1:165), 15, 11);
+    Z = reshape(cos(1:121), 11, 11);
+    C = Z(1:5, :);
+    idx = (1 + mod(1:15, 3))';
+
+    switch partId
+        case 1
+            idx = findClosestCentroids(X, C);
+            out = sprintf('%0.5f ', idx(:));
+        case 2
+            centroids = computeCentroids(X, idx, 3);
+            out = sprintf('%0.5f ', centroids(:));
+        case 3
+            [U, S] = pca(X);
+            out = sprintf('%0.5f ', abs([U(:); S(:)]));
+        case 4
+            X_proj = projectData(X, Z, 5);
+            out = sprintf('%0.5f ', X_proj(:));
+        case 5
+            X_rec = recoverData(X(:,1:5), Z, 5);
+            out = sprintf('%0.5f ', X_rec(:));
+    end
+end
+
+function url = challenge_url()
+ % Address that getChallenge posts the login email to
+ url = 'http://www.ml-class.org/course/homework/challenge';
+end
+
+function url = submit_url()
+ % Address that submitSolution posts the submission to
+ url = 'http://www.ml-class.org/course/homework/submit';
+end
+
+% ========================= CHALLENGE HELPERS =========================
+
+function src = source(partId)
+    % Returns the text of every source file belonging to part partId,
+    % each followed by the separator '||||||||'. Returns '' when partId is
+    % larger than the number of entries in sources().
+    src = '';
+    src_files = sources();
+    if partId <= numel(src_files)
+        flist = src_files{partId};
+        for i = 1:numel(flist)
+            % fileread raises a descriptive error when the file cannot be
+            % opened and returns an empty string for an empty file; the
+            % previous feof/fgets loop used an unchecked file id and
+            % appended fgets' -1 end-of-file marker for empty files.
+            src = [src fileread(flist{i}) '||||||||'];
+        end
+    end
+end
+
+function ret = isValidPartId(partId)
+    % True when partId is a single whole number between 1 and
+    % numel(validParts()) + 1 (the extra id stands for "all parts").
+    % The isscalar test also rejects [] and keeps the && chain from
+    % erroring on vectors; the fix() test rejects NaN and fractional ids,
+    % which cannot be used as an index later on.
+    partNames = validParts();
+    ret = isscalar(partId) && (partId == fix(partId)) && ...
+          (partId >= 1) && (partId <= numel(partNames) + 1);
+end
+
+function partId = promptPart()
+    % Lists the parts of this exercise, reads the user's choice from the
+    % keyboard and returns it as a number, or -1 when the choice is not a
+    % valid part id.
+    fprintf('== Select which part(s) to submit:\n');
+    partNames = validParts();
+    srcFiles = sources();
+    for i = 1:numel(partNames)
+        fprintf('== %d) %s [', i, partNames{i});
+        fprintf(' %s ', srcFiles{i}{:});
+        fprintf(']\n');
+    end
+    fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
+            numel(partNames) + 1, numel(partNames) + 1);
+    selPart = input('', 's');
+    % str2double only parses a number (NaN otherwise); str2num would
+    % evaluate whatever expression the user typed
+    partId = str2double(selPart);
+    if ~isValidPartId(partId)
+        partId = -1;
+    end
+end
+
+function [email,ch,signature] = getChallenge(email)
+    % Posts the email address to the challenge URL and splits the trimmed,
+    % '|'-separated reply into its first three fields: email, challenge
+    % string and signature.
+    reply = strtrim(urlread(challenge_url(), 'post', {'email_address', email}));
+
+    [email, rest] = strtok(reply, '|');
+    [ch, rest] = strtok(rest, '|');
+    signature = strtok(rest, '|');
+end
+
+
+function [result, str] = submitSolution(email, ch_resp, part, output, ...
+                                        source, signature)
+    % Posts one part's output and source to the submit URL. str is the raw
+    % server reply; result is always 0 because the reply is not parsed.
+
+    result = 0;
+
+    form_fields = {'homework', homework_id(), ...
+                   'part', num2str(part), ...
+                   'email', email, ...
+                   'output', output, ...
+                   'source', source, ...
+                   'challenge_response', ch_resp, ...
+                   'signature', signature};
+
+    str = urlread(submit_url(), 'post', form_fields);
+
+end
+
+% =========================== LOGIN HELPERS ===========================
+
+function [login password] = loginPrompt()
+    % Asks for the credentials and returns them, or two empty values when
+    % either of them was left blank.
+    [login password] = basicPrompt();
+
+    missing = isempty(login) || isempty(password);
+    if missing
+        login = [];
+        password = [];
+    end
+end
+
+
+function [login password] = basicPrompt()
+ % Reads login and password as raw strings from the keyboard
+ % (the password is entered through a plain input() call)
+ login = input('Login (Email address): ', 's');
+ password = input('Password: ', 's');
+end
+
+
+function [str] = challengeResponse(email, passwd, challenge)
+    % Builds the response to a server challenge: the SHA-1 of the challenge
+    % followed by the SHA-1 of salt + email + password, reduced to 16 of
+    % its characters picked at random and kept in their original order.
+    salt = ')~/|]QMB3[!W`?OVt7qC"@+}';
+    inner = sha1([salt email passwd]);
+    digest = sha1([challenge inner]);
+    picks = randperm(numel(digest));
+    picks = sort(picks(1:16));
+    str = digest(picks);
+end
+
+
+% =============================== SHA-1 ================================
+
+function hash = sha1(str)
+ % Computes a SHA-1 style digest of the character vector str and returns it
+ % as a 40-character lowercase hexadecimal string. Uses the local helpers
+ % bitadd and bitrotate for 32-bit addition and rotation.
+
+ % Initialize variables
+ h0 = uint32(1732584193);
+ h1 = uint32(4023233417);
+ h2 = uint32(2562383102);
+ h3 = uint32(271733878);
+ h4 = uint32(3285377520);
+
+ % Convert to word array
+ strlen = numel(str);
+
+ % Break string into chars and append the bit 1 to the message
+ % (128 is the byte 10000000; zero bytes then pad so that four bytes are
+ % always available for every word read in the packing loop below)
+ mC = [double(str) 128];
+ mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];
+
+ % Message length in bits, and the number of 512-bit chunks needed for the
+ % message plus the 1 bit plus the 64-bit length field
+ numB = strlen * 8;
+ if exist('idivide')
+ numC = idivide(uint32(numB + 65), 512, 'ceil');
+ else
+ numC = ceil(double(numB + 65)/512);
+ end
+ numW = numC * 16;
+ mW = zeros(numW, 1, 'uint32');
+
+ % Pack four consecutive bytes into one 32-bit word, first byte highest
+ idx = 1;
+ for i = 1:4:strlen + 1
+ mW(idx) = bitor(bitor(bitor( ...
+ bitshift(uint32(mC(i)), 24), ...
+ bitshift(uint32(mC(i+1)), 16)), ...
+ bitshift(uint32(mC(i+2)), 8)), ...
+ uint32(mC(i+3)));
+ idx = idx + 1;
+ end
+
+ % Append length of message
+ % (upper 32 bits in the second-to-last word, lower 32 bits in the last)
+ mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
+ mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));
+
+ % Process the message in successive 512-bit chs
+ for cId = 1 : double(numC)
+ cSt = (cId - 1) * 16 + 1;
+ cEnd = cId * 16;
+ ch = mW(cSt : cEnd);
+
+ % Extend the sixteen 32-bit words into eighty 32-bit words
+ for j = 17 : 80
+ ch(j) = ch(j - 3);
+ ch(j) = bitxor(ch(j), ch(j - 8));
+ ch(j) = bitxor(ch(j), ch(j - 14));
+ ch(j) = bitxor(ch(j), ch(j - 16));
+ ch(j) = bitrotate(ch(j), 1);
+ end
+
+ % Initialize hash value for this ch
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+
+ % Main loop
+ % (the mixing function f and constant k change every 20 rounds)
+ for i = 1 : 80
+ if(i >= 1 && i <= 20)
+ f = bitor(bitand(b, c), bitand(bitcmp(b), d));
+ k = uint32(1518500249);
+ elseif(i >= 21 && i <= 40)
+ f = bitxor(bitxor(b, c), d);
+ k = uint32(1859775393);
+ elseif(i >= 41 && i <= 60)
+ f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
+ k = uint32(2400959708);
+ elseif(i >= 61 && i <= 80)
+ f = bitxor(bitxor(b, c), d);
+ k = uint32(3395469782);
+ end
+
+ % t = rotl(a, 5) + f + e + k + ch(i), then shift the working variables
+ t = bitrotate(a, 5);
+ t = bitadd(t, f);
+ t = bitadd(t, e);
+ t = bitadd(t, k);
+ t = bitadd(t, ch(i));
+ e = d;
+ d = c;
+ c = bitrotate(b, 30);
+ b = a;
+ a = t;
+
+ end
+ % Fold this chunk's result into the running hash state
+ h0 = bitadd(h0, a);
+ h1 = bitadd(h1, b);
+ h2 = bitadd(h2, c);
+ h3 = bitadd(h3, d);
+ h4 = bitadd(h4, e);
+
+ end
+
+ % Eight hex digits per state word, concatenated in order h0..h4
+ hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);
+
+ hash = lower(hash);
+
+end
+
+function ret = bitadd(iA, iB)
+    % Adds iA and iB in double precision, clears bit 33 (the carry out of
+    % a 32-bit addition) and returns the result as uint32.
+    total = double(iA) + double(iB);
+    ret = uint32(bitset(total, 33, 0));
+end
+
+function ret = bitrotate(iA, places)
+    % Combines iA shifted by `places` with iA shifted by `places - 32`,
+    % so the bits pushed out at one end re-enter at the other.
+    wrapped = bitshift(iA, places - 32);
+    shifted = bitshift(iA, places);
+    ret = bitor(shifted, wrapped);
+end
View
352 mlclass-ex7/mlclass-ex7/submitWeb.m
@@ -0,0 +1,352 @@
+function submitWeb(partId)
+%SUBMITWEB Generates a base64 encoded string for web-based submissions
+%   SUBMITWEB() will generate a base64 encoded string so that you can submit your
+%   solutions via a web form
+%
+%   SUBMITWEB(PARTID) uses the given part number instead of prompting for
+%   one. The encoded submission is written to a file whose name is read from
+%   the keyboard (an empty answer selects the suggested default name). When
+%   that file cannot be opened, the encoded text is printed instead so that
+%   it can be saved by hand.
+
+  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+          homework_id());
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = promptPart();
+  end
+
+  % Check valid partId
+  partNames = validParts();
+  if ~isValidPartId(partId)
+    fprintf('!! Invalid homework part selected.\n');
+    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames));
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  [login] = loginPrompt();
+  if isempty(login)
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  [result] = submitSolution(login, partId, output(partId), ...
+                            source(partId));
+  result = base64encode(result);
+
+  fprintf('\nSave as submission file [submit_ex%s_part%d.txt]: ', ...
+          homework_id(), partId);
+  saveAsFile = input('', 's');
+  if (isempty(saveAsFile))
+    saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), partId);
+  end
+
+  fid = fopen(saveAsFile, 'w');
+  % fopen reports failure with -1, which is nonzero and therefore "true" in
+  % a bare IF; compare explicitly so that the fallback branch is reachable.
+  if fid ~= -1
+    fwrite(fid, result);
+    fclose(fid);
+    fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
+    fprintf(['You can now submit your solutions through the web \n' ...
+             'form in the programming exercises. Select the corresponding \n' ...
+             'programming exercise to access the form.\n']);
+
+  else
+    fprintf('Unable to save to %s\n\n', saveAsFile);
+    fprintf(['You can create a submission file by saving the \n' ...
+             'following text in a file: (press enter to continue)\n\n']);
+    pause;
+    % Print through an explicit format so the data is never parsed as one.
+    fprintf('%s', result);
+  end
+
+end
+
+% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
+
+function id = homework_id()
+%HOMEWORK_ID Identifier of this programming exercise, as a string.
+  exerciseNumber = 7;
+  id = sprintf('%d', exerciseNumber);
+end
+
+function [partNames] = validParts()
+%VALIDPARTS Display names of the submittable parts, indexed by part number.
+%   Returns a 1-by-5 cell array of strings.
+  partNames = cell(1, 5);
+  partNames{1} = 'Find Closest Centroids (k-Means)';
+  partNames{2} = 'Compute Centroid Means (k-Means)';
+  partNames{3} = 'PCA';
+  partNames{4} = 'Project Data (PCA)';
+  partNames{5} = 'Recover Data (PCA)';
+end
+
+function srcs = sources()
+%SOURCES Source files that make up each part of the exercise.
+%   Returns a 1-by-5 cell array; element K is itself a cell array holding
+%   the file name(s) submitted for part K.
+  fileNames = { 'findClosestCentroids.m', ...
+                'computeCentroids.m', ...
+                'pca.m', ...
+                'projectData.m', ...
+                'recoverData.m' };
+  srcs = cell(1, numel(fileNames));
+  for k = 1:numel(fileNames)
+    % Parenthesis indexing keeps the one-element cell wrapper around the name.
+    srcs{k} = fileNames(k);
+  end
+end
+
+function out = output(partId)
+%OUTPUT Run the solution for one part on fixed inputs and format the result.
+%   OUT = OUTPUT(PARTID) calls the function under test for part PARTID
+%   (1 to 5) and returns its result flattened and printed with '%0.5f '.
+%   OUT is left unassigned for any other PARTID.
+
+  % Fixed test inputs built from sin/cos so every run sees the same data
+  X = reshape(sin(1:165), 15, 11);
+  Z = reshape(cos(1:121), 11, 11);
+  C = Z(1:5, :);
+  idx = (1 + mod(1:15, 3))';
+
+  numFormat = '%0.5f ';
+  switch partId
+    case 1
+      idx = findClosestCentroids(X, C);
+      out = sprintf(numFormat, idx(:));
+    case 2
+      centroids = computeCentroids(X, idx, 3);
+      out = sprintf(numFormat, centroids(:));
+    case 3
+      [U, S] = pca(X);
+      % Absolute values are submitted for the PCA outputs
+      out = sprintf(numFormat, abs([U(:); S(:)]));
+    case 4
+      X_proj = projectData(X, Z, 5);
+      out = sprintf(numFormat, X_proj(:));
+    case 5
+      X_rec = recoverData(X(:,1:5), Z, 5);
+      out = sprintf(numFormat, X_rec(:));
+  end
+end
+
+
+% ========================= SUBMIT HELPERS =========================
+
+function src = source(partId)
+%SOURCE Concatenate the text of the source files belonging to one part.
+%   SRC = SOURCE(PARTID) reads every file listed for PARTID by SOURCES() and
+%   returns their contents joined together, each file followed by the
+%   separator '||||||||'. SRC is '' when PARTID exceeds the number of parts.
+%
+%   Raises an error naming the file when a listed file cannot be opened.
+  src = '';
+  src_files = sources();
+  if partId <= numel(src_files)
+    flist = src_files{partId};
+    for i = 1:numel(flist)
+      fid = fopen(flist{i});
+      % fopen returns -1 on failure; stop with a message that names the file
+      % instead of failing later on an invalid file identifier.
+      if fid == -1
+        error('submitWeb:sourceNotFound', ...
+              'Unable to open source file %s', flist{i});
+      end
+      % fgets returns -1 (not a string) once nothing is left to read, so
+      % test the value itself; an empty file then contributes no text.
+      line = fgets(fid);
+      while ischar(line)
+        src = [src line];
+        line = fgets(fid);
+      end
+      fclose(fid);
+      src = [src '||||||||'];
+    end
+  end
+end
+
+function ret = isValidPartId(partId)
+%ISVALIDPARTID True when PARTID is a usable part number.
+%   RET is true only for a numeric scalar holding a whole number between 1
+%   and the number of parts returned by VALIDPARTS(). Empty, non-scalar,
+%   NaN and fractional values are rejected.
+  partNames = validParts();
+  % isscalar also rejects [] and keeps the && chain on scalar operands;
+  % the fix() comparison rejects fractions such as 1.5 as well as NaN.
+  ret = isnumeric(partId) && isscalar(partId) && ...
+        (partId == fix(partId)) && ...
+        (partId >= 1) && (partId <= numel(partNames));
+end
+
+function partId = promptPart()
+%PROMPTPART Ask on the keyboard which part to submit.
+%   Prints the numbered list of parts with their source files, reads one
+%   line of input and returns it as a number. Returns -1 when the answer is
+%   not accepted by ISVALIDPARTID.
+  % The original call passed homework_id() although the format has no
+  % conversion to consume it; the stray argument is dropped.
+  fprintf('== Select which part(s) to submit:\n');
+  partNames = validParts();
+  srcFiles = sources();
+  for i = 1:numel(partNames)
+    fprintf('== %d) %s [', i, partNames{i});
+    fprintf(' %s ', srcFiles{i}{:});
+    fprintf(']\n');
+  end
+  fprintf('\nEnter your choice [1-%d]: ', ...
+          numel(partNames));
+  selPart = input('', 's');
+  % str2double parses a single number and yields NaN otherwise; unlike
+  % str2num it does not evaluate the typed text as code.
+  partId = str2double(selPart);
+  if ~isValidPartId(partId)
+    partId = -1;
+  end
+end
+
+
+function [result, str] = submitSolution(email, part, output, source)
+%SUBMITSOLUTION Build the serialized submission record.
+%   RESULT is the text 'a:5:{' ... '}' containing five key/value pairs
+%   (homework, part, email, output, source). Each key is written by P_S and
+%   each value by P_S64.
+%
+%   NOTE(review): the second output STR is declared but never assigned in
+%   this function, so requesting it fails; the visible caller asks for
+%   RESULT only.
+
+  pairs = { 'homework', homework_id(); ...
+            'part',     part; ...
+            'email',    email; ...
+            'output',   output; ...
+            'source',   source };
+
+  body = '';
+  for k = 1:size(pairs, 1)
+    body = [body p_s(pairs{k, 1}) p_s64(pairs{k, 2})];
+  end
+
+  result = ['a:5:{' body '}'];
+
+end
+
+function s = p_s(str)
+%P_S Write STR as a length-prefixed string token: s:<length>:"<text>";
+  lengthText = num2str(numel(str));
+  s = horzcat('s:', lengthText, ':"', str, '";');
+end
+
+function s = p_s64(str)
+%P_S64 Base64-encode STR (no line breaks) and write it as a length-prefixed
+%   string token: s:<encoded length>:"<encoded text>";
+  encoded = base64encode(str, '');
+  lengthText = num2str(numel(encoded));
+  s = horzcat('s:', lengthText, ':"', encoded, '";');
+end
+
+% =========================== LOGIN HELPERS ===========================
+
+function [login] = loginPrompt()
+%LOGINPROMPT Ask the user for their login by delegating to BASICPROMPT.
+  login = basicPrompt();
+end
+
+
+function [login] = basicPrompt()
+%BASICPROMPT Read the login (e-mail address) from the keyboard as text.
+  promptText = 'Login (Email address): ';
+  login = input(promptText, 's');
+end
+
+
+% =========================== Base64 Encoder ============================
+% Thanks to Peter John Acklam
+%
+
+function y = base64encode(x, eol)
+%BASE64ENCODE Perform base64 encoding on a string.
+%
+% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending
+% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10).
+% The returned encoded string is broken into lines of no more than 76
+% characters each, and each line will end with EOL unless it is empty. Let
+% EOL be empty if you do not want the encoded string broken into lines.
+%
+% STR and EOL don't have to be strings (i.e., char arrays). The only
+% requirement is that they are vectors containing values in the range 0-255.
+%
+% This function may be used to encode strings into the Base64 encoding
+% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The
+% Base64 encoding is designed to represent arbitrary sequences of octets in a
+% form that need not be humanly readable. A 65-character subset
+% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per
+% printable character.
+%
+% Examples
+% --------
+%
+% If you want to encode a large file, you should encode it in chunks that are
+% a multiple of 57 bytes. This ensures that the base64 lines line up and
+% that you do not end up with padding in the middle. 57 bytes of data fills
+% one complete base64 line (76 == 57*4/3):
+%
+% If ifid and ofid are two file identifiers opened for reading and writing,
+% respectively, then you can base64 encode the data with
+%
+% while ~feof(ifid)
+% fwrite(ofid, base64encode(fread(ifid, 60*57)));
+% end
+%
+% or, if you have enough memory,
+%
+% fwrite(ofid, base64encode(fread(ifid)));
+%
+% See also BASE64DECODE.
+%
+% NOTE(review): this copy converts numeric input to text with NUM2STR before
+% encoding (first statement of the body), so a numeric STR is encoded as the
+% characters of its printed value, not as raw byte values. The paragraph
+% about non-string input and the FREAD examples above presumably describe
+% the function before that statement was added - confirm before relying on
+% them.
+
+% Author: Peter John Acklam
+% Time-stamp: 2004-02-03 21:36:56 +0100
+% E-mail: pjacklam@online.no
+% URL: http://home.online.no/~pjacklam
+
+ % numeric input (e.g. the part number passed by p_s64) is encoded as its
+ % text representation
+ if isnumeric(x)
+ x = num2str(x);
+ end
+
+ % make sure we have the EOL value
+ if nargin < 2
+ eol = sprintf('\n');
+ else
+ % more than one non-singleton dimension means EOL is not a vector
+ if sum(size(eol) > 1) > 1
+ error('EOL must be a vector.');
+ end
+ if any(eol(:) > 255)
+ error('EOL can not contain values larger than 255.');
+ end
+ end
+
+ % same vector test for the data itself
+ if sum(size(x) > 1) > 1
+ error('STR must be a vector.');
+ end
+
+ % from here on both inputs are handled as uint8 values
+ x = uint8(x);
+ eol = uint8(eol);
+
+ ndbytes = length(x); % number of decoded bytes
+ nchunks = ceil(ndbytes / 3); % number of chunks/groups
+ nebytes = 4 * nchunks; % number of encoded bytes
+
+ % add padding if necessary, to make the length of x a multiple of 3
+ if rem(ndbytes, 3)
+ x(end+1 : 3*nchunks) = 0;
+ end
+
+ % one column per 3-byte group in x, one column per 4-symbol group in y
+ x = reshape(x, [3, nchunks]); % reshape the data
+ y = repmat(uint8(0), 4, nchunks); % for the encoded data
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % Split up every 3 bytes into 4 pieces
+ %
+ % aaaaaabb bbbbcccc ccdddddd
+ %
+ % to form
+ %
+ % 00aaaaaa 00bbbbbb 00cccccc 00dddddd
+ %
+ y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:)
+
+ y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:)
+ y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:)
+
+ y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:)
+ y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:)
+
+ y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:)
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % Now perform the following mapping
+ %
+ % 0 - 25 -> A-Z
+ % 26 - 51 -> a-z
+ % 52 - 61 -> 0-9
+ % 62 -> +
+ % 63 -> /
+ %
+ % We could use a mapping vector like
+ %
+ % ['A':'Z', 'a':'z', '0':'9', '+/']
+ %
+ % but that would require an index vector of class double.
+ %
+ % Each line below selects one range of 6-bit values with a logical mask
+ % and writes the matching character codes into z.
+ z = repmat(uint8(0), size(y));
+ i = y <= 25; z(i) = 'A' + double(y(i));
+ i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i));
+ i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i));
+ i = y == 62; z(i) = '+';
+ i = y == 63; z(i) = '/';
+ y = z;
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ % Add padding if necessary.
+ %
+ npbytes = 3 * nchunks - ndbytes; % number of padding bytes
+ if npbytes
+ % linear indexing: the last npbytes entries of the last column
+ y(end-npbytes+1 : end) = '='; % '=' is used for padding
+ end
+
+ if isempty(eol)
+
+ % reshape to a row vector
+ y = reshape(y, [1, nebytes]);
+
+ else
+
+ nlines = ceil(nebytes / 76); % number of lines
+ neolbytes = length(eol); % number of bytes in eol string
+
+ % pad data so it becomes a multiple of 76 elements
+ y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
+ % the elements set here were already zero-filled by the line above
+ y(nebytes + 1 : 76 * nlines) = 0;
+ % one column per output line
+ y = reshape(y, 76, nlines);
+
+ % insert eol strings
+ % (eol is replicated into one column per line and appended below row 76)
+ eol = eol(:);
+ y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines));
+
+ % remove padding, but keep the last eol string
+ % m = linear index of the last encoded character in the last column,
+ % n = linear index of the last zero filler before the final eol
+ m = nebytes + neolbytes * (nlines - 1);
+ n = (76+neolbytes)*nlines - neolbytes;
+ y(m+1 : n) = '';
+
+ % extract and reshape to row vector
+ y = reshape(y, 1, m+neolbytes);
+
+ end
+
+ % output is a character array
+ y = char(y);
+
+end

0 comments on commit 2f52a17

Please sign in to comment.
Something went wrong with that request. Please try again.