# Imports

In [None]:
## Import necessary libraries here
import os
import random
import cv2
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
from google.colab.patches import cv2_imshow
import itertools
import copy
from math import sqrt
import math
from scipy.linalg import sqrtm
import plotly.graph_objects as go

# Affine Structure from Motion

## Overview
<img src="https://drive.google.com/uc?id=145B7vkdfyIj8GqHh9rltyetgVt7T4cGi" width="800"/>

- Given a sequence of images, corners are detected and their features are tracked across frames in [this earlier problem](https://colab.research.google.com/drive/1j4vIlUUfVCkMSonFKc_d83pfeOYqy288?usp=sharing).

- The current problem, affine structure from motion, recovers a 3D point cloud from the image sequence. The steps involved are shown below:

<img src="https://drive.google.com/uc?id=1KQ2x8BywgzFUgXCcYriYM5bwwXtupY3i" width="700"/>


## Data

**WARNING: Colab deletes all files every time the runtime is disconnected. Make sure to re-download the inputs when that happens.**

In [None]:
# Download Data -- run this cell only one time per runtime
# Fetch the dataset archive from Google Drive; it is saved as /content/Part2_data.zip.
!gdown 1A0Rin_YMmWkExjI99vfLYvU_dy-9gFTT
# Extract the archive under /content/, which creates /content/Part2_data/ (image frames and tracks.mat).
!unzip "/content/Part2_data.zip" -d "/content/"
# Load Matches: the tracked keypoint coordinates, read later through the 'track_x' and 'track_y' keys.
data = loadmat('/content/Part2_data/tracks.mat')

Downloading...
From: https://drive.google.com/uc?id=1A0Rin_YMmWkExjI99vfLYvU_dy-9gFTT
To: /content/Part2_data.zip

  0% 0.00/5.44M [00:00<?, ?B/s]
100% 5.44M/5.44M [00:00<00:00, 254MB/s]
Archive:  /content/Part2_data.zip
   creating: /content/Part2_data/
   creating: /content/Part2_data/images/
  inflating: /content/Part2_data/images/hotel.seq0.png  
  inflating: /content/Part2_data/images/hotel.seq1.png  
  inflating: /content/Part2_data/images/hotel.seq10.png  
  inflating: /content/Part2_data/images/hotel.seq11.png  
  inflating: /content/Part2_data/images/hotel.seq12.png  
  inflating: /content/Part2_data/images/hotel.seq13.png  
  inflating: /content/Part2_data/images/hotel.seq14.png  
  inflating: /content/Part2_data/images/hotel.seq15.png  
  inflating: /content/Part2_data/images/hotel.seq16.png  
  inflating: /content/Part2_data/images/hotel.seq17.png  
  inflating: /content/Part2_data/images/hotel.seq18.png  
  inflating: /content/Part2_data/images/hotel.seq19.png  
  inflat

## Code

In [None]:
# Tracked image coordinates. The indexing below treats rows as tracked points
# and columns as frames.
track_x = data['track_x']
track_y = data['track_y']

# Measurement matrix with one column per tracked point and two rows per frame
# (row 2*f holds the x coordinates of frame f, row 2*f + 1 the y coordinates).
# The row count is derived from the data instead of being hard-coded, so the
# cell works for sequences of any length.
D = np.stack((track_x.T, track_y.T), axis=1).reshape((2 * track_x.shape[1], -1))
# Remove the nan values (points which went out of frame): a point is kept only
# if neither coordinate is NaN in any frame.
nans = np.isnan(D)
keep_indices = np.flatnonzero(~nans.any(axis=0)).tolist()
track_x = track_x[keep_indices, :]
track_y = track_y[keep_indices, :]

def affineSFM(x, y):
  '''
  Affine structure from motion by rank-3 factorization.

  Steps:
    1. Subtract the per-frame centroid from x and y.
    2. Stack the centred coordinates into the measurement matrix D, with two
       rows per frame (x row, then y row) and one column per point.
    3. Take the SVD of D and keep the three largest singular values, giving
       an affine motion/structure pair (A_, X_) with D ~= A_ @ X_.
    4. Find the symmetric matrix L = C @ C.T for which every frame's two
       motion rows a1, a2 satisfy a1 L a1 = 1, a2 L a2 = 1, a1 L a2 = 0
       (least squares), and remove the affine ambiguity with C.

  Args:
    x: array of shape (num_points, num_frames) with the x image coordinates
       of every tracked point in every frame. Must not contain NaN.
    y: array of the same shape with the y image coordinates.

  Returns:
    (A, X) where A has shape (2 * num_frames, 3) (rows 2*f and 2*f + 1 are
    the two camera axes of frame f) and X has shape (3, num_points).

  Raises:
    ValueError: if x and y are not 2-D arrays of identical shape, or there
      are too few points/frames for a rank-3 factorization.
    numpy.linalg.LinAlgError: if the estimated L is not positive definite,
      so no Cholesky factor exists.
  '''
  x = np.asarray(x, dtype=float)
  y = np.asarray(y, dtype=float)
  if x.ndim != 2 or x.shape != y.shape:
    raise ValueError('x and y must be 2-D arrays of identical shape (num_points, num_frames)')
  num_points, num_frames = x.shape
  if min(2 * num_frames, num_points) < 3:
    raise ValueError('need at least 3 points and 2 frames for a rank-3 factorization')

  # Centre every frame on its own centroid. New arrays are created here so the
  # caller's x and y are left untouched.
  xn = x - x.mean(axis=0)
  yn = y - y.mean(axis=0)

  # Interleave the frames: row 2*f is x of frame f, row 2*f + 1 is y of frame f.
  D = np.empty((2 * num_frames, num_points))
  D[0::2] = xn.T
  D[1::2] = yn.T

  # Rank-3 factorization, splitting the singular values evenly between the
  # motion and structure factors.
  U, S, VT = np.linalg.svd(D, full_matrices=False)
  S3_sqrt = np.diag(np.sqrt(S[:3]))
  A_ = U[:, :3] @ S3_sqrt
  X_ = S3_sqrt @ VT[:3, :]

  # Orthographic constraints, written in the six unique entries of the
  # symmetric matrix L so the least-squares solution is symmetric by
  # construction (np.linalg.cholesky only reads one triangle and does not
  # verify symmetry).
  a1 = A_[0::2]
  a2 = A_[1::2]
  coeff_mat = np.concatenate((
    _metric_constraint_rows(a1, a1),
    _metric_constraint_rows(a2, a2),
    _metric_constraint_rows(a1, a2),
  ), axis=0)
  scalar_mat = np.concatenate((
    np.ones(num_frames),
    np.ones(num_frames),
    np.zeros(num_frames),
  ))
  l00, l01, l02, l11, l12, l22 = np.linalg.lstsq(coeff_mat, scalar_mat, rcond=None)[0]
  L = np.array([
    [l00, l01, l02],
    [l01, l11, l12],
    [l02, l12, l22],
  ])

  # L = C @ C.T, so A_ @ C has (approximately) orthonormal row pairs, while
  # C^-1 @ X_ keeps the product A @ X equal to A_ @ X_.
  C = np.linalg.cholesky(L)
  A = A_ @ C
  X = np.linalg.solve(C, X_)
  return A, X


def _metric_constraint_rows(p, q):
  '''Return, per row pair, the coefficients of (L00, L01, L02, L11, L12, L22) in p L q^T for symmetric L.'''
  return np.stack((
    p[:, 0] * q[:, 0],
    p[:, 0] * q[:, 1] + p[:, 1] * q[:, 0],
    p[:, 0] * q[:, 2] + p[:, 2] * q[:, 0],
    p[:, 1] * q[:, 1],
    p[:, 1] * q[:, 2] + p[:, 2] * q[:, 1],
    p[:, 2] * q[:, 2],
  ), axis=1)

def plot_structure(X):
  '''Show an interactive 3D scatter plot whose x, y and z coordinates are rows 0, 1 and 2 of X.'''
  xs, ys, zs = X[0, :], X[1, :], X[2, :]
  points = go.Scatter3d(x=xs, y=ys, z=zs, mode='markers', marker={'size': 3})
  figure = go.Figure(data=[points])
  figure.show()

def plot_camera_motion(A):
  '''
  Plot one unit vector per frame as a 3D scatter plot.

  For every frame the vector is the normalized cross product of the frame's
  two rows in A, i.e. the direction perpendicular to both of them.

  Args:
    A: array of shape (2 * num_frames, 3); rows 2*f and 2*f + 1 belong to
       frame f.

  Raises:
    ValueError: if A is not a non-empty (2 * num_frames, 3) array.
  '''
  A = np.asarray(A)
  if A.ndim != 2 or A.shape[1] != 3 or A.shape[0] == 0 or A.shape[0] % 2 != 0:
    raise ValueError('A must have shape (2 * num_frames, 3) with at least one frame')

  # All frames at once: cross product of each (even row, odd row) pair,
  # scaled to unit length.
  directions = np.cross(A[0::2], A[1::2])
  cam_motion = directions / np.linalg.norm(directions, axis=1, keepdims=True)
  print(cam_motion.shape)

  # NOTE: for separate per-axis 2-D plots, draw cam_motion[:, k] against the
  # frame index with go.Scatter(mode='lines') instead of the 3D scatter below.
  fig = go.Figure(data=[go.Scatter3d(x=cam_motion[:, 0], y=cam_motion[:, 1], z=cam_motion[:, 2], mode='markers',
    marker=dict(size=3))])
  fig.show()

# Factorize the NaN-free tracks: A is the matrix returned first by affineSFM
# (two rows per frame), X the 3-row matrix returned second (one column per point).
A, X = affineSFM(track_x, track_y)
print(A.shape, X.shape)
# 3D scatter plot of the columns of X.
plot_structure(X)
# 3D scatter plot of the normalized cross product of each frame's two rows of A.
plot_camera_motion(A)

### Results

*   The predicted 3D locations of the tracked points for 3 different viewpoints.
*   The predicted 3D path of the camera.


- 3D Structure Visualizations:
  - <img src="https://drive.google.com/uc?id=1j9_aviLnIdeEPcDrt5CcmdNIi-gBj5KD"/>
  - <img src="https://drive.google.com/uc?id=1_3sXKJTJswiq4-jo1E2vkPIEP8gXakA0"/>
  - <img src="https://drive.google.com/uc?id=1En4G3LZ1NR61qXeWdZegyK2Hyf4t22zq"/>
  - <img src="https://drive.google.com/uc?id=185GUlxabZtI2lKH9OwXO5yBjLk1n73rx"/>

- Camera motion:
  - 3D plot:
    - <img src="https://drive.google.com/uc?id=1EtwMMDKceDsgh3S13ZKYWgEff3wl_HnF"/>
  - Separate 2D plots (x, y & z in order):
    - <img src="https://drive.google.com/uc?id=1BQ-b7zp9qM3w82oCNYbn3ya00Ox83EUJ"/>
    - <img src="https://drive.google.com/uc?id=1aqmDNXmDPBdsbwVTdLrwFaJShvOAEvys"/>
    - <img src="https://drive.google.com/uc?id=1oUZw8USY99joCoIB4prjl9CbkvW1YZAk"/>
    
<!--     

*   Reference: 
    - Tomasi and Kanade. Shape and Motion from Image Streams under Orthography: a Factorization Method. 1992 -->