# Hoof Learning

---

**Variables in this study:**

* Healthiness of horse (binary)
  
* Which foot of horse (right/left, front/back)
     - All data has been collected on right hoofs (as of now)

* Type of shoe (Unshod, Heart, Egg, Standard)

**Goal:**

* Use DW, SM, and CB coordinates (together with time) in combination to predict the *P3 lateral* coordinates.

* Use healthiness of horse and shoe type as variables for input on the model.

* Clean data so we can sample time and view displacement accurately


### Rita's code for smoothing : 

```python

# Smooth `data` with a Hann window of `windowSize` samples.
windowSize = 40
window = np.hanning(windowSize)
# Normalise the window to unit sum so the filter acts as a weighted average.
window = window / window.sum()
# filter the data using convolution
# mode='valid' keeps only positions where the window fully overlaps the data,
# so the result is shorter than `data` by windowSize - 1 samples.
filtered = np.convolve(window, data, mode='valid')

```

---
  
**To-Do:**

* Develop a multivariate time series machine learning model

* Look into polynomial regression

* Plot each coordinate w.r.t. time

* Smooth data

* Get a neural model working
  
---

![P3 Displacement](hoof_study.png)

---

# Table of Contents

- [Imports](#id-section1)
- [DataFrame of All Variables](#id-section2)
- [Data Selection](#id-section3)
- [Linear Regression Model](#id-section4)

> If an error about `.DS_Store` is raised when accessing the data:
> 
> Navigate to the directory where the data is stored with `cd` in a terminal, then copy & paste the following command:
> 
> `find . -name '.DS_Store' -type f -delete`

---


<div id='id-section1'/>

# Imports

In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle



<div id='id-section2'/>

# DataFrame with all variables

In [3]:
# Build two tables from the trial CSV files of the "07-21" collection date:
#   * `frame` - one summary row per trial (date, shoe, time vector and the
#     DW / SM / CB / P3 Lateral coordinate arrays);
#   * `df`    - every cleaned trial table concatenated, with one column per
#     coordinate axis, which the later cells use for modelling.
# The directory is walked as <data_root>/<date>/<shoe>/<trial file>.
data_root = "/Path/to/data_directory"

frames = []
count = 1
frame = pd.DataFrame(columns=['Date', 'Shoe', 'Time', 'DW coordinates',
                     'SM coordinates', 'CB coordinates', 'P3 Lateral coordinates'])
for date in os.listdir(data_root):
    if date == "07-21":
        date_dir = os.path.join(data_root, date)
        for shoe in os.listdir(date_dir):
            # Hidden entries such as .DS_Store are not shoe folders; listing
            # them as directories would raise.
            if shoe.startswith("."):
                continue
            shoe_dir = os.path.join(date_dir, shoe)
            for trial in os.listdir(shoe_dir):
                # Same for hidden files sitting next to the trial CSVs.
                if trial.startswith("."):
                    continue
                data = pd.read_csv(os.path.join(shoe_dir, trial))

                # Summary row: column 0 is time, then three columns (x, y, z)
                # per marker, all taken from row 2 onwards.
                # NOTE(review): this slice starts at row 2 while the cleaned
                # table below drops the first 3 rows - confirm which is right.
                time = np.array(data.iloc[2:, 0])
                DW = [tuple(xyz)
                      for xyz in data.iloc[2:, [1, 2, 3]].to_numpy()]
                SM = [tuple(xyz)
                      for xyz in data.iloc[2:, [4, 5, 6]].to_numpy()]
                CB = [tuple(xyz)
                      for xyz in data.iloc[2:, [7, 8, 9]].to_numpy()]
                P3_lateral = [tuple(xyz) for xyz in data.iloc[2:, [
                    10, 11, 12]].to_numpy()]
                row = [date, shoe, time, np.array(DW), np.array(SM),
                       np.array(CB), np.array(P3_lateral)]
                frame.loc[count] = row
                count += 1

                # drop null values (the last 251 rows of each trial file)
                data.drop(data.tail(251).index, inplace=True)
                # Drop titles and units rows
                data.drop(data.head(3).index, inplace=True)
                data = data.rename(columns={'Unnamed: 0': 'Time (s)', 'DW': 'DW_x', 'Unnamed: 2': 'DW_y', 'Unnamed: 3': 'DW_z', 'SM': 'SM_x', 'Unnamed: 5': 'SM_y',
                                            'Unnamed: 6': 'SM_z', 'CB': 'CB_x', 'Unnamed: 8': 'CB_y', 'Unnamed: 9': 'CB_z', 'P3  Lateral': 'P3_x', 'Unnamed: 11': 'P3_y', 'Unnamed: 12': 'P3_z'})
                # Drop medial coordinates (the last three columns)
                data.drop(columns=data.columns[-3:], inplace=True)
                frames.append(data)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that points at the actual cause instead.
if not frames:
    raise ValueError(
        f"No trial CSV files found under {os.path.join(data_root, '07-21')}")
df = pd.concat(frames)
df = df.dropna()
frame


FileNotFoundError: [Errno 2] No such file or directory: '/Path/to/data_directory'

<div id='id-section3'/>

## Data selection

In [None]:
# Input the "shoe type" and "healthy/non-healthy" parameters, then rebuild
# `df` from only the trials of the "07-21" collection date whose shoe folder
# matches the selected shoe type.
shoe_type = input(
    "Select which type of shoe to perform the model on : \n Standards = [1]\nHeartbars = [2]\nEggbars = [3]\nUnshods = [4]").strip()
health_status = input(
    "Select healthiness of horse boolean : \nHealthy = [1]\nNot healthy = [0]\n")
print('*'*90, '\nhealth_status feature is not developed yet. Either input will yeild the same result.\n', '*'*90)

# Menu choice -> (substring identifying the shoe folder, banner title).
shoe_choices = {
    '1': ("Standard", 'Standards'),
    '2': ("Heartbar", 'Heartbars'),
    '3': ("Eggbar", 'Eggbars'),
    '4': ("Unshod", 'Unshods'),
}
# An unknown choice would otherwise match no folder and surface later as an
# opaque "No objects to concatenate" error from pd.concat.
if shoe_type not in shoe_choices:
    raise ValueError(
        f"Unknown shoe type {shoe_type!r}; expected one of {sorted(shoe_choices)}")
folder_keyword, banner = shoe_choices[shoe_type]

data_root = "/Path/to/data_directory"
frames = []
for date in os.listdir(data_root):
    if date != "07-21":
        continue
    date_dir = os.path.join(data_root, date)
    for shoe in os.listdir(date_dir):
        # Only folders whose name contains the selected keyword are loaded.
        if folder_keyword not in shoe:
            continue
        shoe_dir = os.path.join(date_dir, shoe)
        for trial in os.listdir(shoe_dir):
            # Hidden files such as .DS_Store are not trial CSVs.
            if trial.startswith("."):
                continue
            data = pd.read_csv(os.path.join(shoe_dir, trial))

            # drop null values (the last 251 rows of each trial file)
            data.drop(data.tail(251).index, inplace=True)
            # Drop titles and units rows
            data.drop(data.head(3).index, inplace=True)
            data = data.rename(columns={'Unnamed: 0': 'Time (s)',
                                        'DW': 'DW_x', 'Unnamed: 2': 'DW_y', 'Unnamed: 3': 'DW_z', 'SM': 'SM_x', 'Unnamed: 5': 'SM_y', 'Unnamed: 6': 'SM_z',
                                        'CB': 'CB_x', 'Unnamed: 8': 'CB_y', 'Unnamed: 9': 'CB_z', 'P3  Lateral': 'P3_x', 'Unnamed: 11': 'P3_y', 'Unnamed: 12': 'P3_z'})
            # Drop medial coordinates (the last three columns)
            data.drop(columns=data.columns[-3:], inplace=True)
            frames.append(data)

if not frames:
    raise ValueError(
        f"No trial CSV files found for shoe type {banner!r} under "
        f"{os.path.join(data_root, '07-21')}")
df = pd.concat(frames)
df = df.dropna()
print("="*30, banner, "="*30)
df


<div id='id-section4'/>

## Linear Regression Model

In [None]:
# Fit a multi-output linear regression that predicts the three P3 coordinates
# from time plus the DW, SM and CB marker coordinates.
X = df[['Time (s)', 'DW_x', 'DW_y', 'DW_z', 'SM_x', 'SM_y',
        'SM_z', 'CB_x', 'CB_y', 'CB_z']]  # inputs
y = df[['P3_x', 'P3_y', 'P3_z']]  # targets
# shuffle=False keeps the row order, so the first 70% of rows are used for
# training and the last 30% for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0, shuffle=False)  # splitting

model = LinearRegression()
model.fit(X_train, y_train)

# Evaluating model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# The `squared=False` argument is deprecated in recent scikit-learn releases
# and later removed. Taking the square root of each target's MSE and averaging
# reproduces the same value on every version.
rmse = np.sqrt(mean_squared_error(
    y_test, y_pred, multioutput='raw_values')).mean()
mae = mean_absolute_error(y_test, y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
# r^2 value showing not very good correlation in data (maybe will change with healthiness implemented?)
model.score(X_test, y_test)


In [None]:
# y_pred holds one row per test sample and one column per target
# (P3_x, P3_y, P3_z). Index the column to plot a coordinate across all
# samples; y_pred[0] would plot only the three values of the first sample.
plt.plot(y_pred[:, 0], color = 'blue')
# plt.plot(y_pred[:, 1], color = 'green')
# plt.plot(y_pred[:, 2], color = 'red')
plt.show()

## Saving Model

In [None]:
# Persist the fitted model with pickle, then load it back and predict with it.
filename = 'linear_regression_model.sav'
# Context managers make sure the file is flushed and closed before it is
# reopened for reading.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code from an untrusted file.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# NOTE(review): the loaded model is already fitted; this refit on the same
# training data looks redundant (was `score(X_test, y_test)` intended?).
result = loaded_model.fit(X_train, y_train)
model_predictions = loaded_model.predict(X_test)
