In [1]:
# default_exp ml.data

# ML data

> ML data handling functions.

In [2]:
#export
from expoco.core import *
from pathlib import Path
import pandas as pd

In [3]:
#export
def viseme_dataset_from_capture_sessions(path='data/capture_session', glob_pattern='viseme*'):
    "Create a viseme dataset from capture session data"
    path, dataset_id= Path(path), now()
    output_path = path.parent/f'viseme_dataset_{dataset_id}'
    output_path.mkdir()
    log = LogFile(output_path/'data.log')
    log('output_path:', output_path.resolve())
    df = pd.DataFrame()
    for session_path in sorted(path.glob(glob_pattern)):
        expression_id = int(str(session_path)[-1])
        log('session_path:', session_path, 'expression_id:', expression_id)
        _df = pd.read_csv(session_path/'data.csv')
        _df['expression_id'] = expression_id
        df = pd.concat([df, _df])
    file_name=output_path/'data.csv'
    log('file_name:', file_name)
    df.to_csv(file_name, index=False)

```
viseme_dataset_from_capture_sessions()
```

```
LogFile: C:\Users\Butterp\github\pete88b\expoco\data\viseme_dataset_20211019_202836\data.log
```

In [4]:
#export
class VisemeDatasetHelper:
    def __init__(self, dataset_id, path='data', y_name='expression_id'):
        self.path = Path(path)/f'viseme_dataset_{dataset_id}'
        self.df = pd.read_csv(self.path/'data.csv')
        self.y_name = y_name
        self.output_path = self.path/f'processed_{now()}'
        self.output_path.mkdir()
        self.log = LogFile(self.output_path/'data.log')
        self.log('output_path:', self.output_path.resolve(),
                 '\ndataset_id:', dataset_id, 
                 '\npath:', self.path.resolve(),
                 '\ny_name:', y_name)
    def make_landmarks_relative(self, to_landmark_id=FaceLandmarks.tip_of_nose):
        self.log('make_landmarks_relative to_landmark_id:', to_landmark_id)
        self.relative_to_landmark_id = to_landmark_id
        for i in range(FaceLandmarks.count):
            self.df[f'{i}x']=self.df[f'{i}x']-self.df[f'{to_landmark_id}x']
            self.df[f'{i}y']=self.df[f'{i}y']-self.df[f'{to_landmark_id}y']
        return self
    def normalize(self):
        mean, std = self.df[self.cont_names].mean(), self.df[self.cont_names].std()
        self.stats_df = pd.DataFrame([mean, std])
        file_name=self.output_path/'stats.csv'
        self.log('stats_file_name:', file_name)
        self.stats_df.to_csv(file_name, index=False)
        self.df.loc[:, self.cont_names] = (self.df.loc[:, self.cont_names] - mean) / std
        return self
    def combine_targets(self, change_expression_id, to_expression_id):
        self.log('combine_targets change_expression_id:', change_expression_id, 'to_expression_id:', to_expression_id)
        self.df.loc[self.df[self.y_name]==change_expression_id, self.y_name]=to_expression_id
        return self
    def setup_cont_names(self):
        relative_to_landmark_id = getattr(self, 'relative_to_landmark_id', -1)
        self.cont_names = []
        for i in FaceLandmarks.pointer + FaceLandmarks.mouth:
            if i == relative_to_landmark_id: continue
            self.cont_names.append(f'{i}x')
            self.cont_names.append(f'{i}y')
        self.log('cont_names:', self.cont_names)
        self.df = self.df[self.cont_names + [self.y_name]]
        return self
    def save(self):
        file_name=self.output_path/'data.csv'
        self.log('data_file_name:', file_name)
        self.df.to_csv(file_name, index=False)
        return self

```
(VisemeDatasetHelper('20211019_202836')
       .make_landmarks_relative()
       .setup_cont_names()
       .combine_targets(4, 0)
       .normalize()
       .save())
```

```
LogFile: C:\Users\Butterp\github\pete88b\expoco\data\viseme_dataset_20211019_202836\processed_20211019_205023\data.log
```

In [6]:
vdh = (VisemeDatasetHelper('20211019_202836')
       .make_landmarks_relative()
       .setup_cont_names()
       .combine_targets(4, 0)
       .normalize()
       .save())
assert vdh.df is not None
assert vdh.stats_df is not None
assert vdh.cont_names is not None

LogFile: C:\Users\Butterp\github\pete88b\expoco\data\viseme_dataset_20211019_202836\processed_20211021_102400\data.log


In [7]:
data_df=pd.read_csv('data/viseme_dataset_20211019_202836/data.csv')
processed_data_df=pd.read_csv('data/viseme_dataset_20211019_202836/processed_20211019_205023/data.csv')
stats_df=pd.read_csv('data/viseme_dataset_20211019_202836/processed_20211019_205023/stats.csv')

In [8]:
stats_df

Unnamed: 0,5x,5y,2x,2y,218x,218y,438x,438y,0x,0y,...,408x,408y,409x,409y,410x,410y,415x,415y,424x,424y
0,0.452395,0.515518,0.454705,0.578173,0.427435,0.556774,0.480441,0.553447,0.002705,0.062299,...,0.498727,0.640504,0.502321,0.637451,0.513336,0.624293,0.49187,0.640139,0.511556,0.685984
1,0.059308,0.034584,0.05535,0.033445,0.056226,0.034149,0.056446,0.034126,0.005679,0.005095,...,0.04827,0.029978,0.048222,0.02991,0.047887,0.029193,0.047718,0.029696,0.045034,0.027005


In [9]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 05a_ml_data.ipynb.
Converted 05b_ml_model.ipynb.
Converted 10a_mediapipe_face_mesh_identify_landmarks.ipynb.
No export destination, ignored:
#export
import ipywidgets as widgets
import numpy as np
import pandas as pd
import cv2, time, math, json, shutil
import win32api, win32con

import mediapipe as mp
mp_face_mesh = mp.solutions.face_mesh

from pathlib import Path
No export destination, ignored:
#export
def _now(): 
    return datetime.datetime.utcnow().strftime('%Y%m%d_%H%M%S')
No export destination, ignored:
#export
def _new_capture_metadata(stop_after, path, video_capture, expression_id, comments):
    width, height = [int(video_capture.get(p)) for p in [cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT]]
    assert width >= height
    return dict(count=0, stop_after=stop_after, path=str(path.resolve()), expression_id=expression_id,
                capture_width=width, capture_height=height, start_date=_now(), comments=comments)
No export destin

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Butterp\\github\\pete88b\\expoco\\expoco\\None.py'