# ライブラリーのインポート

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from torch.utils.data.sampler import SubsetRandomSampler

  from .autonotebook import tqdm as notebook_tqdm


# データのインポート

In [2]:
# Read the training and test tables from the working directory.
train = pd.read_csv('training.csv')
test = pd.read_csv('test.csv')

# Report the size of each table, then the per-column count of missing values
# in the training table.
for frame in (train, test):
    print(frame.shape)
print(train.isna().sum())

(7049, 31)
(1783, 2)
left_eye_center_x              10
left_eye_center_y              10
right_eye_center_x             13
right_eye_center_y             13
left_eye_inner_corner_x      4778
left_eye_inner_corner_y      4778
left_eye_outer_corner_x      4782
left_eye_outer_corner_y      4782
right_eye_inner_corner_x     4781
right_eye_inner_corner_y     4781
right_eye_outer_corner_x     4781
right_eye_outer_corner_y     4781
left_eyebrow_inner_end_x     4779
left_eyebrow_inner_end_y     4779
left_eyebrow_outer_end_x     4824
left_eyebrow_outer_end_y     4824
right_eyebrow_inner_end_x    4779
right_eyebrow_inner_end_y    4779
right_eyebrow_outer_end_x    4813
right_eyebrow_outer_end_y    4813
nose_tip_x                      0
nose_tip_y                      0
mouth_left_corner_x          4780
mouth_left_corner_y          4780
mouth_right_corner_x         4779
mouth_right_corner_y         4779
mouth_center_top_lip_x       4774
mouth_center_top_lip_y       4774
mouth_center_bottom_lip_x  

# データの前処理

In [3]:
# Forward-fill missing values: each NaN takes the value from the nearest
# preceding row in the same column. `DataFrame.ffill` replaces the deprecated
# `fillna(method='ffill')` spelling, and rebinding `train` (instead of
# `inplace=True`) keeps this cell safe to re-run.
# NOTE: a NaN with no valid value above it in its column is left as NaN.
train = train.ffill()

In [4]:
# Preview the first five training rows, transposed so that each column is
# displayed as a row.
train.head().T

Unnamed: 0,0,1,2,3,4
left_eye_center_x,66.033564,64.332936,65.057053,65.225739,66.725301
left_eye_center_y,39.002274,34.970077,34.909642,37.261774,39.621261
right_eye_center_x,30.227008,29.949277,30.903789,32.023096,32.24481
right_eye_center_y,36.421678,33.448715,34.909642,37.261774,38.042032
left_eye_inner_corner_x,59.582075,58.85617,59.412,60.003339,58.56589
left_eye_inner_corner_y,39.647423,35.274349,36.320968,39.127179,39.621261
left_eye_outer_corner_x,73.130346,70.722723,70.984421,72.314713,72.515926
left_eye_outer_corner_y,39.969997,36.187166,36.320968,38.380967,39.884466
right_eye_inner_corner_x,36.356571,36.034723,37.678105,37.618643,36.98238
right_eye_inner_corner_y,37.389402,34.361532,36.320968,38.754115,39.094852


In [5]:
# Preview the first five test rows, transposed so that each column is
# displayed as a row.
test.head().T

Unnamed: 0,0,1,2,3,4
ImageId,1,2,3,4,5
Image,182 183 182 182 180 180 176 169 156 137 124 10...,76 87 81 72 65 59 64 76 69 42 31 38 49 58 58 4...,177 176 174 170 169 169 168 166 166 166 161 14...,176 174 174 175 174 174 176 176 175 171 165 15...,50 47 44 101 144 149 120 58 48 42 35 35 37 39 ...


In [6]:
# Regression targets: every training column except the raw 'Image' string.
facial_pts_data = train.drop(['Image'], axis=1)

# Each 'Image' cell is a single space-separated string of pixel values; split
# it into tokens and let NumPy convert the tokens to floats.
X_train = np.array([pixels.split(' ') for pixels in train['Image']], dtype='float')
y_train = np.array(
    [facial_pts_data.iloc[row] for row in range(len(train))],
    dtype='float',
)
X_test = np.array([pixels.split(' ') for pixels in test['Image']], dtype='float')

# 訓練データと検証データに分割し、データローダーを作成

In [7]:
def train_test_split(X_train, validation_split, targets=None, batch_size=32, seed=None):
  """Build shuffled training and validation DataLoaders over one dataset.

  NOTE: this definition shadows ``sklearn.model_selection.train_test_split``
  imported at the top of the notebook; the name is kept so existing calls
  continue to work.

  Args:
    X_train: NumPy array of input samples, one sample per row.
    validation_split: Fraction (0..1) of the samples held out for validation.
    targets: NumPy array of regression targets aligned with ``X_train``.
      When omitted, the notebook-level ``y_train`` is used, which is what the
      original implementation always did.
    batch_size: Number of samples per batch for both loaders.
    seed: Optional seed for the index shuffle. ``None`` keeps the original
      behaviour of shuffling with NumPy's global random state.

  Returns:
    ``(train_loader, valid_loader)``. Both wrap the same ``TensorDataset`` and
    differ only in the disjoint index subsets their samplers draw from.

  Raises:
    ValueError: If ``targets`` and ``X_train`` have different lengths.
  """
  if targets is None:
    # Backward-compatible fallback to the notebook global.
    targets = y_train

  dataset_size = len(X_train)
  if len(targets) != dataset_size:
    raise ValueError(
        'X_train and targets must have the same number of rows: '
        '{} != {}'.format(dataset_size, len(targets)))

  indices = list(range(dataset_size))
  val_num = int(np.floor(validation_split*dataset_size))
  if seed is None:
    np.random.shuffle(indices)
  else:
    np.random.default_rng(seed).shuffle(indices)
  # The first `val_num` shuffled indices form the validation subset.
  train_indices, val_indices = indices[val_num:], indices[:val_num]

  train_sampler = SubsetRandomSampler(train_indices)
  valid_sampler = SubsetRandomSampler(val_indices)

  loader_object = data_utils.TensorDataset(
      torch.from_numpy(X_train).float(), torch.from_numpy(targets).float())
  train_loader = data_utils.DataLoader(loader_object, batch_size=batch_size, sampler=train_sampler)
  valid_loader = data_utils.DataLoader(loader_object, batch_size=batch_size, sampler=valid_sampler)
  return train_loader, valid_loader

# Hold out 20% of the samples for validation; the remaining 80% feed training.
train_loader, valid_loader = train_test_split(X_train, 0.2)

# CNNモデルの構築

In [8]:
class CNN(nn.Module):
    """Small convolutional regressor mapping a 1-channel image to 30 outputs.

    Two conv -> ReLU -> 2x2 max-pool -> dropout stages are followed by two
    fully connected layers. The first linear layer expects 32*21*21 features,
    which is what a 96x96 input produces after the two stages.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Shapes below are for a batch of N 96x96 single-channel images.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)   # (N,1,96,96)  -> (N,16,92,92)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)  # (N,16,46,46) -> (N,32,42,42)
        self.fc1 = nn.Linear(32*21*21, 250)
        self.fc2 = nn.Linear(250, 30)
        # One dropout module (p=0.4) is reused after every stage.
        self.dp1 = nn.Dropout(p=0.4)

    def forward(self, x, verbose=False):
        """Run the network on a batch of images.

        Args:
            x: Float tensor of shape (N, 1, 96, 96).
            verbose: Accepted for compatibility; currently ignored.

        Returns:
            Tensor of shape (N, 30).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                32*21*21 features per sample.
        """
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)   # (N,16,92,92) -> (N,16,46,46)
        x = self.dp1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)   # (N,32,42,42) -> (N,32,21,21)
        x = self.dp1(x)
        # Flatten per sample. Using view(-1, 32*21*21) here would silently
        # fold extra features into the batch dimension for wrong-sized inputs;
        # keeping the batch size fixed makes fc1 raise instead.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dp1(x)
        x = self.fc2(x)
        return x

# モデルの訓練

In [9]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def training(epochs, model, criterion, device, train_loader, valid_loader, optimizer):
  """Train `model` and report the mean training/validation loss per epoch.

  Args:
    epochs: Number of passes over `train_loader`.
    model: Network to optimise; it receives batches shaped (N, 1, 96, 96).
    criterion: Loss function called as `criterion(output, target)`.
    device: Device that each batch is moved to before the forward pass.
    train_loader: Iterable of `(data, target)` training batches.
    valid_loader: Iterable of `(data, target)` validation batches, or `None`
      to skip validation.
    optimizer: Optimizer that updates the parameters of `model`.

  Returns:
    Dict with keys `'train_loss'` and `'valid_loss'`, each a list holding one
    mean per-batch loss per epoch (`nan` when no batches were seen).

  Note:
    When validation runs, the model is left in eval mode on return.
  """
  history = {'train_loss': [], 'valid_loss': []}
  for epoch in range(epochs):
    model.train()
    train_total, train_batches = 0.0, 0
    for data, target in train_loader:
      data, target = data.to(device), target.to(device)
      # Flat pixel rows -> single-channel 96x96 images.
      data = data.view(-1, 1, 96, 96)
      optimizer.zero_grad()
      output = model(data)
      loss = criterion(output, target)
      loss.backward()
      optimizer.step()
      train_total += loss.item()
      train_batches += 1
    train_loss = train_total / train_batches if train_batches else float('nan')

    valid_total, valid_batches = 0.0, 0
    if valid_loader is not None:
      # Evaluation pass: dropout disabled, no gradients tracked.
      model.eval()
      with torch.no_grad():
        for data, target in valid_loader:
          data, target = data.to(device), target.to(device)
          data = data.view(-1, 1, 96, 96)
          valid_total += criterion(model(data), target).item()
          valid_batches += 1
    valid_loss = valid_total / valid_batches if valid_batches else float('nan')

    history['train_loss'].append(train_loss)
    history['valid_loss'].append(valid_loss)
    print('Epoch {}/{} - train loss: {:.4f} - valid loss: {:.4f}'.format(
        epoch + 1, epochs, train_loss, valid_loss))
  return history

In [10]:
# Run configuration. `n_hidden` and `output_size` are not referenced by the
# statements in this cell.
n_hidden = 128
output_size = 30
epochs = 50

# Network on the selected device, mean-squared-error loss, and Adam with its
# default hyperparameters.
model = CNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

training(
    epochs,
    model,
    criterion,
    device,
    train_loader,
    valid_loader,
    optimizer,
)

# 結果の予測

In [11]:
# Reshape the flat pixel rows into single-channel 96x96 images on the
# training device.
X_test_torch = torch.from_numpy(X_test).float().view(len(X_test),1,96,96).to(device)

# Inference only: eval mode disables dropout, and no_grad stops autograd from
# recording the forward pass. Predicting in chunks keeps peak memory bounded
# instead of pushing the whole test set through the network at once.
model.eval()
with torch.no_grad():
    test_predictions = torch.cat([model(chunk) for chunk in X_test_torch.split(256)])
test_predictions = test_predictions.cpu().numpy()

# 提出

In [12]:
# Column order of the training table; a feature's position in this list is
# used as its column index into `test_predictions`.
keypts_labels = train.columns.tolist()

id_lookup = pd.read_csv('IdLookupTable.csv')
# Translate each requested feature name into its column position.
id_lookup_features = [keypts_labels.index(name) for name in id_lookup['FeatureName']]
id_lookup_image = list(id_lookup['ImageId'])

# ImageId is 1-based while prediction rows are 0-based, hence the `- 1`.
location = [
    test_predictions[image_id - 1][feature_idx]
    for image_id, feature_idx in zip(id_lookup_image, id_lookup_features)
]

id_lookup['Location'] = location

In [13]:
# Keep only the two columns written to the submission file, then save them as
# CSV without the DataFrame index.
submission = id_lookup[['RowId', 'Location']]
submission.to_csv('submission_pytorch.csv', index=False)

In [None]:
# Show the first five rows of the submission table as a sanity check.
submission.head()