In [None]:
%%bash
# Unpack the competition archives under ../data and merge the three training
# parts into a single round1_train/ directory.

# Stop at the first failing command: without this, a failed `pushd` or `unzip`
# would still reach `rm *.zip` below and delete archives that were never
# extracted (or delete *.zip files in the wrong directory).
set -euo pipefail

pushd ../data

# -O CP936 tells unzip which encoding to use for the archive member names.
unzip -O CP936 xuelang_round1_train_part1_20180628.zip
unzip -O CP936 xuelang_round1_train_part2_20180705.zip
unzip -O CP936 xuelang_round1_train_part3_20180709.zip

# NOTE(review): the archive passwords are hardcoded here; consider reading
# them from environment variables if they are not meant to be public.
# The second archive is presumably the one produced by extracting the first.
unzip -O CP936 -P D4nd8kHIfK xuelang_round1_test_b_20180802.zip.zip
unzip -O CP936 -P Y5TzEQLbHD xuelang_round1_test_b.zip

# Only reached when every extraction above succeeded.
rm -- *.zip

# -p keeps the cell re-runnable when the directory already exists.
mkdir -p round1_train

# Merge the three extracted parts into round1_train/.
# TODO (original author's open question): could this merge be done directly
# at unzip time instead of with rsync?
rsync -a xuelang_round1_train_part1_20180628/ round1_train/
rsync -a xuelang_round1_train_part2_20180705/ round1_train/
rsync -a xuelang_round1_train_part3_20180709/ round1_train/

popd

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

import torch

from dataset import *
from model import *
from solver import *

In [None]:
from torchvision import transforms

# Training-time preprocessing: resize, random flips / colour jitter / small
# rotation for augmentation, then tensor conversion and per-channel
# normalisation. The mean/std values look like the usual ImageNet statistics
# (assumption - confirm against the backbone used in model.py).
transform_train = transforms.Compose(
    [
        transforms.Resize(512),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Merged training directory and the loader that reads from it.
root_train = os.path.join("..", "data", "round1_train")
loader_train = get_loader(
    root_train,
    is_train=True,
    batch_size=32,
    transform=transform_train,
)

In [None]:
# Build the baseline network plus the objects used to train it. Only the
# parameters of `model.head` are given to the optimizer and to the solver.
model = BaselineModel()

# Reach the loss through `torch.nn` explicitly: a bare `nn` is not imported by
# any visible cell and would only resolve if one of the star imports happened
# to re-export it.
loss_fn = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.head.parameters(), lr=0.001)

solver = Solver(model.head, loss_fn, optim)

In [None]:
# Pass the augmented training loader through `model.body` for 12 epochs via
# `prepare_data`, then rebuild the loader with `is_cached=True` and a larger
# batch size. Presumably `prepare_data` stores the extracted features under
# `root_train` - confirm in the project module that defines it.
# NOTE(review): `loader_train` is rebound below, so re-running this cell on
# its own feeds the cached loader back into `prepare_data`.
prepare_data(
    loader_train,
    model.body,
    root_train,
    n_epochs=12,
)
loader_train = get_loader(
    root_train,
    is_train=True,
    batch_size=128,
    is_cached=True,
)

In [None]:
# Train through the solver (constructed around `model.head`) with n_epochs=400
# on the current `loader_train`.
solver.fit(loader_train, n_epochs=400)

In [None]:
# Persist the whole model's state_dict to disk.
# NOTE(review): despite the ".h5" suffix this file is written by torch.save,
# so it is a PyTorch checkpoint rather than an HDF5 file.
torch.save(model.state_dict(), "w.h5")

In [None]:
# Restore the saved weights into the existing model. `map_location="cpu"` lets
# a checkpoint that was saved from GPU tensors be read on a machine without
# CUDA; `load_state_dict` copies the values into the model's current
# parameters, so the device the model lives on is unchanged.
model.load_state_dict(torch.load("w.h5", map_location="cpu"))

In [None]:
from torchvision import transforms

# Directory holding the extracted "test_b" images.
root_test = os.path.join("..", "data", "xuelang_round1_test_b")

# Inference-time preprocessing: the same resize / tensor conversion /
# normalisation as the training pipeline, without any random augmentation.
transform_test = transforms.Compose(
    [
        transforms.Resize(512),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# The second positional argument is presumably `is_train` (the training
# loader passes `is_train=True` by keyword) - confirm against get_loader's
# signature before converting it to a keyword.
loader_test = get_loader(
    root_test,
    False,
    batch_size=32,
    transform=transform_test,
)

In [None]:
import torch.nn.functional as F
import pandas as pd


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


def predict(class_index=20, complement=False):
    """Score every batch of `loader_test` with the global `model`.

    Parameters
    ----------
    class_index : int, default 20
        Column of the softmax output that is reported as "probability".
    complement : bool, default False
        When True, report ``1 - p`` instead of ``p``. The original author
        noted that the submission should have used ``1 - y`` and only noticed
        after the final submit; the default keeps the values that were
        actually submitted. NOTE(review): confirm the label mapping for
        `class_index` and switch this on if the complement is the intended
        score.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns "filename" and "probability".
        Empty (with the same columns) when the loader yields nothing.

    Notes
    -----
    Puts `model` into evaluation mode as a side effect and runs under
    ``torch.no_grad()``.
    """
    # Dropout / batch-norm layers must be in evaluation mode for inference.
    model.eval()

    frames = []
    with torch.no_grad():
        for x, path in loader_test:
            probs = F.softmax(model(x.to(device)), dim=1)
            column = probs.cpu().numpy()[:, class_index]
            if complement:
                column = 1 - column
            frames.append(pd.DataFrame({"filename": path, "probability": column}))

    # pd.concat raises on an empty list, so keep the empty-loader result explicit.
    if not frames:
        return pd.DataFrame(columns=["filename", "probability"])

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and growing
    # a frame inside the loop is quadratic.
    return pd.concat(frames, ignore_index=True)


r = predict()

In [None]:
# Preview the first rows of the prediction table.
r.head()

In [None]:
# Round to four decimals, then clamp into [0.0001, 0.9999] so that no
# probability ends up as exactly 0 or 1.
r["probability"] = (
    r["probability"]
    .round(4)
    .clip(lower=0.0001, upper=0.9999)
)

In [None]:
# Preview the first rows again, now that "probability" has been rounded and clipped.
r.head()

In [None]:
import datetime

# Write the predictions to a timestamped CSV under ../submit.
filename = "../submit/submit_"+datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"

# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
os.makedirs(os.path.dirname(filename), exist_ok=True)

r.to_csv(filename, index=False, float_format='%.4f')