In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
def intersect_over_union(boxes):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Coordinates are treated as inclusive indices, so a box spanning
    ``xmin..xmax`` is ``xmax - xmin + 1`` units wide (same for height).

    Parameters
    ----------
    boxes : sequence of 8 numbers
        ``[xminA, yminA, xmaxA, ymaxA, xminB, yminB, xmaxB, ymaxB]``.
        Any iterable of length 8 is accepted: a pandas Series (for example a
        DataFrame row passed by ``DataFrame.apply(..., axis=1)``), a NumPy
        array, a list or a tuple.

    Returns
    -------
    float
        Intersection area divided by union area. ``0.0`` when either box is
        degenerate, i.e. has a non-positive width or height.

    Raises
    ------
    AssertionError
        If ``boxes`` does not contain exactly 8 values.
    """
    assert len(boxes) == 8, 'boxes must hold exactly 8 coordinates'
    # Plain unpacking (instead of `.values` slicing) keeps Series input working
    # and additionally admits lists, tuples and arrays.
    ax_min, ay_min, ax_max, ay_max, bx_min, by_min, bx_max, by_max = boxes

    width_a = ax_max - ax_min + 1
    height_a = ay_max - ay_min + 1
    width_b = bx_max - bx_min + 1
    height_b = by_max - by_min + 1

    # Reject degenerate boxes per dimension rather than by area: a box that
    # is inverted in one dimension has a negative "area", which could cancel
    # the other box's area and produce a zero denominator below.
    if width_a <= 0 or height_a <= 0 or width_b <= 0 or height_b <= 0:
        return 0.0

    area_a = width_a * height_a
    area_b = width_b * height_b

    # Corners of the overlap rectangle; the max(0, ...) clamps make the
    # overlap area zero when the boxes are disjoint.
    overlap_w = max(0, min(ax_max, bx_max) - max(ax_min, bx_min) + 1)
    overlap_h = max(0, min(ay_max, by_max) - max(ay_min, by_min) + 1)
    inter_area = overlap_w * overlap_h

    return inter_area / float(area_a + area_b - inter_area)

def metrica(predicted, ground):
    """Score predicted boxes against ground-truth boxes, row by row.

    Row ``i`` of ``predicted`` is concatenated with row ``i`` of ``ground``
    (matched by position, not by index label) and the combined row is passed
    to ``intersect_over_union``, which requires exactly 8 values.

    Parameters
    ----------
    predicted : pandas.DataFrame
        Predicted box coordinates, one box per row.
    ground : pandas.DataFrame
        Reference box coordinates with the same number of rows.

    Returns
    -------
    pandas.Series
        One IoU value per row, on a fresh default index.

    Raises
    ------
    AssertionError
        If the two frames differ in row count.
    """
    n_rows = ground.shape[0]
    assert n_rows == predicted.shape[0], 'DataFrames should have equal length'
    scores = [
        intersect_over_union(pd.concat([predicted.iloc[pos], ground.iloc[pos]]))
        for pos in range(n_rows)
    ]
    return pd.Series(scores)

In [3]:
# Read the three input tables, in this order: training votes, training
# answers, and the test votes.
votes, answers, test = (
    pd.read_csv(csv_name)
    for csv_name in ("train_data.csv", "train_answers.csv", "test_data.csv")
)

In [4]:
# Consensus box per item: average each corner coordinate over all rows that
# share an itemId.
quorum = (
    votes
    .groupby("itemId")[['Xmin', 'Ymin', 'Xmax', 'Ymax']]
    .mean()
    .reset_index()
)

# Attach the reference boxes (the *_true columns) and score the consensus
# box of every item against its reference.
data = pd.merge(quorum, answers, on=['itemId'])
data['iou'] = (
    data[['Xmin', 'Ymin', 'Xmax', 'Ymax',
          'Xmin_true', 'Ymin_true', 'Xmax_true', 'Ymax_true']]
    .apply(intersect_over_union, axis=1)
)

In [5]:
# Collapse `test` to one row per itemId: mean of each corner, rounded and cast
# to int. This rebinds `test`, so the frame read from disk is replaced.
test = (
    test
    .groupby("itemId")[['Xmin', 'Ymin', 'Xmax', 'Ymax']]
    .mean()
    .round()
    .astype(int)
    .reset_index()
)
# Export left disabled: `out` is not defined until a later cell builds it.
#out.to_csv('submit.csv', header=False, index=False)

In [6]:
# Separate linear models
# Aggregate `test` to one integer box per itemId. Running this on a frame that
# is already aggregated leaves it unchanged, so the cell also works when the
# previous aggregation cell has been executed.
test = (
    test
    .groupby("itemId")[['Xmin', 'Ymin', 'Xmax', 'Ymax']]
    .mean()
    .round()
    .astype(int)
    .reset_index()
)
out = pd.DataFrame()
out['itemId'] = test['itemId']

# Fit one LinearRegression per reference coordinate; every model sees all four
# averaged corners as features and predicts a single *_true column.
linear_models = {}
for coord in ['Xmin', 'Ymin', 'Xmax', 'Ymax']:
    model = LinearRegression()
    model.fit(data[['Xmin', 'Ymin', 'Xmax', 'Ymax']], data[coord + '_true'])
    out[coord] = pd.Series(model.predict(test[['Xmin', 'Ymin', 'Xmax', 'Ymax']]))
    linear_models[coord] = model

# Keep the per-coordinate model names bound, as in the unrolled version.
lr_x_min = linear_models['Xmin']
lr_y_min = linear_models['Ymin']
lr_x_max = linear_models['Xmax']
lr_y_max = linear_models['Ymax']

out.to_csv('submit.csv', header=False, index=False)

In [7]:
# Separate decision trees
# Aggregate `test` to one integer box per itemId. Running this on a frame that
# is already aggregated leaves it unchanged, so the cell is safe to re-run.
test = (
    test
    .groupby("itemId")[['Xmin', 'Ymin', 'Xmax', 'Ymax']]
    .mean()
    .round()
    .astype(int)
    .reset_index()
)
out = pd.DataFrame()
out['itemId'] = test['itemId']

# Fit one depth-5 regression tree per reference coordinate; every tree sees all
# four averaged corners as features and predicts a single *_true column.
# random_state is pinned because scikit-learn permutes the features at every
# split, so equally good splits could otherwise be chosen differently from run
# to run and the submission would not be reproducible.
tree_models = {}
for coord in ['Xmin', 'Ymin', 'Xmax', 'Ymax']:
    tree = DecisionTreeRegressor(max_depth=5, random_state=0)
    tree.fit(data[['Xmin', 'Ymin', 'Xmax', 'Ymax']], data[coord + '_true'])
    out[coord] = pd.Series(tree.predict(test[['Xmin', 'Ymin', 'Xmax', 'Ymax']]))
    tree_models[coord] = tree

# Keep the historical variable names bound (they hold trees here, despite the
# `lr_` prefix) in case other cells refer to them.
lr_x_min = tree_models['Xmin']
lr_y_min = tree_models['Ymin']
lr_x_max = tree_models['Xmax']
lr_y_max = tree_models['Ymax']

# NOTE: this writes to the same path as the linear-model cell and therefore
# replaces that submission.
out.to_csv('submit.csv', header=False, index=False)