## Imports

In [1]:
# Python standard library
import csv
import json
import datetime
import sys

# Scipy
import numpy as np
import sklearn.linear_model
import sklearn.model_selection
import pandas as pd

import matplotlib.pyplot as plt

# Jupyter
import IPython.display

# Program-specific
sys.path.append('..')
import constants
import functions

## Test rotations/reflections

In [None]:
# Small asymmetric pattern: one full column plus a single off-axis pixel, so the
# effect of every rotation/reflection can be read directly off the plot
test = np.zeros((5,5))
test[:,2] = 1
test[0,3] = 1

# Shared display settings for every figure in this cell
imshow_options = dict(cmap = 'gray', origin = 'lower')

# Reference picture (no transformation applied)
plt.imshow(test, **imshow_options)
plt.show()

# Each section is (printed label, transformed images shown in order)
sections = [
    # Quarter-turn rotations: 90, 180 and 270 degrees
    ('rotations', [np.rot90(test, turns) for turns in range(1, 4)]),
    # Mirror images along axis 0 and along axis 1
    ('reflections', [np.flip(test, axis) for axis in (0, 1)]),
    # One or three quarter turns followed by a flip along axis 0
    ('rotation/reflection', [np.flip(np.rot90(test, turns), axis = 0) for turns in (1, 3)]),
]

for label, images in sections:
    print(label)
    for image in images:
        plt.imshow(image, **imshow_options)
        plt.show()



## Load data set

In [None]:
# Load the training set through the project helper. aug = False presumably selects
# the original (un-augmented) samples — TODO confirm against functions.LoadTrainData
df_train = functions.LoadTrainData(aug = False)

## Augment data

In [None]:
# Write rotated/reflected copies of every training image to a CSV file.
# Each input row yields seven output rows (three rotations, two reflections and
# two rotate-then-reflect combinations); the untransformed image is not written.
output_file_path = '../data/train/augmented/train_rotref.csv'

# Each band is stored flattened in image_side*image_side columns
image_side = 75
pixel_count = image_side * image_side

# Column names of each band, resolved once instead of once per row
band_1_columns = [column for column in df_train.columns if 'band_1' in column]
band_2_columns = [column for column in df_train.columns if 'band_2' in column]

# (suffix appended to the sample id, transform applied to both bands), in output order
augmentations = [
    ('_rot90-1', lambda image: np.rot90(image, 1)),
    ('_rot90-2', lambda image: np.rot90(image, 2)),
    ('_rot90-3', lambda image: np.rot90(image, 3)),
    ('_ref-0', lambda image: np.flip(image, 0)),
    ('_ref-1', lambda image: np.flip(image, 1)),
    ('_rot90-1_ref-0', lambda image: np.flip(np.rot90(image, 1), 0)),
    ('_rot90-3_ref-0', lambda image: np.flip(np.rot90(image, 3), 0)),
]

# newline = '' hands line-ending control to the csv module, as its documentation
# requires (otherwise an extra \r is written on platforms that use \r\n)
with open(output_file_path, 'w', newline = '') as output_file_handle:
    output_file_writer = csv.writer(output_file_handle, delimiter = ',')

    # Header: metadata columns followed by the flattened pixels of both bands
    output_file_writer.writerow(
        ['id', 'is_iceberg', 'inc_angle']
        + ['band_1_' + str(i) for i in range(pixel_count)]
        + ['band_2_' + str(i) for i in range(pixel_count)]
    )

    for i in range(len(df_train)):
        row_data = df_train.iloc[i]

        # sample_id rather than id, to avoid shadowing the builtin
        sample_id = row_data['id']
        is_iceberg = row_data['is_iceberg']
        inc_angle = row_data['inc_angle']

        # Series.reshape was deprecated and later removed from pandas, so convert
        # to a NumPy array before reshaping the flat pixels into a square image
        band_1 = np.asarray(row_data[band_1_columns]).reshape((image_side, image_side))
        band_2 = np.asarray(row_data[band_2_columns]).reshape((image_side, image_side))

        for suffix, transform in augmentations:
            output_file_writer.writerow(
                [sample_id + suffix, is_iceberg, inc_angle]
                + list(transform(band_1).flatten())
                + list(transform(band_2).flatten())
            )
        
        
        
            
            

## Test file

In [None]:
# Sanity check: print the first two rows of the file at output_file_path.
# newline = '' is how the csv module expects files it reads to be opened, and
# zip() simply stops if the file holds fewer than two rows, where next() would
# raise StopIteration.
with open(output_file_path, 'r', newline = '') as file_handle:
    file_reader = csv.reader(file_handle, delimiter = ',')
    for row_number, row in zip(range(2), file_reader):
        print(row)

## Test augmentations with logistic regression

In [7]:
# Non-augmented baseline: logistic regression on the data loaded with aug = False
df = functions.LoadTrainData(aug = False, mix = True)

# Positional 80/20 split: the first N_train rows train the model, the rest score it
N_train = int(len(df)*0.8)
N_test = len(df) - N_train

# Rows whose incidence angle is the string 'na' get 0 as a placeholder value
df.loc[df['inc_angle'] == 'na', 'inc_angle'] = 0
# Convert the whole column in one vectorized call instead of one call per element
df['inc_angle'] = pd.to_numeric(df['inc_angle'])

# Feature and target column names come from the project's constants module
inputs = constants.inputs
output = constants.output

model = sklearn.linear_model.LogisticRegression(C = 1)

# .iloc makes the row slicing explicitly positional
model.fit(df[inputs].iloc[:N_train], df[output].iloc[:N_train])
print(model.score(df[inputs].iloc[N_train:], df[output].iloc[N_train:]))

  if self.run_code(code, result):


In [8]:
# Fit a fresh logistic regression on the first N_train rows of df and print its
# accuracy on the remaining rows
train_rows = slice(None, N_train)
held_out_rows = slice(N_train, None)

model = sklearn.linear_model.LogisticRegression(C = 1)
model.fit(df[inputs][train_rows], df[output][train_rows])

held_out_score = model.score(df[inputs][held_out_rows], df[output][held_out_rows])
print(held_out_score)

0.735202492212


In [9]:
# Augmented: logistic regression on the data loaded with aug = True
df = functions.LoadTrainData(aug = True, mix = True)

# Positional 80/20 split: the first N_train rows train the model, the rest score it.
# NOTE(review): if aug = True returns the rotated/reflected copies alongside their
# source images, a positional split after mixing can put copies of the same image
# on both sides, which would inflate the score — confirm whether a split grouped
# by original id is wanted.
N_train = int(len(df)*0.8)
N_test = len(df) - N_train

# Rows whose incidence angle is the string 'na' get 0 as a placeholder value
df.loc[df['inc_angle'] == 'na', 'inc_angle'] = 0
# Convert the whole column in one vectorized call instead of one call per element
df['inc_angle'] = pd.to_numeric(df['inc_angle'])

# Feature and target column names come from the project's constants module
inputs = constants.inputs
output = constants.output

model = sklearn.linear_model.LogisticRegression(C = 1)

# .iloc makes the row slicing explicitly positional
model.fit(df[inputs].iloc[:N_train], df[output].iloc[:N_train])
print(model.score(df[inputs].iloc[N_train:], df[output].iloc[N_train:]))

  if self.run_code(code, result):


0.724970783015
