In [1]:
import torch
import numpy as np
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import pandas as pd

In [594]:
import torch.nn.functional as F

In [159]:
###  Load and Clean Data  ###

#Load as panda
xy_train = pd.read_csv('./data/titanic/train.csv')
x_test = pd.read_csv('./data/titanic/test.csv')
#xy_train = pd.read_csv('./data/titanic/debug_1s.csv')

#Scale Sex
xy_train['Sex'] = xy_train['Sex'].map({'male':1, 'female':0})
x_test['Sex'] = x_test['Sex'].map({'male':1, 'female':0})


#Pclass to one hot
pclass_dummies_tr = pd.get_dummies(xy_train['Pclass'], prefix='Pclass')
xy_train = pd.concat([xy_train, pclass_dummies_tr], axis=1)

pclass_dummies_t = pd.get_dummies(x_test['Pclass'], prefix='Pclass')
x_test = pd.concat([x_test, pclass_dummies_t], axis=1)

#Scale Fare
#xy_train.Fare = xy_train.Fare/100
#x_test.Fare = x_test.Fare/100



xy_train.sample(3)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3
241,242,1,3,"Murphy, Miss. Katherine ""Kate""",0,,1,0,367230,15.5,,Q,0,0,1
657,658,0,3,"Bourke, Mrs. John (Catherine)",0,32.0,1,1,364849,15.5,,Q,0,0,1
451,452,0,3,"Hagland, Mr. Ingvald Olai Olsen",1,,1,0,65303,19.9667,,S,0,0,1


In [160]:
x_test.sample(3)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3
246,1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",0,22.0,0,0,F.C.C. 13534,21.0,,S,0,1,0
285,1177,3,"Dennis, Mr. William",1,36.0,0,0,A/5 21175,7.25,,S,0,0,1
356,1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",0,59.0,2,0,11769,51.4792,C101,S,1,0,0


In [240]:
#age cleanup WIP
grouped_train = xy_train.groupby(['Sex','Pclass','Title'])
grouped_median_train = grouped_train.median()
grouped_median_train = grouped_median_train.reset_index()[['Sex', 'Pclass', 'Title', 'Age']]
grouped_median_train.head()

Unnamed: 0,Sex,Pclass,Title,Age
0,0,1,Miss,30.0
1,0,1,Mrs,40.0
2,0,1,Officer,49.0
3,0,1,Royalty,40.5
4,0,2,Miss,24.0


In [244]:
#age cleanup part 2
def fill_age(row):
    condition = (
        (grouped_median_train['Sex'] == row['Sex']) & 
        (grouped_median_train['Title'] == row['Title']) & 
        (grouped_median_train['Pclass'] == row['Pclass'])
    ) 
    return grouped_median_train[condition]['Age'].values[0]

xy_train['Age'] = xy_train.apply(lambda row: fill_age(row) if np.isnan(row['Age']) else row['Age'], axis=1)
x_test['Age'] = x_test.apply(lambda row: fill_age(row) if np.isnan(row['Age']) else row['Age'], axis=1)

In [471]:
#optional age cleanup part 3 (scale)
xy_train.Age = xy_train.Age/80
x_test.Age = x_test.Age/80

In [199]:
Title_Dictionary = {
    "Capt": "Officer",
    "Col": "Officer",
    "Major": "Officer",
    "Jonkheer": "Royalty",
    "Don": "Royalty",
    "Sir" : "Royalty",
    "Dr": "Officer",
    "Rev": "Officer",
    "the Countess":"Royalty",
    "Mme": "Mrs",
    "Mlle": "Miss",
    "Ms": "Mrs",
    "Mr" : "Mr",
    "Mrs" : "Mrs",
    "Miss" : "Miss",
    "Master" : "Master",
    "Lady" : "Royalty"
}


# we extract the title from each name
xy_train['Title'] = xy_train['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())
x_test['Title'] = x_test['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())

# a map of more aggregated title
# we map each title
xy_train['Title'] = xy_train.Title.map(Title_Dictionary)
x_test['Title'] = x_test.Title.map(Title_Dictionary)

#convert to one hot
titles_dummies_tr = pd.get_dummies(xy_train['Title'], prefix='Title')
xy_train = pd.concat([xy_train, titles_dummies_tr], axis=1)

titles_dummies_t = pd.get_dummies(x_test['Title'], prefix='Title')
x_test = pd.concat([x_test, titles_dummies_t], axis=1)


xy_train.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Pclass_1,Pclass_2,Pclass_3,Title,Title_Master,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,Title_Royalty
57,58,0,3,"Novel, Mr. Mansouer",1,28.5,0,0,2697,7.2292,...,0,0,1,Mr,0,0,1,0,0,0
92,93,0,1,"Chaffee, Mr. Herbert Fuller",1,46.0,1,0,W.E.P. 5734,61.175,...,1,0,0,Mr,0,0,1,0,0,0
252,253,0,1,"Stead, Mr. William Thomas",1,62.0,0,0,113514,26.55,...,1,0,0,Mr,0,0,1,0,0,0


In [731]:
#x_test.sample(3)
x_test.ix[206:208, 15:30]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,Title,Title_Master,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,FamilyS,FamilySize,Singleton,SmallFamily,LargeFamily,Cabin_A,Cabin_B,Cabin_C,Cabin_D
206,Miss,0,1,0,0,0,1,1,1,0,0,0,0,0,0
207,Mr,0,0,1,0,0,1,1,1,0,0,0,0,0,0
208,Miss,0,1,0,0,0,1,1,1,0,0,1,0,0,0


In [213]:
xy_train['FamilyS'] = xy_train['SibSp'] + xy_train['Parch'] + 1
x_test['FamilyS'] = x_test['SibSp'] + x_test['Parch'] + 1



# introducing a new feature : the size of families (including the passenger)
xy_train['FamilySize'] = xy_train['Parch'] + xy_train['SibSp'] + 1
   
# introducing other features based on the family size
xy_train['Singleton'] = xy_train['FamilySize'].map(lambda s: 1 if s == 1 else 0)
xy_train['SmallFamily'] = xy_train['FamilySize'].map(lambda s: 1 if 2 <= s <= 4 else 0)
xy_train['LargeFamily'] = xy_train['FamilySize'].map(lambda s: 1 if 5 <= s else 0)


# introducing a new feature : the size of families (including the passenger)
x_test['FamilySize'] = x_test['Parch'] + x_test['SibSp'] + 1
   
# introducing other features based on the family size
x_test['Singleton'] = x_test['FamilySize'].map(lambda s: 1 if s == 1 else 0)
x_test['SmallFamily'] = x_test['FamilySize'].map(lambda s: 1 if 2 <= s <= 4 else 0)
x_test['LargeFamily'] = x_test['FamilySize'].map(lambda s: 1 if 5 <= s else 0)



In [854]:
xy_train.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S,Pclass_fit,Fare_fit
255,256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",0,0.3625,0,2,2650,15.2458,...,0,0,0,0,1,1,0,0,0.827377,-0.341452
637,638,0,2,"Collyer, Mr. Harvey",1,0.3875,1,1,C.A. 31921,26.25,...,0,0,0,0,1,0,0,1,-0.369365,-0.119886
419,420,0,3,"Van Impe, Miss. Catharina",0,0.125,0,2,345773,24.15,...,0,0,0,0,1,0,0,1,0.827377,-0.162169


In [227]:
#process fare
xy_train.Fare.fillna(xy_train.Fare.mean(), inplace=True)
x_test.Fare.fillna(xy_train.Fare.mean(), inplace=True) # we use tran set data here



In [851]:
#extra fare processing
#xy_train.Fare = xy_train.Fare/513
#x_test.Fare = x_test.Fare/513

#put back
xy_train.Fare = xy_train.Fare*513
x_test.Fare = x_test.Fare*513

In [853]:
# test : scale fare
xy_train['Fare_fit'] = StandardScaler().fit_transform(xy_train['Fare'].values.reshape(-1, 1))
x_test['Fare_fit'] = StandardScaler().fit_transform(x_test['Fare'].values.reshape(-1, 1))

In [238]:
# get the number of missing data points per column
missing_values_count = xy_train.isnull().sum()

# look at the # of missing points in the first ten columns
missing_values_count [5] #5 is age

# how many total missing values do we have?
total_cells = (xy_train.shape[0])
total_missing = missing_values_count[5]

# percent of data that is missing
(total_missing/total_cells) * 100

19.865319865319865

In [674]:
#play with embarked
xy_train['Embarked'].fillna('S', inplace = True)
xy_train.isnull().sum().sort_values(ascending = False)

# dummy encoding ...
Embarked_tr = pd.get_dummies(xy_train['Embarked'], prefix='Embarked')    
xy_train = pd.concat([xy_train, Embarked_tr], axis=1)

Embarked_t = pd.get_dummies(x_test['Embarked'], prefix='Embarked')    
x_test = pd.concat([x_test, Embarked_t], axis=1)

In [822]:
from sklearn.preprocessing import StandardScaler

In [828]:
#test with pclass
xy_train['Pclass_fit'] = StandardScaler().fit_transform(xy_train['Pclass'].values.reshape(-1, 1))
x_test['Pclass_fit'] = StandardScaler().fit_transform(x_test['Pclass'].values.reshape(-1, 1))



In [826]:
xy_train.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,...,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S,Pclass_fit
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,...,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,0.647587,0.364228,0.523008,0.381594,0.062776,0.242424,0.20651,...,0.037037,0.035915,0.01459,0.004489,0.001122,0.771044,0.188552,0.08642,0.725028,-2.031048e-16
std,257.353842,0.486592,0.836071,0.47799,0.16869,1.102743,0.806057,0.096868,0.42879,0.405028,...,0.188959,0.186182,0.119973,0.06689,0.033501,0.420397,0.391372,0.281141,0.446751,1.000562
min,1.0,0.0,1.0,0.0,0.00525,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.566107
25%,223.5,0.0,2.0,0.0,0.2625,0.0,0.0,0.01542,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-0.3693648
50%,446.0,0.0,3.0,1.0,0.325,0.0,0.0,0.028176,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.8273772
75%,668.5,1.0,3.0,1.0,0.459375,1.0,0.0,0.060429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.8273772
max,891.0,1.0,3.0,1.0,1.0,8.0,6.0,0.998692,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8273772


In [791]:
xy_train.sample(1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S
764,765,0,3,"Eklund, Mr. Hans Linus",1,0.2,0,0,347074,0.015156,...,0,0,0,0,0,0,1,0,0,1


In [898]:
class TitanicDataset(Dataset):
    """ Titanic dataset."""

    # Initialize your data, download, etc.
    def __init__(self):

        #Train data
        self.len = xy_train.shape[0]
        #self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['Cabin_C','Cabin_D','Cabin_E','Cabin_F','Cabin_G','Age', 'Sex', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Parch', 'Title_Master', 'Title_Miss', 'Title_Mr', 'Title_Mrs','Pclass_3', 'Title_Officer', 'Title_Royalty', 'FamilySize', 'Singleton', 'SmallFamily', 'LargeFamily', 'Fare']].values, dtype='float')).float()
        self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['Parch','SibSp', 'Pclass_1', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S',  'Cabin_C','Cabin_U','Title_Mrs','Title_Miss','Title_Mr', 'Title_Master',  'Sex',   'Singleton', 'SmallFamily', 'LargeFamily']].values, dtype='float')).float()
        #self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['1','2','3']].values, dtype='float')).float()
        self.y_data = np.array(xy_train.loc[:, ['Survived']].values, dtype='float')
        self.pt_y_data = torch.from_numpy(self.y_data).float()
        
        #Test data
        #self.pt_x_data_test = torch.from_numpy(np.array(x_test.loc[:, ['Age', 'Sex', 'Pclass_1', 'Pclass_2', 'Pclass_3','Parch', 'Title_Master','Title_Miss', 'Title_Mr', 'Title_Mrs', 'Title_Officer', 'Title_Royalty', 'FamilySize','Pclass_3', 'Singleton', 'SmallFamily', 'LargeFamily', 'Fare']].values, dtype='float')).float()
        self.pt_x_data_test = torch.from_numpy(np.array(x_test.loc[:, ['Parch','SibSp', 'Pclass_1','Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S',  'Cabin_C','Cabin_U','Title_Mrs','Title_Miss', 'Title_Mr', 'Title_Master',  'Sex',  'Singleton', 'SmallFamily', 'LargeFamily']].values, dtype='float')).float()
        #self.y_data_test = np.array(xy_test.loc[:, ['Survived']].values, dtype='float')

    def __getitem__(self, index):
        return self.pt_x_data[index], self.pt_y_data[index]

    def __len__(self):
        return self.len


dataset = TitanicDataset()

In [899]:
train_loader = DataLoader(dataset=dataset,
                          batch_size=50,
                          shuffle=True,
                          num_workers=0)

In [900]:
#type(train_loader)
print (dataset.pt_x_data.shape)
print (dataset.y_data.shape)
print (dataset.pt_y_data.shape)

#print (dataset.pt_x_data)
#print (dataset.y_data)


torch.Size([891, 17])
(891, 1)
torch.Size([891, 1])


In [901]:
class Model(torch.nn.Module):

    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.L1 = torch.nn.Linear(17, 270)
        self.L2 = torch.nn.Linear(270, 1)
        #self.L3 = torch.nn.Linear(50, 1)

        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        #out1 = self.sigmoid(self.l1(x))
        #out2 = self.sigmoid(self.l2(out1))
        #y_pred = self.sigmoid(self.l3(out2))
        
        #out1 = self.relu(self.L1(x))
        
        out1 = self.L1(x)
        out1 = F.dropout(out1, p=0.1)
        out1 = self.relu(out1)
        
        #out2 = self.L2(out1)
        #out2 = F.dropout(out2, p=0.1)
        #out2 = self.relu(out2)
        
        y_pred = self.sigmoid(self.L2(out1))
        
        
                
        return y_pred

# our model
model = Model()

In [902]:
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.BCELoss(size_average=True)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)



In [903]:
# Training loop
model.train()

for epoch in range(100):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(inputs)

        # Compute and print loss
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.data[0])

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



0 0 tensor(0.7053)
0 1 tensor(0.6683)
0 2 tensor(0.5071)
0 3 tensor(0.5333)
0 4 tensor(0.5489)
0 5 tensor(0.4437)
0 6 tensor(0.4669)
0 7 tensor(0.5116)
0 8 tensor(0.4516)
0 9 tensor(0.4537)
0 10 tensor(0.3731)
0 11 tensor(0.4640)
0 12 tensor(0.4403)
0 13 tensor(0.6396)
0 14 tensor(0.3435)
0 15 tensor(0.4385)
0 16 tensor(0.4426)
0 17 tensor(0.4456)
1 0 tensor(0.3718)
1 1 tensor(0.3296)
1 2 tensor(0.3999)
1 3 tensor(0.6366)
1 4 tensor(0.4278)
1 5 tensor(0.4041)
1 6 tensor(0.4065)
1 7 tensor(0.3621)
1 8 tensor(0.5556)
1 9 tensor(0.4626)
1 10 tensor(0.2890)
1 11 tensor(0.4383)
1 12 tensor(0.3991)
1 13 tensor(0.5057)
1 14 tensor(0.3995)
1 15 tensor(0.2952)
1 16 tensor(0.4265)
1 17 tensor(0.4411)
2 0 tensor(0.3989)
2 1 tensor(0.4582)
2 2 tensor(0.3973)
2 3 tensor(0.3052)
2 4 tensor(0.5494)
2 5 tensor(0.5188)
2 6 tensor(0.4725)
2 7 tensor(0.5321)
2 8 tensor(0.3242)
2 9 tensor(0.4165)
2 10 tensor(0.3201)
2 11 tensor(0.3828)
2 12 tensor(0.3534)
2 13 tensor(0.4168)
2 14 tensor(0.2646)
2 15 tenso

23 10 tensor(0.5615)
23 11 tensor(0.4866)
23 12 tensor(0.3865)
23 13 tensor(0.3391)
23 14 tensor(0.2607)
23 15 tensor(0.3357)
23 16 tensor(0.3835)
23 17 tensor(0.3721)
24 0 tensor(0.3175)
24 1 tensor(0.4293)
24 2 tensor(0.3599)
24 3 tensor(0.3494)
24 4 tensor(0.2211)
24 5 tensor(0.3511)
24 6 tensor(0.3911)
24 7 tensor(0.4183)
24 8 tensor(0.3739)
24 9 tensor(0.3478)
24 10 tensor(0.3860)
24 11 tensor(0.2464)
24 12 tensor(0.3315)
24 13 tensor(0.3589)
24 14 tensor(0.3784)
24 15 tensor(0.3351)
24 16 tensor(0.4202)
24 17 tensor(0.3419)
25 0 tensor(0.4393)
25 1 tensor(0.1529)
25 2 tensor(0.4720)
25 3 tensor(0.1891)
25 4 tensor(0.3468)
25 5 tensor(0.2555)
25 6 tensor(0.3819)
25 7 tensor(0.4157)
25 8 tensor(0.4187)
25 9 tensor(0.3949)
25 10 tensor(0.4207)
25 11 tensor(0.3298)
25 12 tensor(0.2999)
25 13 tensor(0.3069)
25 14 tensor(0.2766)
25 15 tensor(0.3510)
25 16 tensor(0.4448)
25 17 tensor(0.4045)
26 0 tensor(0.3625)
26 1 tensor(0.3962)
26 2 tensor(0.2666)
26 3 tensor(0.3851)
26 4 tensor(0.30

46 9 tensor(0.3489)
46 10 tensor(0.4167)
46 11 tensor(0.3795)
46 12 tensor(0.3809)
46 13 tensor(0.4154)
46 14 tensor(0.4183)
46 15 tensor(0.1992)
46 16 tensor(0.2735)
46 17 tensor(0.4374)
47 0 tensor(0.2954)
47 1 tensor(0.1901)
47 2 tensor(0.3018)
47 3 tensor(0.4052)
47 4 tensor(0.2971)
47 5 tensor(0.2355)
47 6 tensor(0.1927)
47 7 tensor(0.3414)
47 8 tensor(0.3211)
47 9 tensor(0.4081)
47 10 tensor(0.2908)
47 11 tensor(0.4794)
47 12 tensor(0.3694)
47 13 tensor(0.3483)
47 14 tensor(0.3372)
47 15 tensor(0.4693)
47 16 tensor(0.4343)
47 17 tensor(0.3227)
48 0 tensor(0.2158)
48 1 tensor(0.2424)
48 2 tensor(0.3885)
48 3 tensor(0.2708)
48 4 tensor(0.2388)
48 5 tensor(0.2752)
48 6 tensor(0.4364)
48 7 tensor(0.2689)
48 8 tensor(0.1628)
48 9 tensor(0.3042)
48 10 tensor(0.4489)
48 11 tensor(0.4194)
48 12 tensor(0.4477)
48 13 tensor(0.4916)
48 14 tensor(0.3891)
48 15 tensor(0.3966)
48 16 tensor(0.4358)
48 17 tensor(0.4095)
49 0 tensor(0.3769)
49 1 tensor(0.2999)
49 2 tensor(0.2862)
49 3 tensor(0.37

71 11 tensor(0.2477)
71 12 tensor(0.4882)
71 13 tensor(0.3301)
71 14 tensor(0.3882)
71 15 tensor(0.2105)
71 16 tensor(0.2450)
71 17 tensor(0.3120)
72 0 tensor(0.2713)
72 1 tensor(0.2708)
72 2 tensor(0.2107)
72 3 tensor(0.2861)
72 4 tensor(0.5584)
72 5 tensor(0.4627)
72 6 tensor(0.5591)
72 7 tensor(0.1805)
72 8 tensor(0.2881)
72 9 tensor(0.3239)
72 10 tensor(0.3159)
72 11 tensor(0.3832)
72 12 tensor(0.3361)
72 13 tensor(0.3523)
72 14 tensor(0.3217)
72 15 tensor(0.2384)
72 16 tensor(0.4248)
72 17 tensor(0.2707)
73 0 tensor(0.4794)
73 1 tensor(0.2216)
73 2 tensor(0.3083)
73 3 tensor(0.3271)
73 4 tensor(0.2754)
73 5 tensor(0.3977)
73 6 tensor(0.3102)
73 7 tensor(0.3515)
73 8 tensor(0.3118)
73 9 tensor(0.2560)
73 10 tensor(0.3875)
73 11 tensor(0.3070)
73 12 tensor(0.3307)
73 13 tensor(0.2688)
73 14 tensor(0.3664)
73 15 tensor(0.2388)
73 16 tensor(0.3399)
73 17 tensor(0.4096)
74 0 tensor(0.3131)
74 1 tensor(0.4153)
74 2 tensor(0.2644)
74 3 tensor(0.2349)
74 4 tensor(0.4237)
74 5 tensor(0.250

95 14 tensor(0.2896)
95 15 tensor(0.2700)
95 16 tensor(0.3334)
95 17 tensor(0.2818)
96 0 tensor(0.3009)
96 1 tensor(0.2800)
96 2 tensor(0.3648)
96 3 tensor(0.3512)
96 4 tensor(0.3419)
96 5 tensor(0.3299)
96 6 tensor(0.3242)
96 7 tensor(0.3671)
96 8 tensor(0.3414)
96 9 tensor(0.4037)
96 10 tensor(0.3427)
96 11 tensor(0.3856)
96 12 tensor(0.3349)
96 13 tensor(0.2745)
96 14 tensor(0.2872)
96 15 tensor(0.2860)
96 16 tensor(0.1640)
96 17 tensor(0.3464)
97 0 tensor(0.3598)
97 1 tensor(0.2437)
97 2 tensor(0.2643)
97 3 tensor(0.2413)
97 4 tensor(0.3115)
97 5 tensor(0.3010)
97 6 tensor(0.4610)
97 7 tensor(0.4070)
97 8 tensor(0.3306)
97 9 tensor(0.3411)
97 10 tensor(0.3987)
97 11 tensor(0.3172)
97 12 tensor(0.2965)
97 13 tensor(0.3408)
97 14 tensor(0.2742)
97 15 tensor(0.4137)
97 16 tensor(0.3172)
97 17 tensor(0.3281)
98 0 tensor(0.2759)
98 1 tensor(0.4705)
98 2 tensor(0.3197)
98 3 tensor(0.3170)
98 4 tensor(0.4228)
98 5 tensor(0.3456)
98 6 tensor(0.4004)
98 7 tensor(0.2054)
98 8 tensor(0.1714)


In [None]:
#this will be last mini batch
print (y_pred.shape)

In [904]:
# Train and Test Set Predictions
print ("train set accuracy = " + str (accuracy(dataset.pt_x_data, dataset.y_data)))
#print ("test set accuracy = " + str (accuracy(dataset.pt_x_data_test, dataset.y_data_test)))

train set accuracy = 0.8653198653198653


In [None]:
print (dataset.y_data_test)




In [905]:
#Run test set through
model.eval()
test_pred = model(dataset.pt_x_data_test)
print (test_pred.shape)
print (type(test_pred))

#Add prediction col to test panda
#to_np = np.round(test_pred.detach().numpy())


#to_np = (test_pred.detach().numpy())
to_np = np.where((test_pred.detach().numpy())>0.5,1.0,0)


to_np = to_np.astype(int)
x_test['Survived'] = to_np


#Export to csv for submission
x_test.to_csv('mix44a50_PClass13_parch_sib_miss_MRS_MR_MASter_sex_cabCU_port-270mod-dropout.csv', columns = ['PassengerId','Survived'], index = False)


torch.Size([418, 1])
<class 'torch.Tensor'>


In [531]:
print (test_pred[73])

tensor([0.5141], grad_fn=<SelectBackward>)


In [71]:
def accuracy(X, Y):
    """
    This function is used to measure accuracy of the model.
    
    Arguments:
    X -- data set of examples you would like to label
    Y -- labels
    
    Returns:
    p -- predictions for the given dataset X
    """
    
    prediction = model(X)
    #print (prediction)
    np_prediction = np.round(prediction.detach().numpy())
    predictions_correct = np.equal(np_prediction, Y)
    #print (predictions_correct)
    p = np.mean(predictions_correct)
        
    return p

In [None]:
......................Extra Stuff...........
convertfunc = lambda x: float(x.strip("%"))/100
test = np.genfromtxt('./data/titanic/train.csv', delimiter=',', skip_header=1, usecols = (0,1,2,4))

#aai = a.iloc[:, [0,3]].values
train_x = a.loc[:, ['PassengerId', 'Pclass', 'Sex']].values
train_y = a.loc[:, ['Survived']].values

In [None]:
print ("pt_x_data: " + str (dataset.pt_x_data.shape))
print ("pt_y_data: " + str (dataset.pt_y_data.shape))
print ("y_data: " + str (dataset.y_data.shape))

In [None]:
print (type (train_x))
print (type (train_y))

print (train_x)
print (train_y)

In [None]:
#Read in debug file
debug_vals = np.genfromtxt('./data/titanic/debug_justPID.csv', delimiter=',', dtype=np.float32, skip_header=1)

#convert to correct format
#debug_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['PassengerId', 'Pclass', 'Sex']].values, dtype='float')).float()
#debug_y_data = np.array(xy_train.loc[:, ['Survived']].values, dtype='float')

debug_x_data = torch.from_numpy(debug_vals[:, 0:-1])
debug_y_data = np.array(debug_vals[:, [-1]])
print (debug_x_data)
print (debug_y_data)

print ("debug set accuracy = " + str (accuracy(debug_x_data,debug_y_data)))

In [None]:
a = np.array([10,-1])
b = np.array([10,1])



In [None]:
np.dot(a.T,b)