In [1]:
import torch
import numpy as np
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler

In [2]:
###  Load and Clean Data  ###

#Load as panda
xy_train = pd.read_csv('./data/titanic/train.csv')
x_test = pd.read_csv('./data/titanic/test.csv')
#xy_train = pd.read_csv('./data/titanic/debug_1s.csv')

#Scale Sex
xy_train['Sex'] = xy_train['Sex'].map({'male':1, 'female':0})
x_test['Sex'] = x_test['Sex'].map({'male':1, 'female':0})


#Pclass to one hot
pclass_dummies_tr = pd.get_dummies(xy_train['Pclass'], prefix='Pclass')
xy_train = pd.concat([xy_train, pclass_dummies_tr], axis=1)

pclass_dummies_t = pd.get_dummies(x_test['Pclass'], prefix='Pclass')
x_test = pd.concat([x_test, pclass_dummies_t], axis=1)

#Scale Fare
#xy_train.Fare = xy_train.Fare/100
#x_test.Fare = x_test.Fare/100



xy_train.sample(3)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Pclass_1,Pclass_2,Pclass_3
310,311,1,1,"Hays, Miss. Margaret Bechstein",0,24.0,0,0,11767,83.1583,C54,C,1,0,0
453,454,1,1,"Goldenberg, Mr. Samuel L",1,49.0,1,0,17453,89.1042,C92,C,1,0,0
385,386,0,2,"Davies, Mr. Charles Henry",1,18.0,0,0,S.O.C. 14879,73.5,,S,0,1,0


In [3]:
#Prep Validation Set [should have done this a long time ago!!!!!]
temp_validation_set = pd.read_csv('./data/titanic/full_titanic3_validation.csv')
validation_set = pd.read_csv('./data/titanic/test.csv')


df = pd.DataFrame(columns=['survived'])

#for each ticket in validation_set
for iv, rv in validation_set.iterrows():
    vt = (rv[8])
    #print ('vt is: ' +str (vt))
    #print (temp_validation_set.iloc[1])
    vs = (temp_validation_set['survived'].loc[temp_validation_set['ticket'] == vt])
    #print (vs.values[0])
    df = df.append([{'survived':(vs.values[0]).astype(int)}], ignore_index=True)
    #print (type(vs.values[0]))
    
    
validation_set = pd.concat([validation_set, df], axis=1)

validation_set.describe()   
validation_set.sample(10)
validation_set.head(n=10)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Survive,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,survived
0,892,3,"Kelly, Mr. James",male,,34.5,0,0,330911,7.8292,,Q,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,1.0,47.0,1,0,363272,7.0,,S,1
2,894,2,"Myles, Mr. Thomas Francis",male,,62.0,0,0,240276,9.6875,,Q,0
3,895,3,"Wirz, Mr. Albert",male,,27.0,0,0,315154,8.6625,,S,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,1.0,22.0,1,1,3101298,12.2875,,S,1
5,897,3,"Svensson, Mr. Johan Cervin",male,,14.0,0,0,7538,9.225,,S,1
6,898,3,"Connolly, Miss. Kate",female,0.0,30.0,0,0,330972,7.6292,,Q,0
7,899,2,"Caldwell, Mr. Albert Francis",male,,26.0,1,1,248738,29.0,,S,1
8,900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,,18.0,0,0,2657,7.2292,,C,1
9,901,3,"Davies, Mr. John Samuel",male,,21.0,2,0,A/4 48871,24.15,,S,0


In [None]:
print (validation_set.dtypes)
print (temp_validation_set.dtypes)

In [5]:
#age cleanup WIP
grouped_train = xy_train.groupby(['Sex','Pclass','Title'])
grouped_median_train = grouped_train.median()
grouped_median_train = grouped_median_train.reset_index()[['Sex', 'Pclass', 'Title', 'Age']]
grouped_median_train.head()

Unnamed: 0,Sex,Pclass,Title,Age
0,0,1,Miss,30.0
1,0,1,Mrs,40.0
2,0,1,Officer,49.0
3,0,1,Royalty,40.5
4,0,2,Miss,24.0


In [6]:
#age cleanup part 2
def fill_age(row):
    condition = (
        (grouped_median_train['Sex'] == row['Sex']) & 
        (grouped_median_train['Title'] == row['Title']) & 
        (grouped_median_train['Pclass'] == row['Pclass'])
    ) 
    return grouped_median_train[condition]['Age'].values[0]

xy_train['Age'] = xy_train.apply(lambda row: fill_age(row) if np.isnan(row['Age']) else row['Age'], axis=1)
x_test['Age'] = x_test.apply(lambda row: fill_age(row) if np.isnan(row['Age']) else row['Age'], axis=1)

In [71]:
#optional age cleanup part 3 (scale 2 ways)
#xy_train.Age = xy_train.Age/80
#x_test.Age = x_test.Age/80

xy_train['Age_fit'] = StandardScaler().fit_transform(xy_train['Age'].values.reshape(-1, 1))
x_test['Age_fit'] = StandardScaler().fit_transform(x_test['Age'].values.reshape(-1, 1))

In [4]:
Title_Dictionary = {
    "Capt": "Officer",
    "Col": "Officer",
    "Major": "Officer",
    "Jonkheer": "Royalty",
    "Don": "Royalty",
    "Sir" : "Royalty",
    "Dr": "Officer",
    "Rev": "Officer",
    "the Countess":"Royalty",
    "Mme": "Mrs",
    "Mlle": "Miss",
    "Ms": "Mrs",
    "Mr" : "Mr",
    "Mrs" : "Mrs",
    "Miss" : "Miss",
    "Master" : "Master",
    "Lady" : "Royalty"
}


# we extract the title from each name
xy_train['Title'] = xy_train['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())
x_test['Title'] = x_test['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())

# a map of more aggregated title
# we map each title
xy_train['Title'] = xy_train.Title.map(Title_Dictionary)
x_test['Title'] = x_test.Title.map(Title_Dictionary)

#convert to one hot
titles_dummies_tr = pd.get_dummies(xy_train['Title'], prefix='Title')
xy_train = pd.concat([xy_train, titles_dummies_tr], axis=1)

titles_dummies_t = pd.get_dummies(x_test['Title'], prefix='Title')
x_test = pd.concat([x_test, titles_dummies_t], axis=1)


xy_train.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Pclass_1,Pclass_2,Pclass_3,Title,Title_Master,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,Title_Royalty
470,471,0,3,"Keefe, Mr. Arthur",1,,0,0,323592,7.25,...,0,0,1,Mr,0,0,1,0,0,0
194,195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",0,44.0,0,0,PC 17610,27.7208,...,1,0,0,Mrs,0,0,0,1,0,0
265,266,0,2,"Reeves, Mr. David",1,36.0,0,0,C.A. 17248,10.5,...,0,1,0,Mr,0,0,1,0,0,0


In [72]:
xy_train.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S,Age_fit
43,44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",0,3.0,1,2,SC/Paris 2123,41.5792,...,0,0,0,0,0,1,1,0,0,-1.937946
613,614,0,3,"Horgan, Mr. John",1,26.0,0,0,370377,7.75,...,0,0,0,0,0,1,0,1,0,-0.232676
453,454,1,1,"Goldenberg, Mr. Samuel L",1,49.0,1,0,17453,89.1042,...,0,0,0,0,0,0,1,0,0,1.472594


In [8]:
xy_train['FamilyS'] = xy_train['SibSp'] + xy_train['Parch'] + 1
x_test['FamilyS'] = x_test['SibSp'] + x_test['Parch'] + 1



# introducing a new feature : the size of families (including the passenger)
xy_train['FamilySize'] = xy_train['Parch'] + xy_train['SibSp'] + 1
   
# introducing other features based on the family size
xy_train['Singleton'] = xy_train['FamilySize'].map(lambda s: 1 if s == 1 else 0)
xy_train['SmallFamily'] = xy_train['FamilySize'].map(lambda s: 1 if 2 <= s <= 4 else 0)
xy_train['LargeFamily'] = xy_train['FamilySize'].map(lambda s: 1 if 5 <= s else 0)


# introducing a new feature : the size of families (including the passenger)
x_test['FamilySize'] = x_test['Parch'] + x_test['SibSp'] + 1
   
# introducing other features based on the family size
x_test['Singleton'] = x_test['FamilySize'].map(lambda s: 1 if s == 1 else 0)
x_test['SmallFamily'] = x_test['FamilySize'].map(lambda s: 1 if 2 <= s <= 4 else 0)
x_test['LargeFamily'] = x_test['FamilySize'].map(lambda s: 1 if 5 <= s else 0)



In [9]:
xy_train.sample(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,Title_Royalty,FamilyS,FamilySize,Singleton,SmallFamily,LargeFamily
384,385,0,3,"Plotcharsky, Mr. Vasil",1,26.0,0,0,349227,7.8958,...,0,1,0,0,0,1,1,1,0,0
538,539,0,3,"Risien, Mr. Samuel Beard",1,26.0,0,0,364498,14.5,...,0,1,0,0,0,1,1,1,0,0
590,591,0,3,"Rintamaki, Mr. Matti",1,35.0,0,0,STON/O 2. 3101273,7.125,...,0,1,0,0,0,1,1,1,0,0


In [None]:
#process fare
xy_train.Fare.fillna(xy_train.Fare.mean(), inplace=True)
x_test.Fare.fillna(xy_train.Fare.mean(), inplace=True) # we use tran set data here



In [None]:
#extra fare processing
#xy_train.Fare = xy_train.Fare/513
#x_test.Fare = x_test.Fare/513

#put back
xy_train.Fare = xy_train.Fare*513
x_test.Fare = x_test.Fare*513

In [None]:
# test : scale fare
xy_train['Fare_fit'] = StandardScaler().fit_transform(xy_train['Fare'].values.reshape(-1, 1))
x_test['Fare_fit'] = StandardScaler().fit_transform(x_test['Fare'].values.reshape(-1, 1))

In [10]:
#process cabin

# replacing missing cabins with U (for Uknown)
xy_train.Cabin.fillna('U', inplace=True)
x_test.Cabin.fillna('U', inplace=True)
    
# mapping each Cabin value with the cabin letter
xy_train['Cabin'] = xy_train['Cabin'].map(lambda c: c[0])
x_test['Cabin'] = x_test['Cabin'].map(lambda c: c[0])
    
# dummy encoding ...
cabin_dummies_tr = pd.get_dummies(xy_train['Cabin'], prefix='Cabin')    
xy_train = pd.concat([xy_train, cabin_dummies_tr], axis=1)

cabin_dummies_t = pd.get_dummies(x_test['Cabin'], prefix='Cabin')    
x_test = pd.concat([x_test, cabin_dummies_t], axis=1)

In [11]:
# get the number of missing data points per column
missing_values_count = xy_train.isnull().sum()

# look at the # of missing points in the first ten columns
print (missing_values_count) #5 is age

# how many total missing values do we have?
total_cells = (xy_train.shape[0])
total_missing = missing_values_count[5]

# percent of data that is missing
#(total_missing/total_cells) * 100

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin            0
Embarked         2
Pclass_1         0
Pclass_2         0
Pclass_3         0
Title            0
Title_Master     0
Title_Miss       0
Title_Mr         0
Title_Mrs        0
Title_Officer    0
Title_Royalty    0
FamilyS          0
FamilySize       0
Singleton        0
SmallFamily      0
LargeFamily      0
Cabin_A          0
Cabin_B          0
Cabin_C          0
Cabin_D          0
Cabin_E          0
Cabin_F          0
Cabin_G          0
Cabin_T          0
Cabin_U          0
dtype: int64


In [12]:
#play with embarked
xy_train['Embarked'].fillna('S', inplace = True)
xy_train.isnull().sum().sort_values(ascending = False)

# dummy encoding ...
Embarked_tr = pd.get_dummies(xy_train['Embarked'], prefix='Embarked')    
xy_train = pd.concat([xy_train, Embarked_tr], axis=1)

Embarked_t = pd.get_dummies(x_test['Embarked'], prefix='Embarked')    
x_test = pd.concat([x_test, Embarked_t], axis=1)

In [70]:
from sklearn.preprocessing import StandardScaler

In [None]:
#test with pclass
xy_train['Pclass_fit'] = StandardScaler().fit_transform(xy_train['Pclass'].values.reshape(-1, 1))
x_test['Pclass_fit'] = StandardScaler().fit_transform(x_test['Pclass'].values.reshape(-1, 1))

In [13]:
xy_train.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,...,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,...,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,0.647587,29.138238,0.523008,0.381594,32.204208,0.242424,0.20651,...,0.066218,0.037037,0.035915,0.01459,0.004489,0.001122,0.771044,0.188552,0.08642,0.725028
std,257.353842,0.486592,0.836071,0.47799,13.495175,1.102743,0.806057,49.693429,0.42879,0.405028,...,0.248802,0.188959,0.186182,0.119973,0.06689,0.033501,0.420397,0.391372,0.281141,0.446751
min,1.0,0.0,1.0,0.0,0.42,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,223.5,0.0,2.0,0.0,21.0,0.0,0.0,7.9104,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
50%,446.0,0.0,3.0,1.0,26.0,0.0,0.0,14.4542,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
75%,668.5,1.0,3.0,1.0,36.75,1.0,0.0,31.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
max,891.0,1.0,3.0,1.0,80.0,8.0,6.0,512.3292,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [69]:
xy_train.sample(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S
100,101,0,3,"Petranec, Miss. Matilda",0,28.0,0,0,349245,7.8958,...,0,0,0,0,0,0,1,0,0,1
342,343,0,2,"Collander, Mr. Erik Gustaf",1,28.0,0,0,248740,13.0,...,0,0,0,0,0,0,1,0,0,1
404,405,0,3,"Oreskovic, Miss. Marija",0,20.0,0,0,315096,8.6625,...,0,0,0,0,0,0,1,0,0,1
82,83,1,3,"McDermott, Miss. Brigdet Delia",0,18.0,0,0,330932,7.7875,...,0,0,0,0,0,0,1,0,1,0
212,213,0,3,"Perkin, Mr. John Henry",1,22.0,0,0,A/5 21174,7.25,...,0,0,0,0,0,0,1,0,0,1
45,46,0,3,"Rogers, Mr. William John",1,26.0,0,0,S.C./A.4. 23567,8.05,...,0,0,0,0,0,0,1,0,0,1
794,795,0,3,"Dantcheff, Mr. Ristiu",1,25.0,0,0,349203,7.8958,...,0,0,0,0,0,0,1,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,...,1,0,0,0,0,0,0,1,0,0
176,177,0,3,"Lefebre, Master. Henry Forbes",1,4.0,3,1,4133,25.4667,...,0,0,0,0,0,0,1,0,0,1
408,409,0,3,"Birkeland, Mr. Hans Martin Monsen",1,21.0,0,0,312992,7.775,...,0,0,0,0,0,0,1,0,0,1


In [246]:
class TitanicDataset(Dataset):
    """ Titanic dataset."""

    # Initialize your data, download, etc.
    def __init__(self):

        #Train data
        self.len = xy_train.shape[0]
        #self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['Cabin_C','Cabin_D','Cabin_E','Cabin_F','Cabin_G','Age', 'Sex', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Parch', 'Title_Master', 'Title_Miss', 'Title_Mr', 'Title_Mrs','Pclass_3', 'Title_Officer', 'Title_Royalty', 'FamilySize', 'Singleton', 'SmallFamily', 'LargeFamily', 'Fare']].values, dtype='float')).float()
        self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['Parch','SibSp','Singleton','Sex','Title_Miss', 'Title_Mrs', 'Cabin_U','Embarked_C', 'Embarked_Q', 'Embarked_S','Pclass_1', 'Pclass_2', 'Pclass_3']].values, dtype='float')).float()
        #self.pt_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['1','2','3']].values, dtype='float')).float()
        self.y_data = np.array(xy_train.loc[:, ['Survived']].values, dtype='float')
        self.pt_y_data = torch.from_numpy(self.y_data).float()
        
        #Test data
        #self.pt_x_data_test = torch.from_numpy(np.array(x_test.loc[:, ['Age', 'Sex', 'Pclass_1', 'Pclass_2', 'Pclass_3','Parch', 'Title_Master','Title_Miss', 'Title_Mr', 'Title_Mrs', 'Title_Officer', 'Title_Royalty', 'FamilySize','Pclass_3', 'Singleton', 'SmallFamily', 'LargeFamily', 'Fare']].values, dtype='float')).float()
        #self.pt_x_data_test = torch.from_numpy(np.array(x_test.loc[:, [ 'Age','Pclass_1','Pclass_2','Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S', 'Cabin_E', 'Cabin_C','Cabin_U','Title_Mrs','Title_Miss', 'Title_Mr', 'Title_Master',  'Sex',  'Singleton', 'SmallFamily', 'LargeFamily']].values, dtype='float')).float()
        self.pt_x_data_test = torch.from_numpy(np.array(x_test.loc[:, ['Parch','SibSp','Singleton','Sex','Title_Miss', 'Title_Mrs', 'Cabin_U','Embarked_C', 'Embarked_Q', 'Embarked_S','Pclass_1', 'Pclass_2', 'Pclass_3']].values, dtype='float')).float()
        self.y_data_test = np.array(validation_set.loc[:, ['survived']].values, dtype='float')
        self.pt_y_data_test = torch.from_numpy(self.y_data_test).float()
        
        
    def __getitem__(self, index):
        return self.pt_x_data[index], self.pt_y_data[index]

    def __len__(self):
        return self.len


dataset = TitanicDataset()

In [247]:
train_loader = DataLoader(dataset=dataset,
                          batch_size=50,
                          shuffle=True,
                          num_workers=0)

In [248]:
#type(train_loader)
print (dataset.pt_x_data.shape)
print (dataset.y_data.shape)
print (dataset.pt_y_data.shape)

#print (dataset.pt_x_data)
#print (dataset.y_data)


torch.Size([891, 13])
(891, 1)
torch.Size([891, 1])


In [267]:
class Model(torch.nn.Module):

    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.L1 = torch.nn.Linear(13, 270)
        self.L2 = torch.nn.Linear(270, 1)
        #self.L3 = torch.nn.Linear(50, 1)

        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        #out1 = self.sigmoid(self.l1(x))
        #out2 = self.sigmoid(self.l2(out1))
        #y_pred = self.sigmoid(self.l3(out2))
        
        #out1 = self.relu(self.L1(x))
        
        out1 = self.L1(x)
        out1 = F.dropout(out1, p=0.1)
        out1 = self.relu(out1)
        
        #out2 = self.L2(out1)
        #out2 = F.dropout(out2, p=0.1)
        #out2 = self.relu(out2)
        
        y_pred = self.sigmoid(self.L2(out1))
        
        
                
        return y_pred

# our model
model = Model()

In [268]:
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.BCELoss(size_average=True)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)



In [269]:
# Training loop
model.train()

for epoch in range(200):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(inputs)

        # Compute and print loss
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.data[0])

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



0 0 tensor(0.6611)
0 1 tensor(0.7690)
0 2 tensor(0.6797)
0 3 tensor(0.5447)
0 4 tensor(0.6307)
0 5 tensor(0.6775)
0 6 tensor(0.5540)
0 7 tensor(0.4760)
0 8 tensor(0.3368)
0 9 tensor(0.5688)
0 10 tensor(0.5562)
0 11 tensor(0.4205)
0 12 tensor(0.3034)
0 13 tensor(0.3351)
0 14 tensor(0.3127)
0 15 tensor(0.5713)
0 16 tensor(0.4643)
0 17 tensor(0.4608)
1 0 tensor(0.3658)
1 1 tensor(0.3584)
1 2 tensor(0.4088)
1 3 tensor(0.3078)
1 4 tensor(0.3850)
1 5 tensor(0.5633)
1 6 tensor(0.5135)
1 7 tensor(0.4367)
1 8 tensor(0.2975)
1 9 tensor(0.5565)
1 10 tensor(0.4130)
1 11 tensor(0.4512)
1 12 tensor(0.4055)
1 13 tensor(0.4092)
1 14 tensor(0.6535)
1 15 tensor(0.4159)
1 16 tensor(0.4190)
1 17 tensor(0.5132)
2 0 tensor(0.2816)
2 1 tensor(0.4726)
2 2 tensor(0.5039)
2 3 tensor(0.3283)
2 4 tensor(0.4326)
2 5 tensor(0.3649)
2 6 tensor(0.5695)
2 7 tensor(0.3168)
2 8 tensor(0.4783)
2 9 tensor(0.4740)
2 10 tensor(0.4054)
2 11 tensor(0.4641)
2 12 tensor(0.3073)
2 13 tensor(0.3440)
2 14 tensor(0.6142)
2 15 tenso

23 6 tensor(0.3591)
23 7 tensor(0.4547)
23 8 tensor(0.3476)
23 9 tensor(0.2478)
23 10 tensor(0.4900)
23 11 tensor(0.5203)
23 12 tensor(0.5196)
23 13 tensor(0.3805)
23 14 tensor(0.3394)
23 15 tensor(0.4322)
23 16 tensor(0.4051)
23 17 tensor(0.5114)
24 0 tensor(0.4317)
24 1 tensor(0.3791)
24 2 tensor(0.4294)
24 3 tensor(0.6493)
24 4 tensor(0.4001)
24 5 tensor(0.3941)
24 6 tensor(0.4365)
24 7 tensor(0.3878)
24 8 tensor(0.4309)
24 9 tensor(0.3082)
24 10 tensor(0.4184)
24 11 tensor(0.2996)
24 12 tensor(0.2056)
24 13 tensor(0.4221)
24 14 tensor(0.3835)
24 15 tensor(0.4955)
24 16 tensor(0.3638)
24 17 tensor(0.4611)
25 0 tensor(0.4562)
25 1 tensor(0.4716)
25 2 tensor(0.5719)
25 3 tensor(0.3162)
25 4 tensor(0.3292)
25 5 tensor(0.3117)
25 6 tensor(0.4896)
25 7 tensor(0.6135)
25 8 tensor(0.4196)
25 9 tensor(0.2585)
25 10 tensor(0.3142)
25 11 tensor(0.3319)
25 12 tensor(0.6259)
25 13 tensor(0.4379)
25 14 tensor(0.4434)
25 15 tensor(0.3085)
25 16 tensor(0.3617)
25 17 tensor(0.2881)
26 0 tensor(0.46

46 2 tensor(0.3295)
46 3 tensor(0.4684)
46 4 tensor(0.4551)
46 5 tensor(0.3673)
46 6 tensor(0.3806)
46 7 tensor(0.3910)
46 8 tensor(0.3900)
46 9 tensor(0.3415)
46 10 tensor(0.2949)
46 11 tensor(0.4360)
46 12 tensor(0.2917)
46 13 tensor(0.3995)
46 14 tensor(0.5333)
46 15 tensor(0.4061)
46 16 tensor(0.4626)
46 17 tensor(0.2312)
47 0 tensor(0.2437)
47 1 tensor(0.4011)
47 2 tensor(0.2834)
47 3 tensor(0.5228)
47 4 tensor(0.3154)
47 5 tensor(0.3245)
47 6 tensor(0.3841)
47 7 tensor(0.5238)
47 8 tensor(0.4656)
47 9 tensor(0.4114)
47 10 tensor(0.3795)
47 11 tensor(0.2923)
47 12 tensor(0.3891)
47 13 tensor(0.3513)
47 14 tensor(0.4039)
47 15 tensor(0.4037)
47 16 tensor(0.4387)
47 17 tensor(0.3600)
48 0 tensor(0.4036)
48 1 tensor(0.3062)
48 2 tensor(0.2733)
48 3 tensor(0.4446)
48 4 tensor(0.3562)
48 5 tensor(0.3868)
48 6 tensor(0.4011)
48 7 tensor(0.3935)
48 8 tensor(0.3419)
48 9 tensor(0.5628)
48 10 tensor(0.4389)
48 11 tensor(0.2914)
48 12 tensor(0.2801)
48 13 tensor(0.4185)
48 14 tensor(0.2234)

69 9 tensor(0.4499)
69 10 tensor(0.3086)
69 11 tensor(0.3573)
69 12 tensor(0.3608)
69 13 tensor(0.4405)
69 14 tensor(0.3830)
69 15 tensor(0.3597)
69 16 tensor(0.5320)
69 17 tensor(0.2745)
70 0 tensor(0.3969)
70 1 tensor(0.3227)
70 2 tensor(0.4024)
70 3 tensor(0.2901)
70 4 tensor(0.3988)
70 5 tensor(0.5693)
70 6 tensor(0.3218)
70 7 tensor(0.3878)
70 8 tensor(0.3434)
70 9 tensor(0.2797)
70 10 tensor(0.3684)
70 11 tensor(0.2799)
70 12 tensor(0.3593)
70 13 tensor(0.4304)
70 14 tensor(0.4220)
70 15 tensor(0.3616)
70 16 tensor(0.3593)
70 17 tensor(0.4209)
71 0 tensor(0.2910)
71 1 tensor(0.4452)
71 2 tensor(0.2702)
71 3 tensor(0.3882)
71 4 tensor(0.3722)
71 5 tensor(0.3284)
71 6 tensor(0.2289)
71 7 tensor(0.5260)
71 8 tensor(0.4833)
71 9 tensor(0.4159)
71 10 tensor(0.2130)
71 11 tensor(0.4828)
71 12 tensor(0.3356)
71 13 tensor(0.3834)
71 14 tensor(0.3808)
71 15 tensor(0.3835)
71 16 tensor(0.3819)
71 17 tensor(0.4831)
72 0 tensor(0.4247)
72 1 tensor(0.1845)
72 2 tensor(0.2409)
72 3 tensor(0.26

94 8 tensor(0.2862)
94 9 tensor(0.1970)
94 10 tensor(0.4780)
94 11 tensor(0.3740)
94 12 tensor(0.3681)
94 13 tensor(0.3254)
94 14 tensor(0.4647)
94 15 tensor(0.5077)
94 16 tensor(0.4250)
94 17 tensor(0.2347)
95 0 tensor(0.3262)
95 1 tensor(0.2943)
95 2 tensor(0.4167)
95 3 tensor(0.4650)
95 4 tensor(0.3673)
95 5 tensor(0.4097)
95 6 tensor(0.4113)
95 7 tensor(0.3725)
95 8 tensor(0.2724)
95 9 tensor(0.3430)
95 10 tensor(0.4089)
95 11 tensor(0.2741)
95 12 tensor(0.2951)
95 13 tensor(0.3880)
95 14 tensor(0.4821)
95 15 tensor(0.2793)
95 16 tensor(0.4065)
95 17 tensor(0.2949)
96 0 tensor(0.4564)
96 1 tensor(0.3393)
96 2 tensor(0.3541)
96 3 tensor(0.3372)
96 4 tensor(0.3102)
96 5 tensor(0.3019)
96 6 tensor(0.4175)
96 7 tensor(0.3521)
96 8 tensor(0.2633)
96 9 tensor(0.4030)
96 10 tensor(0.2930)
96 11 tensor(0.5867)
96 12 tensor(0.3362)
96 13 tensor(0.4917)
96 14 tensor(0.4435)
96 15 tensor(0.2567)
96 16 tensor(0.3122)
96 17 tensor(0.3915)
97 0 tensor(0.2396)
97 1 tensor(0.4375)
97 2 tensor(0.33

117 10 tensor(0.3363)
117 11 tensor(0.2333)
117 12 tensor(0.3428)
117 13 tensor(0.4014)
117 14 tensor(0.3377)
117 15 tensor(0.3669)
117 16 tensor(0.3569)
117 17 tensor(0.3466)
118 0 tensor(0.2468)
118 1 tensor(0.3665)
118 2 tensor(0.3080)
118 3 tensor(0.4369)
118 4 tensor(0.3616)
118 5 tensor(0.2813)
118 6 tensor(0.3123)
118 7 tensor(0.3938)
118 8 tensor(0.3327)
118 9 tensor(0.3845)
118 10 tensor(0.4318)
118 11 tensor(0.2902)
118 12 tensor(0.3627)
118 13 tensor(0.3165)
118 14 tensor(0.4883)
118 15 tensor(0.3921)
118 16 tensor(0.3892)
118 17 tensor(0.3406)
119 0 tensor(0.2596)
119 1 tensor(0.2392)
119 2 tensor(0.2449)
119 3 tensor(0.3940)
119 4 tensor(0.2912)
119 5 tensor(0.3316)
119 6 tensor(0.3625)
119 7 tensor(0.2633)
119 8 tensor(0.4583)
119 9 tensor(0.3446)
119 10 tensor(0.6112)
119 11 tensor(0.4658)
119 12 tensor(0.2722)
119 13 tensor(0.3686)
119 14 tensor(0.3584)
119 15 tensor(0.4017)
119 16 tensor(0.5009)
119 17 tensor(0.3296)
120 0 tensor(0.3972)
120 1 tensor(0.3613)
120 2 tens

142 9 tensor(0.4458)
142 10 tensor(0.3155)
142 11 tensor(0.3958)
142 12 tensor(0.4244)
142 13 tensor(0.3420)
142 14 tensor(0.2959)
142 15 tensor(0.3691)
142 16 tensor(0.3327)
142 17 tensor(0.5166)
143 0 tensor(0.2509)
143 1 tensor(0.3637)
143 2 tensor(0.4332)
143 3 tensor(0.3958)
143 4 tensor(0.3627)
143 5 tensor(0.4076)
143 6 tensor(0.4380)
143 7 tensor(0.2513)
143 8 tensor(0.3186)
143 9 tensor(0.3588)
143 10 tensor(0.3803)
143 11 tensor(0.4093)
143 12 tensor(0.2802)
143 13 tensor(0.3782)
143 14 tensor(0.2786)
143 15 tensor(0.3569)
143 16 tensor(0.3926)
143 17 tensor(0.4372)
144 0 tensor(0.3445)
144 1 tensor(0.3139)
144 2 tensor(0.3243)
144 3 tensor(0.4350)
144 4 tensor(0.2987)
144 5 tensor(0.3564)
144 6 tensor(0.3390)
144 7 tensor(0.2519)
144 8 tensor(0.4052)
144 9 tensor(0.3925)
144 10 tensor(0.5680)
144 11 tensor(0.3787)
144 12 tensor(0.3090)
144 13 tensor(0.3851)
144 14 tensor(0.2751)
144 15 tensor(0.3399)
144 16 tensor(0.3060)
144 17 tensor(0.3623)
145 0 tensor(0.4144)
145 1 tens

165 8 tensor(0.2938)
165 9 tensor(0.5804)
165 10 tensor(0.3144)
165 11 tensor(0.2745)
165 12 tensor(0.2806)
165 13 tensor(0.3880)
165 14 tensor(0.4325)
165 15 tensor(0.5039)
165 16 tensor(0.2381)
165 17 tensor(0.3997)
166 0 tensor(0.3548)
166 1 tensor(0.3530)
166 2 tensor(0.2933)
166 3 tensor(0.4625)
166 4 tensor(0.4294)
166 5 tensor(0.3854)
166 6 tensor(0.2383)
166 7 tensor(0.4183)
166 8 tensor(0.2265)
166 9 tensor(0.3079)
166 10 tensor(0.3217)
166 11 tensor(0.4119)
166 12 tensor(0.2436)
166 13 tensor(0.4095)
166 14 tensor(0.3833)
166 15 tensor(0.3459)
166 16 tensor(0.3175)
166 17 tensor(0.4911)
167 0 tensor(0.2593)
167 1 tensor(0.1907)
167 2 tensor(0.3204)
167 3 tensor(0.4349)
167 4 tensor(0.4401)
167 5 tensor(0.2545)
167 6 tensor(0.3031)
167 7 tensor(0.4102)
167 8 tensor(0.3892)
167 9 tensor(0.3419)
167 10 tensor(0.4045)
167 11 tensor(0.3181)
167 12 tensor(0.4318)
167 13 tensor(0.4068)
167 14 tensor(0.3424)
167 15 tensor(0.3042)
167 16 tensor(0.5008)
167 17 tensor(0.2853)
168 0 tens

189 4 tensor(0.4471)
189 5 tensor(0.3735)
189 6 tensor(0.4235)
189 7 tensor(0.3032)
189 8 tensor(0.3482)
189 9 tensor(0.2933)
189 10 tensor(0.3668)
189 11 tensor(0.4935)
189 12 tensor(0.4578)
189 13 tensor(0.3863)
189 14 tensor(0.3094)
189 15 tensor(0.2737)
189 16 tensor(0.2636)
189 17 tensor(0.1614)
190 0 tensor(0.2559)
190 1 tensor(0.2449)
190 2 tensor(0.2660)
190 3 tensor(0.5440)
190 4 tensor(0.4209)
190 5 tensor(0.3567)
190 6 tensor(0.3477)
190 7 tensor(0.2369)
190 8 tensor(0.3224)
190 9 tensor(0.3048)
190 10 tensor(0.3601)
190 11 tensor(0.4160)
190 12 tensor(0.5029)
190 13 tensor(0.3375)
190 14 tensor(0.3807)
190 15 tensor(0.3370)
190 16 tensor(0.4614)
190 17 tensor(0.4610)
191 0 tensor(0.3024)
191 1 tensor(0.3665)
191 2 tensor(0.4183)
191 3 tensor(0.4895)
191 4 tensor(0.2433)
191 5 tensor(0.3341)
191 6 tensor(0.3835)
191 7 tensor(0.3346)
191 8 tensor(0.3828)
191 9 tensor(0.3589)
191 10 tensor(0.3958)
191 11 tensor(0.3406)
191 12 tensor(0.3145)
191 13 tensor(0.3151)
191 14 tensor(

In [270]:
# Train and Test Set Predictions
print ("train set accuracy = " + str (accuracy(dataset.pt_x_data, dataset.y_data)))
print ("test set accuracy = " + str (accuracy(dataset.pt_x_data_test, dataset.y_data_test)))

train set accuracy = 0.8462401795735129
test set accuracy = 0.7272727272727273


In [None]:
#show bad test rows
model.eval()

prediction = model(dataset.pt_x_data_test)

np_prediction = (prediction.detach().numpy())
np_prediction_rd = np.round(np_prediction)
print (np_prediction_rd.shape)

results = pd.DataFrame(np_prediction)
results['pred_rd'] = np_prediction_rd
results['survived'] = validation_set['survived']
results['name'] = validation_set['Name']

results

In [238]:
#Run test set through
model.eval()
test_pred = model(dataset.pt_x_data_test)
print (test_pred.shape)
print (type(test_pred))

#Add prediction col to test panda
#to_np = np.round(test_pred.detach().numpy())


#to_np = (test_pred.detach().numpy())
to_np = np.where((test_pred.detach().numpy())>0.5,1.0,0)


to_np = to_np.astype(int)
x_test['Survived'] = to_np


#Export to csv for submission
x_test.to_csv('rapidmix106_270mod-dropout_noAdam.csv', columns = ['PassengerId','Survived'], index = False)


torch.Size([418, 1])
<class 'torch.Tensor'>


In [None]:
print (test_pred[73])

In [25]:
def accuracy(X, Y):
    """
    This function is used to measure accuracy of the model.
    
    Arguments:
    X -- data set of examples you would like to label
    Y -- labels
    
    Returns:
    p -- predictions for the given dataset X
    """
    model.eval()
    prediction = model(X)
    #print (prediction)
    np_prediction = np.round(prediction.detach().numpy())
    predictions_correct = np.equal(np_prediction, Y)
    
    p = np.mean(predictions_correct)
    
    
    return p

In [None]:
......................Extra Stuff...........
convertfunc = lambda x: float(x.strip("%"))/100
test = np.genfromtxt('./data/titanic/train.csv', delimiter=',', skip_header=1, usecols = (0,1,2,4))

#aai = a.iloc[:, [0,3]].values
train_x = a.loc[:, ['PassengerId', 'Pclass', 'Sex']].values
train_y = a.loc[:, ['Survived']].values

In [None]:
print ("pt_x_data: " + str (dataset.pt_x_data.shape))
print ("pt_y_data: " + str (dataset.pt_y_data.shape))
print ("y_data: " + str (dataset.y_data.shape))

In [None]:
print (type (train_x))
print (type (train_y))

print (train_x)
print (train_y)

In [None]:
#Read in debug file
debug_vals = np.genfromtxt('./data/titanic/debug_justPID.csv', delimiter=',', dtype=np.float32, skip_header=1)

#convert to correct format
#debug_x_data = torch.from_numpy(np.array(xy_train.loc[:, ['PassengerId', 'Pclass', 'Sex']].values, dtype='float')).float()
#debug_y_data = np.array(xy_train.loc[:, ['Survived']].values, dtype='float')

debug_x_data = torch.from_numpy(debug_vals[:, 0:-1])
debug_y_data = np.array(debug_vals[:, [-1]])
print (debug_x_data)
print (debug_y_data)

print ("debug set accuracy = " + str (accuracy(debug_x_data,debug_y_data)))

In [None]:
a = np.array([10,-1])
b = np.array([10,1])



In [None]:
np.dot(a.T,b)