#### Extract data from train.csv file
This function takes the CSV file train.csv as input and returns three lists:
    1. A list of the features of each passenger [Ticket Class; Sex; Age; SibSp; Parch; Fare]
    2. The corresponding labels: 1 if the passenger survived, 0 if the passenger died
    3. A list of the names of the passengers for whom some features are missing

In [None]:
def extractionTrain(train):
    """Read the Titanic training CSV and build feature, label and reject lists.

    Columns are read by position: 1 = Survived, 2 = ticket class, 3 = name,
    4 = sex, 5 = age, 6 = SibSp, 7 = Parch, 9 = fare.

    Args:
        train: Path of the CSV file to read.

    Returns:
        A tuple ``(features, targets, namesToRemove)`` where

        * ``features`` is a list of
          ``[ticket class, sex, age, SibSp, Parch, fare]`` vectors; the first
          five values are ints (sex is 0 for male, 1 for female) and the fare
          is a float,
        * ``targets`` is the matching list of labels (1 survived, 0 died),
        * ``namesToRemove`` holds the names of the passengers that were
          skipped because at least one feature was missing.

    Raises:
        IndexError: If a non-blank row has fewer than 10 columns.
        ValueError: If a non-empty feature or the label is not numeric.
    """
    import csv

    # Male are converted in 0, Female in 1
    sex_codes = {'male': 0, 'female': 1}

    features = []
    targets = []
    namesToRemove = []

    # newline="" is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(train, "rt", newline="") as file:
        for row in csv.reader(file):
            # Blank lines come back as [] and the header carries the column
            # title instead of a label: neither describes a passenger.
            if not row or row[1] == 'Survived':
                continue

            # An unrecognised sex is recorded as '' so that the row is treated
            # as incomplete instead of silently yielding a shorter vector.
            feature = [
                row[2],
                sex_codes.get(row[4], ''),
                row[5],
                row[6],
                row[7],
                row[9],
            ]

            # All the data with unknown feature must be removed from the set;
            # the name is kept so the companion files can drop the same person.
            if '' in feature:
                namesToRemove.append(row[3])
                continue

            # The label is added to targets: 1 if the passenger survived, 0 if he died
            targets.append(int(row[1]))
            # Going through float() first accepts values such as "22.5" for
            # the integer features; the fare (last value) stays a float.
            numeric = [int(float(value)) for value in feature[:5]]
            numeric.append(float(feature[5]))
            features.append(numeric)

    return features, targets, namesToRemove

#### Extract data from titanic_tailor.csv file
This function extracts the features from the "titanic_tailor.csv" file. These are "fake" data created to see how they can affect the performance of a classifier. They correspond to the height and the weight of the passengers.
Furthermore, this function takes as input the list of names to remove obtained with the previous function and removes the corresponding data.

In [None]:
def extractionTitanicTailor(train, names):
    """Read the two integer features of every passenger that is kept.

    Column 1 of each row is the passenger name; columns 2 and 3 are converted
    to ``int`` (height and weight according to the notes accompanying this
    code). The header row (column 1 equal to ``'Name'``) and blank lines are
    skipped.

    Args:
        train: Path of the CSV file to read.
        names: Names of the passengers whose rows must be left out.

    Returns:
        A list of ``[column 2, column 3]`` integer pairs, in file order.

    Raises:
        IndexError: If a non-blank row has fewer than 4 columns.
        ValueError: If column 2 or 3 of a kept row is not an integer.
    """
    import csv

    # A set gives constant-time membership tests instead of scanning the
    # whole list of names for every row.
    excluded = set(names)
    features = []

    # newline="" is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(train, "rt", newline="") as file:
        for row in csv.reader(file):
            # Blank lines come back as [] and would fail on row[1].
            if not row or row[1] == 'Name':
                continue
            # Verify that the person must not be removed
            if row[1] in excluded:
                continue
            features.append([int(row[2]), int(row[3])])

    return features

#### Extract data from swimworld.csv file
This function extracts the features from the "swimworld.csv" file. These are "fake" data created to see how they can affect the performance of a classifier. They indicate whether or not the passenger belonged to a swimming club (0 or 1).
Furthermore, this function takes as input the list of names to remove obtained with the first function and removes the corresponding data.

In [None]:
def extractionSwimWorld(train, names):
    """Read the single integer feature of every passenger that is kept.

    Column 1 of each row is the passenger name and column 2 is converted to
    ``int`` (a 0/1 swimming-club flag according to the notes accompanying this
    code). The header row (column 1 equal to ``"Name"``) and blank lines are
    skipped.

    Args:
        train: Path of the CSV file to read.
        names: Names of the passengers whose rows must be left out.

    Returns:
        A flat list with the integer value of column 2 for each kept row, in
        file order.

    Raises:
        IndexError: If a non-blank row has fewer than 3 columns.
        ValueError: If column 2 of a kept row is not an integer.
    """
    import csv

    # A set gives constant-time membership tests instead of scanning the
    # whole list of names for every row.
    excluded = set(names)
    features = []

    # newline="" is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(train, "rt", newline="") as file:
        for row in csv.reader(file):
            # Blank lines come back as [] and would fail on row[1].
            if not row or row[1] == "Name":
                continue
            # Verify that the person must not be removed
            if row[1] in excluded:
                continue
            features.append(int(row[2]))

    return features