In [30]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from IPython.display import display # Allows the use of display() for DataFrames

# Import supplementary visualizations code visuals.py
import visuals as vs

def accuracy_score(truth, pred):
    """ Returns accuracy score for input truth and predictions. """
    
    # Ensure that the number of predictions matches number of outcomes
    if len(truth) == len(pred): 
        
        # Calculate and return the accuracy as a percent
        return "Predictions have an accuracy of {:.2f}%.".format((truth == pred).mean()*100)
    
    else:
        return "Number of predictions does not match number of outcomes!"

def predictions_0(data):
    """ Model with no features. Always predicts a passenger did not survive. """

    predictions = []
    for _, passenger in data.iterrows():
        
        # Predict the survival of 'passenger'
        predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)

def predictions_1(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Sex'] == 'female':
            predictions.append(1)
        else:
            predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)

def predictions_2(data):
    """ Model with two features: 
            - Predict a passenger survived if they are female.
            - Predict a passenger survived if they are male and younger than 10. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Sex'] == 'female':
            predictions.append(1)
        else:
            if passenger['Age'] < 10:
                predictions.append(1)
            else:
                predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)
    
def predictions_3(data):
    """ Model with multiple features. Makes a prediction with an accuracy of at least 80%. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Sex'] == 'female':
            if passenger['Pclass'] == 3 and passenger['Age'] > 20 and passenger['Age'] < 60:
                # Do not survived
                predictions.append(0)
            else:
                # Survived
                predictions.append(1)
        else:
            if passenger['Pclass'] == 1 and passenger['Age'] < 40:
                # Survived
                predictions.append(1)
            else:
                # Do not survived
                predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)
    
def predictions_4(data):
    """ Model with multiple features. Makes a prediction with an accuracy of at least 80%. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Parch'] == 0:
            if passenger['Sex'] == 'female':
                if passenger['Pclass'] < 3:
                    # Survived
                    predictions.append(1)
                else:
                    # Do not survived
                    predictions.append(0)
            else:
                if passenger['Age'] < 10:
                    # Survived
                    predictions.append(1)
                else:
                    # Do not survived
                    predictions.append(0)
        else:
            if passenger['Sex'] == 'female':
                if passenger['Pclass'] < 3:
                    # Survived
                    predictions.append(1)
                else:
                    # Do not survived
                    predictions.append(0)
            else:
                if passenger['Age'] < 10:
                    # Survived
                    predictions.append(1)
                else:
                    # Do not survived
                    predictions.append(0)

    # Return our predictions
    return pd.Series(predictions)
 
def predictions_5(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Pclass'] == 1:
            # Survived
            predictions.append(1)
        else:
            # Do not survived
            predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)
    
def predictions_(data):
    """ Model with one feature: 
            - Predict a passenger survived if they are female. """
    
    predictions = []
    for _, passenger in data.iterrows():
        
        if passenger['Pclass'] == 1:
            # Survived
            predictions.append(1)
        else:
            if passenger['Pclass'] == 3:
                # Do not survived
                predictions.append(0)
            else:
                if passenger['Sex'] == 'female':
                    # Survived
                    predictions.append(1)
                else:
                    # Do not survived
                    predictions.append(0)
    
    # Return our predictions
    return pd.Series(predictions)
    
# Pretty display for notebooks
#%matplotlib inline

# Load the dataset
in_file = 'titanic_data.csv'
full_data = pd.read_csv(in_file)

# Print the first few entries of the RMS Titanic data
display(full_data.head())

# Store the 'Survived' feature in a new variable and remove it from the dataset
outcomes = full_data['Survived']
data = full_data.drop('Survived', axis = 1)

# Show the new dataset with 'Survived' removed
display(data.head())

# Test the 'accuracy_score' function
predictions = pd.Series(np.ones(5, dtype = int))
print accuracy_score(outcomes[:5], predictions)

#vs.survival_stats(data, outcomes, 'Pclass')
#vs.survival_stats(data, outcomes, 'Sex')
#vs.survival_stats(data, outcomes, 'Age')
#vs.survival_stats(data, outcomes, 'Fare')


# # Make the predictions
# predictions = predictions_0(data)
# print accuracy_score(outcomes, predictions)
# # Make the predictions
# predictions = predictions_1(data)
# print accuracy_score(outcomes, predictions)
# # Make the predictions
# predictions = predictions_2(data)
# print accuracy_score(outcomes, predictions)
# # Make the predictions
# predictions = predictions_3(data)
# print accuracy_score(outcomes, predictions)
# # Make the predictions
# predictions = predictions_4(data)
# print accuracy_score(outcomes, predictions)
# # Make the predictions
# predictions = predictions_5(data)
# print accuracy_score(outcomes, predictions)
# Make the predictions
predictions = predictions_6(data)
print accuracy_score(outcomes, predictions)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Predictions have an accuracy of 60.00%.
Predictions have an accuracy of 71.49%.
