# Libraries

In [None]:
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Loading Data

In [None]:
# Read the training and test CSVs from the shared input directory.
training_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

# Exploring the Data

In [None]:
# Print column names, non-null counts, and dtypes for the training set.
training_data.info()

In [None]:
# Same overview for the test set, to compare columns and missing values
# against the training set.
test_data.info()

In [None]:
# Preview the first ten rows of the training set.
training_data.head(10)

In [None]:
# With no arguments, describe() summarizes only the numeric columns
# (count, mean, std, min, quartiles, max).
training_data.describe()

In [None]:
# include=['O'] selects the object-dtype columns (the ones holding strings)
# and reports count, unique, top, and freq for each.
training_data.describe(include=['O'])

## By-Column Queries

In [None]:
# Selecting a single column by name returns it as a Series.
training_data['Sex']

In [None]:
# Count how many rows carry each distinct value of 'Sex'.
training_data['Sex'].value_counts()

In [None]:
# Survival counts among female passengers with Pclass == 3.
training_data.loc[
    training_data['Sex'].eq('female') & training_data['Pclass'].eq(3),
    'Survived',
].value_counts()

In [None]:
# Survival counts among female Pclass-3 passengers whose fare was below 20.
training_data.query(
    "Sex == 'female' and Fare < 20 and Pclass == 3"
)['Survived'].value_counts()

In [None]:
# Numeric summary of the male Pclass-1 passengers who survived.
training_data.loc[
    training_data['Sex'].eq('male')
    & training_data['Pclass'].eq(1)
    & training_data['Survived'].eq(1)
].describe()

In [None]:
# The matching rows themselves: male, Pclass 1, survived.
training_data.query("Sex == 'male' and Pclass == 1 and Survived == 1")

In [None]:
# Pairwise correlations among female passengers with Pclass below 3.
# Only numeric columns are kept: on pandas >= 2.0, corr() raises when the
# frame still contains string columns such as 'Sex', whereas older versions
# dropped them silently. select_dtypes gives the same table on both.
training_data[(training_data['Sex'] == 'female')
              & (training_data['Pclass'] < 3)].select_dtypes(include='number').corr()

In [None]:
# Male Pclass-1 passengers who did not survive.
training_data.loc[
    training_data['Sex'].eq('male')
    & training_data['Pclass'].eq(1)
    & training_data['Survived'].eq(0)
]

In [None]:
# Passing a list of names selects several columns and returns a DataFrame.
training_data[['Sex', 'Survived']]

In [None]:
# Mean of 'Survived' within each 'Sex' group.
training_data.groupby('Sex')[['Survived']].mean()

In [None]:
# Same per-sex mean of 'Survived', built from the single column and
# converted back to a one-column DataFrame.
training_data.groupby('Sex')['Survived'].mean().to_frame()

In [None]:
# Mean of 'Survived' for each distinct age.
# 'Sex' is left out of the selection on purpose: it holds strings, and
# averaging a string column raises TypeError on pandas >= 2.0 (older versions
# silently dropped it, so the resulting table is unchanged).
training_data[['Age', 'Survived']].groupby('Age').mean()

## Visualization

In [None]:
# Draw one histogram per numeric column on a 10x10-inch figure.
training_data.hist(figsize=(10,10))
plt.show()

In [None]:
# One density plot per numeric column; sharex=False lets every subplot keep
# its own x-axis range.
training_data.plot.density(subplots=True, sharex=False, figsize=(10, 10))
plt.show()

In [None]:
# Pairwise correlations between the numeric columns.
# The frame is narrowed to numeric dtypes first because corr() on pandas >= 2.0
# raises when string columns are present; older versions dropped them
# silently, so the output is the same.
training_data.select_dtypes(include='number').corr()

In [None]:
# Grid of pairwise scatter plots for the training set.
# scatter_matrix comes from the notebook's top import cell, so a fresh
# Restart & Run All does not depend on an import buried mid-notebook.
scatter_matrix(training_data, figsize=(10, 10))
plt.show()

# Working with Rows

In [None]:
# Walk the test set row by row, printing each passenger's Name and Pclass.
for passenger_name, pclass in zip(test_data['Name'], test_data['Pclass']):
    print(passenger_name, pclass)

# Making Some Predictions

In [None]:
# Rule-based survival prediction for every row of the test set, computed with
# vectorized column comparisons instead of a Python-level iterrows() loop.
# Comparisons against missing values (NaN Age / Fare) evaluate to False, so
# such rows fall through to "did not survive", exactly as in a per-row check.
is_female = test_data['Sex'] == 'female'

# Women: predicted to survive when Pclass is below 3, or otherwise when the
# fare is below 20.
# Original exploration note for the low-fare Pclass-3 women: "age<10 u lived,
# >10 was 50%, noAge 7:5" -- presumably survival splits seen in the training
# data; TODO confirm before relying on it.
female_survives = (test_data['Pclass'] < 3) | (test_data['Fare'] < 20)

# Everyone else: predicted to survive only when younger than 10.
other_survives = test_data['Age'] < 10

# Plain Python list of 1 (survived) / 0 (did not survive), one per test row.
predictions = np.where(is_female, female_survives, other_survives).astype(int).tolist()

In [None]:
# Sanity check: there should be one prediction per row of the test set.
len(predictions)

In [None]:
# Store the predictions as a new 'Survived' column on the test set
# (this modifies test_data in place).
test_data['Survived'] = predictions

In [None]:
# Check the first ten test rows now that 'Survived' has been added.
test_data.head(10)

In [None]:
# Display the whole test frame; pandas abbreviates long frames when rendering.
test_data

# Generating a Submission

In [None]:
# Write only the PassengerId and Survived columns, without the row index,
# to submission.csv in the working directory.
test_data.to_csv('submission.csv', columns=['PassengerId', 'Survived'], index=False)