In [1]:
import pandas
import numpy

In [2]:
# Load the Titanic passenger records; the path is resolved relative to the
# notebook's working directory.
titanic_survival = pandas.read_csv(filepath_or_buffer='csv/titanic_survival.csv')

## Check if values are NaN

In [3]:
# Pull out the "sex" column and build a boolean mask that is True wherever
# the value is missing.
sex = titanic_survival["sex"]
sex_is_null = sex.isnull()
# Boolean indexing keeps only the entries flagged by the mask.
sex_null_true = sex.loc[sex_is_null]
sex_null_true

1309    NaN
Name: sex, dtype: object

## Statistics

In [4]:
# Series.mean() skips missing values by default; skipna=True spells that out.
titanic_survival["age"].mean(skipna=True)

29.8811345124283

## Pivot table

In [5]:
# Average fare paid within each passenger class.
# The aggregation is named with the string "mean" instead of passing
# numpy.mean: pandas 2.1+ emits a FutureWarning for NumPy callables here and
# recommends the string form to keep the current (NaN-skipping) behaviour.
passenger_class_fares = titanic_survival.pivot_table(
    index="pclass",
    values="fare",
    aggfunc="mean",
)
passenger_class_fares

pclass
1.0    87.508992
2.0    21.179196
3.0    13.302889
Name: fare, dtype: float64

## Drop any rows that contain missing values.

Specifying axis=0 or axis='index' will drop any rows that have null values, <br/>
while specifying axis=1 or axis='columns' will drop any columns that have null values.

In [6]:
# Discard every row that has at least one missing value
# (axis="index" is the same as axis=0; how="any" is the default).
drop_na_rows = titanic_survival.dropna(axis="index", how="any")

## Access values by position (`iloc`) and by label (`loc`)

In [8]:
# Order passengers from oldest to youngest. sort_values keeps each row's
# original index label, so labels no longer match positions afterwards.
new_titanic_survival = titanic_survival.sort_values(by="age", ascending=False)

# iloc is purely positional: these read from the sorted layout.
first_row_first_column = new_titanic_survival.iloc[0, 0]
all_rows_first_three_columns = new_titanic_survival.iloc[:, :3]

# loc is label-based: 83 and 766 are index labels, not positions in the
# sorted frame.
row_index_83_age = new_titanic_survival.loc[83, "age"]
row_index_766_pclass = new_titanic_survival.loc[766, "pclass"]

## Apply Functions Over a DataFrame

By default, `DataFrame.apply()` calls the given function once per column. By passing in the `axis=1` argument, we can use the `DataFrame.apply()` method to iterate over rows instead of columns.

In [9]:
def hundredth_row(column):
    """Return the hundredth entry of ``column``.

    ``column`` is expected to support positional lookup through ``.iloc``
    (for example a pandas Series). The lookup raises ``IndexError`` when
    fewer than 100 entries are present.
    """
    # Positions are zero-based, so the hundredth entry sits at position 99.
    return column.iloc[99]

# Run hundredth_row once per column (axis=0 is apply's default direction),
# collecting the hundredth value of every column into one Series.
hundredth_row_var = titanic_survival.apply(hundredth_row, axis=0)
hundredth_row_var

pclass                                                       1
survived                                                     1
name         Duff Gordon, Lady. (Lucille Christiana Sutherl...
sex                                                     female
age                                                         48
sibsp                                                        1
parch                                                        0
ticket                                                   11755
fare                                                      39.6
cabin                                                      A16
embarked                                                     C
boat                                                         1
body                                                       NaN
home.dest                                       London / Paris
dtype: object

In [10]:
def is_adult(row):
    """Classify a passenger row by its ``"age"`` value.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``"age"`` (e.g. a row Series handed over by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str
        ``'unknown'`` when the age is missing, ``'minor'`` when it is below
        18, and ``'adult'`` otherwise.
    """
    age = row["age"]
    # Test for a missing value before comparing: float NaN merely compares
    # False, but None or pandas.NA would raise on ``age < 18``.
    if pandas.isnull(age):
        return 'unknown'
    if age < 18:
        return 'minor'
    return 'adult'

# axis=1 hands is_adult one row at a time, yielding one label per passenger.
age_labels = titanic_survival.apply(func=is_adult, axis=1)
age_labels.head(n=5)

0    adult
1    minor
2    minor
3    adult
4    adult
dtype: object