In [68]:
import pandas as pd
import re

In [69]:
df = pd.read_csv('titanic.csv')
df.sample(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
547,548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28,
843,844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
797,798,1,3,"Osman, Mrs. Mara",female,31.0,0,0,349244,8.6833,,S


<h2>Предварительный осмотр данных<h2>

In [70]:
df.shape

(891, 12)

In [71]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## Выводы
1. Всего в датасете 891 строка.
2. 5 столбцов имеют тип данных `int64`.
3. 2 столбца имеют тип данных `float64`.
4. 5 столбцов имеют тип данных `object`.
5. Столбцы `Age`, `Cabin` и `Embarked` имеют пустые значения (`NaN`).

In [72]:
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [73]:
def camel_to_snake(column_name):
    return re.sub(r'(?<!^)(?=[A-Z])', '_', column_name).lower()

In [74]:
df.columns = [camel_to_snake(col) for col in df.columns]

In [75]:
df.columns

Index(['passenger_id', 'survived', 'pclass', 'name', 'sex', 'age', 'sib_sp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked'],
      dtype='object')

<h2>Работа с числовыми данными</h2>

In [76]:
df.describe().round(2)

Unnamed: 0,passenger_id,survived,pclass,age,sib_sp,parch,fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.38,2.31,29.7,0.52,0.38,32.2
std,257.35,0.49,0.84,14.53,1.1,0.81,49.69
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.12,0.0,0.0,7.91
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.45
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.33


## Выводы
1. Средний возраст пассажира - 28-30 лет
2. Средняя стоимость проезда - 32 доллара
3. Медианная стоимость проезда - 14.5 долларов
4. Больше половины пассажиров погибло

<h2>Анализ категориальных данных</h2>

In [77]:
df.describe(include='object')

Unnamed: 0,name,sex,ticket,cabin,embarked
count,891,891,891,204,889
unique,891,2,681,147,3
top,"Dooley, Mr. Patrick",male,347082,G6,S
freq,1,577,7,4,644


In [78]:
df.sex.value_counts()

sex
male      577
female    314
Name: count, dtype: int64

In [79]:
df.fare.value_counts(bins=20)

(-0.513, 25.616]      562
(25.616, 51.233]      170
(51.233, 76.849]       67
(76.849, 102.466]      39
(128.082, 153.699]     16
(102.466, 128.082]     15
(204.932, 230.548]      9
(256.165, 281.781]      6
(486.713, 512.329]      3
(230.548, 256.165]      2
(153.699, 179.315]      2
(179.315, 204.932]      0
(281.781, 307.398]      0
(307.398, 333.014]      0
(358.63, 384.247]       0
(333.014, 358.63]       0
(384.247, 409.863]      0
(409.863, 435.48]       0
(435.48, 461.096]       0
(461.096, 486.713]      0
Name: count, dtype: int64

In [80]:
df.fare.value_counts(bins=20)[0]/df.shape[0]*100

np.float64(63.07519640852974)

- 63% пассажиров приобрели билет стоимостью до 25 долларов

In [81]:
df.duplicated().value_counts()

False    891
Name: count, dtype: int64

- Дубликатов нет

<h2>Сортировка DataFrame</h2>

In [82]:
df.sort_values(by='age').head(5)

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked
803,804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
755,756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S
469,470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
644,645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
78,79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29.0,,S


In [83]:
df.sort_values(by='age', ascending=False).head(5)

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q


<h2>Фильтрация данных</h2>

In [84]:
# Булева маска
df.sex == 'male'

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [85]:
df.loc[df.sex == 'male'].head(5)

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S


In [86]:
df.loc[df.sex == 'male', ['name', 'age']]

Unnamed: 0,name,age
0,"Braund, Mr. Owen Harris",22.0
4,"Allen, Mr. William Henry",35.0
5,"Moran, Mr. James",
6,"McCarthy, Mr. Timothy J",54.0
7,"Palsson, Master. Gosta Leonard",2.0
...,...,...
883,"Banfield, Mr. Frederick James",28.0
884,"Sutehall, Mr. Henry Jr",25.0
886,"Montvila, Rev. Juozas",27.0
889,"Behr, Mr. Karl Howell",26.0


In [87]:
((df.sex == 'male') & (df.age > 40)).value_counts()

False    789
True     102
Name: count, dtype: int64

In [88]:
df.loc[(df.sex == 'male') & (df.age > 40), ['name', 'age']].sort_values(by='age')

Unnamed: 0,name,age
153,"van Billiard, Mr. Austin Blyler",40.5
525,"Farrell, Mr. James",40.5
860,"Hansen, Mr. Claus Peter",41.0
761,"Nirva, Mr. Iisakki Antino Aijo",41.0
197,"Olsen, Mr. Karl Siegwart Andreas",42.0
...,...,...
116,"Connors, Mr. Patrick",70.5
493,"Artagaveytia, Mr. Ramon",71.0
96,"Goldschmidt, Mr. George B",71.0
851,"Svensson, Mr. Johan",74.0


<h4>Добавляем новый столбец в датафрейм</h4>

In [89]:
rule_cheap = df.fare < 25
df.loc[rule_cheap, 'cheap_price'] = 'Yes'
df.loc[~rule_cheap, 'cheap_price'] = 'No'
df[['name', 'age', 'fare', 'cheap_price']].head().round()

Unnamed: 0,name,age,fare,cheap_price
0,"Braund, Mr. Owen Harris",22.0,7.0,Yes
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,71.0,No
2,"Heikkinen, Miss. Laina",26.0,8.0,Yes
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,53.0,No
4,"Allen, Mr. William Henry",35.0,8.0,Yes


<h4>filter() и query()</h4>

In [90]:
df.filter(items=[0, 1], axis=0)

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,Yes
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No


In [91]:
df.filter(like='p', axis=1)

Unnamed: 0,passenger_id,pclass,sib_sp,parch,cheap_price
0,1,3,1,0,Yes
1,2,1,1,0,No
2,3,3,0,0,Yes
3,4,1,1,0,No
4,5,3,0,0,Yes
...,...,...,...,...,...
886,887,2,0,0,Yes
887,888,1,0,0,No
888,889,3,1,2,Yes
889,890,1,0,0,No


In [92]:
mean_age = df.age.mean()
number = 50
df.query('age > @mean_age | fare > @number')[['name', 'age', 'fare']]

Unnamed: 0,name,age,fare
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,71.2833
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,53.1000
4,"Allen, Mr. William Henry",35.0,8.0500
6,"McCarthy, Mr. Timothy J",54.0,51.8625
11,"Bonnell, Miss. Elizabeth",58.0,26.5500
...,...,...,...
873,"Vander Cruyssen, Mr. Victor",47.0,9.0000
879,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",56.0,83.1583
881,"Markun, Mr. Johann",33.0,7.8958
885,"Rice, Mrs. William (Margaret Norton)",39.0,29.1250


Метод merge()

In [93]:
df1 = pd.DataFrame({
    'name': ['Петя', 'Вася'],
    'age': [23, 35],
})

df2 = pd.DataFrame({
    'name': ['Петя', 'Вася'],
    'salary': [3500, 4200]
})

df1.merge(df2, on='name')

Unnamed: 0,name,age,salary
0,Петя,23,3500
1,Вася,35,4200


Метод isin()

In [94]:
mask = df.passenger_id.isin([1, 2, 3, 5, 7, 9])
mask

0       True
1       True
2       True
3      False
4       True
       ...  
886    False
887    False
888    False
889    False
890    False
Name: passenger_id, Length: 891, dtype: bool

In [95]:
df.loc[mask]

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,Yes
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,Yes
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,Yes
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S,No
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S,Yes


In [96]:
df.loc[~mask]

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,No
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,Yes
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S,Yes
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C,No
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7000,G6,S,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,Yes
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,No
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,Yes
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,No


<h2>Работа с пропущенными значениями</h2>

либо удалить значения, либо заполнить чем-то

In [104]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 889 entries, 0 to 890
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   passenger_id  889 non-null    int64  
 1   survived      889 non-null    int64  
 2   pclass        889 non-null    int64  
 3   name          889 non-null    object 
 4   sex           889 non-null    object 
 5   age           889 non-null    float64
 6   sib_sp        889 non-null    int64  
 7   parch         889 non-null    int64  
 8   ticket        889 non-null    object 
 9   fare          889 non-null    float64
 10  cabin         889 non-null    object 
 11  embarked      889 non-null    object 
 12  cheap_price   889 non-null    object 
dtypes: float64(2), int64(5), object(6)
memory usage: 97.2+ KB


In [98]:
df.age.mean()

np.float64(29.69911764705882)

In [99]:
df.age.median()

np.float64(28.0)

In [100]:
df.age = df.age.fillna(df.age.median())

In [101]:
df.cabin.fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df.cabin.fillna('Unknown', inplace=True)


In [102]:
df.describe(include='object')

Unnamed: 0,name,sex,ticket,cabin,embarked,cheap_price
count,891,891,891,891,889,891
unique,891,2,681,148,3,2
top,"Dooley, Mr. Patrick",male,347082,Unknown,S,Yes
freq,1,577,7,687,644,557


In [103]:
df.dropna(inplace=True)

In [105]:
df.reset_index(drop=True, inplace=True)

In [106]:
df

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,Unknown,S,Yes
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,Unknown,S,Yes
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,No
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,Unknown,S,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,Unknown,S,Yes
885,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,No
886,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,Unknown,S,Yes
887,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,No


<h2>Группировка данных, агрегирующие функции, сводные таблицы</h2>

In [115]:
df.groupby(by='sex', as_index=False)[['age', 'fare']].agg('mean').round(2)

Unnamed: 0,sex,age,fare
0,female,27.79,44.25
1,male,30.14,25.52


In [118]:
df.groupby(by=['sex', 'survived'], as_index=False).agg(fare_median = ('fare', 'median'),
                                         age_median = ('age', 'median'),
                                         survived_mean = ('survived', 'mean'),
                                         pclass_count = ('pclass', 'count'))

Unnamed: 0,sex,survived,fare_median,age_median,survived_mean,pclass_count
0,female,0,15.2458,28.0,0.0,81
1,female,1,26.0,28.0,1.0,231
2,male,0,9.41665,28.0,0.0,468
3,male,1,26.2875,28.0,1.0,109


In [119]:
df.pivot_table(columns='sex', values=['age', 'fare'], aggfunc='median')

sex,female,male
age,28.0,28.0
fare,23.0,10.5


<h2>Функции</h2>

In [125]:
df[['fare', 'survived']].apply('sum')

fare        28533.9493
survived      340.0000
dtype: float64

In [126]:
df['age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
884    27.0
885    19.0
886    28.0
887    26.0
888    32.0
Name: age, Length: 889, dtype: float64

In [127]:
def make_age(age):
    if age < 14:
        return 'children'
    elif age >= 14 and age < 18:
        return 'teenager'
    elif age >= 18 and age < 35:
        return 'adult'
    elif age >= 35 and age < 65:
        return 'mature'
    elif age >= 65:
        return 'ancient'

In [129]:
df['age'].apply(make_age).value_counts()

age
adult       543
mature      222
children     71
teenager     42
ancient      11
Name: count, dtype: int64

In [135]:
df['age_class'] = df['age'].apply(make_age)
df

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price,age_class
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,Unknown,S,Yes,adult
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No,mature
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,Unknown,S,Yes,adult
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,No,mature
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,Unknown,S,Yes,mature
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,Unknown,S,Yes,adult
885,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,No,adult
886,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,Unknown,S,Yes,adult
887,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,No,adult


In [137]:
df['survived'] = df['survived'].apply(lambda x: 'alive' if x == 1 else 'died')

In [138]:
df

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price,age_class
0,1,died,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,Unknown,S,Yes,adult
1,2,died,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No,mature
2,3,died,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,Unknown,S,Yes,adult
3,4,died,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,No,mature
4,5,died,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,Unknown,S,Yes,mature
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,died,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,Unknown,S,Yes,adult
885,888,died,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,No,adult
886,889,died,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,Unknown,S,Yes,adult
887,890,died,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,No,adult


In [141]:
df['class_ticket'] = df.apply(lambda x: 'rich_ticket' if x.fare > 35 else 'cheap_ticket', axis=1)

In [142]:
df

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked,cheap_price,age_class,class_ticket
0,1,died,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,Unknown,S,Yes,adult,cheap_ticket
1,2,died,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,No,mature,rich_ticket
2,3,died,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,Unknown,S,Yes,adult,cheap_ticket
3,4,died,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,No,mature,rich_ticket
4,5,died,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,Unknown,S,Yes,mature,cheap_ticket
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,died,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,Unknown,S,Yes,adult,cheap_ticket
885,888,died,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,No,adult,cheap_ticket
886,889,died,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,Unknown,S,Yes,adult,cheap_ticket
887,890,died,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,No,adult,cheap_ticket
