# Import Library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset import

In [9]:
df = pd.read_csv('train.csv', usecols=['Age','Pclass','SibSp','Parch','Survived'])
df.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch
0,0,3,22.0,1,0
1,1,1,38.0,1,0
2,1,3,26.0,0,0
3,1,1,35.0,1,0
4,0,3,35.0,0,0


# 1. Fearures Combining

In [10]:
df['family_member'] = df['SibSp'] + df['Parch'] + 1 # 1 for that calculating person
df.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,family_member
0,0,3,22.0,1,0,2
1,1,1,38.0,1,0,2
2,1,3,26.0,0,0,1
3,1,1,35.0,1,0,2
4,0,3,35.0,0,0,1


In [11]:
df.drop(['SibSp','Parch'], axis=1, inplace=True)
df.head()

Unnamed: 0,Survived,Pclass,Age,family_member
0,0,3,22.0,2
1,1,1,38.0,2
2,1,3,26.0,1
3,1,1,35.0,2
4,0,3,35.0,1


## Family size = `Alone`, `Small`, `Medium`, `Lerge`

In [7]:
def family_size(x):
  if x == 1:
    return 'Alone'
  elif x > 1 and x <= 4:
    return 'Small'
  elif x > 4 and x <= 6:
    return 'Medium'
  else:
    return 'Large'

In [12]:
df['family_size'] = df['family_member'].apply(family_size)
df.head()

Unnamed: 0,Survived,Pclass,Age,family_member,family_size
0,0,3,22.0,2,Small
1,1,1,38.0,2,Small
2,1,3,26.0,1,Alone
3,1,1,35.0,2,Small
4,0,3,35.0,1,Alone


In [15]:
df.sample(5)

Unnamed: 0,Survived,Pclass,Age,family_member,family_size
547,1,2,,1,Alone
579,1,3,32.0,1,Alone
88,1,1,23.0,6,Medium
387,1,2,36.0,1,Alone
168,0,1,,1,Alone


# 2. Feature Splitting

In [16]:
df2 = pd.read_csv('train.csv')
df2.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Apply Splitting on `Name` feature

### Extract `Mr` or `Ms`

In [18]:
df2['Name']

Unnamed: 0,Name
0,"Braund, Mr. Owen Harris"
1,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,"Heikkinen, Miss. Laina"
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,"Allen, Mr. William Henry"
...,...
886,"Montvila, Rev. Juozas"
887,"Graham, Miss. Margaret Edith"
888,"Johnston, Miss. Catherine Helen ""Carrie"""
889,"Behr, Mr. Karl Howell"


In [21]:
df2['Title'] = df2['Name'].str.split(', ', expand=True)[1].str.split('.', expand=True)[0]

In [22]:
df2[['Title', 'Name']]

Unnamed: 0,Title,Name
0,Mr,"Braund, Mr. Owen Harris"
1,Mrs,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,Miss,"Heikkinen, Miss. Laina"
3,Mrs,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,Mr,"Allen, Mr. William Henry"
...,...,...
886,Rev,"Montvila, Rev. Juozas"
887,Miss,"Graham, Miss. Margaret Edith"
888,Miss,"Johnston, Miss. Catherine Helen ""Carrie"""
889,Mr,"Behr, Mr. Karl Howell"


In [30]:
(df2.groupby('Title').mean(numeric_only=True)['Survived']).sort_values(ascending=False)

Unnamed: 0_level_0,Survived
Title,Unnamed: 1_level_1
the Countess,1.0
Mlle,1.0
Sir,1.0
Ms,1.0
Lady,1.0
Mme,1.0
Mrs,0.792
Miss,0.697802
Master,0.575
Col,0.5


# Check is maried or not

In [36]:
df2['Is_Married'] = 'No'
df2['Is_Married'].loc[df2['Title'] == 'Mrs'] = 'Yes'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df2['Is_Married'].loc[df2['Title'] == 'Mrs'] = 'Yes'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Is_Ma

In [37]:
df2[['Title', 'Name', 'Is_Married']].sample(5)

Unnamed: 0,Title,Name,Is_Married
642,Miss,"Skoog, Miss. Margit Elizabeth",No
265,Mr,"Reeves, Mr. David",No
880,Mrs,"Shelley, Mrs. William (Imanita Parrish Hall)",Yes
507,Mr,"Bradley, Mr. George (""George Arthur Brayton"")",No
683,Mr,"Goodwin, Mr. Charles Edward",No
