# タイタニックデータ可視化

| カラム名 | 説明 |
| --- | --- |
| PassengerId | 乗客の一意の識別番号 |
| Survived | 乗客が生存したかどうか（1: 生存, 0: 死亡） |
| Pclass | 乗客の社会経済的地位を示すチケットクラス（1: 1等, 2: 2等, 3: 3等） |
| Name | 乗客の名前 |
| Sex | 乗客の性別 |
| Age | 乗客の年齢 |
| SibSp | タイタニック号に同乗している兄弟姉妹または配偶者の数 |
| Parch | タイタニック号に同乗している親または子供の数 |
| Ticket | チケット番号 |
| Fare | 乗船料金 |
| Cabin | 部屋番号 |
| Embarked | 乗船した港（C: Cherbourg, Q: Queenstown, S: Southampton） |

In [1]:
import pandas as pd
from IPython.display import display

# Location of the Titanic training CSV. NOTE: this is a relative path,
# resolved against the current working directory.
file_path = './data/titanic/train.csv'

# Load the CSV into a DataFrame.
df = pd.read_csv(file_path)

# Remove pandas' display limits so that every row and column is shown.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [2]:
# Show the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

(891, 12)

In [3]:
# Display the first rows of the DataFrame (head() defaults to 5 rows).
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


| 統計量 | 説明 |
| --- | --- |
| count | 件数 |
| mean | 平均（算術平均） |
| std | 標準偏差 |
| min | 最小値 |
| 25% | 第１四分位数 |
| 50% | 中央値（第２四分位数） |
| 75% | 第３四分位数 |
| max | 最大値 |

In [10]:
# Display summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns.
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [12]:
# Keep only the Name and Survived columns and show the first rows.
df.loc[:, ['Name', 'Survived']].head()


Unnamed: 0,Name,Survived
0,"Braund, Mr. Owen Harris",0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1
2,"Heikkinen, Miss. Laina",1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1
4,"Allen, Mr. William Henry",0


In [13]:
# Extract female passengers who embarked at Queenstown (Embarked == 'Q').
# NOTE(review): the original comment described this as "surviving women",
# but the filter is on Embarked, not Survived -- confirm the intent.
df.loc[(df['Sex'] == 'female') & (df['Embarked'] == 'Q')].head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
22,23,1,3,"McGowan, Miss. Anna ""Annie""",female,15.0,0,0,330923,8.0292,,Q
28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q
32,33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q
44,45,1,3,"Devaney, Miss. Margaret Delia",female,19.0,0,0,330958,7.8792,,Q
47,48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q


In [15]:
# Count the missing (NaN) entries in each column.
missing_values_per_column = df.isna().sum()

# Display the per-column counts; columns without missing values are
# included and show 0.
missing_values_per_column

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [16]:
# Count passengers by outcome: select the PassengerId values of the rows
# matching each Survived flag and count the non-null entries.
survived_count = df.loc[df['Survived'] == 1, 'PassengerId'].count()
not_survived_count = df.loc[df['Survived'] == 0, 'PassengerId'].count()

# Report both counts.
print("生存者の人数:", survived_count)
print("非生存者の人数:", not_survived_count)

生存者の人数: 342
非生存者の人数: 549


In [17]:
# Sum every value in the Fare column.
total_fare = df['Fare'].sum()

# Report the total.
print('Total Fare: {}'.format(total_fare))

Total Fare: 28693.9493


In [19]:
# Mean of the Fare column, rounded to one decimal place.
average_fare = round(df['Fare'].mean(), 1)

# Report the rounded mean.
print("Fareの平均:", average_fare)


Fareの平均: 32.2


In [20]:
# Median of the Fare column, rounded to one decimal place.
median_fare = round(df['Fare'].median(), 1)

# Report the rounded median.
print("Fareの中央値:", median_fare)


Fareの中央値: 14.5


In [21]:
# Largest and smallest values in the Age column.
max_age, min_age = df['Age'].max(), df['Age'].min()

# Report both extremes.
print("Ageの最大値:", max_age)
print("Ageの最小値:", min_age)

Ageの最大値: 80.0
Ageの最小値: 0.42
