In [1]:
# Kütüphaneler
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the Titanic train and test CSV files into DataFrames
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/titanic/train.csv',
        '/kaggle/input/titanic/test.csv',
    )
)

In [3]:
# Preview the first five rows of the test frame
test_df.head(n=5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [4]:
# Show the column names of the training frame
print("Train sütunları:", list(train_df.columns))

Train sütunları: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']


In [5]:
# Show the column names of the test frame
print("Test sütunları:", list(test_df.columns))

Test sütunları: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']


In [6]:
# Count missing values per column in the training frame
print("\nTrain eksik veriler:\n", train_df.isna().sum())


Train eksik veriler:
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


In [7]:
# Count missing values per column in the test frame
print("\nTest eksik veriler:\n", test_df.isna().sum())


Test eksik veriler:
 PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64


In [8]:
# Combined overview: column names, per-column missing counts, and a peek at
# the raw Sex values before they are encoded.
print("Train sütunları:", list(train_df.columns))
print("Test sütunları:", list(test_df.columns))
print("\nTrain eksik veriler:\n", train_df.isna().sum())
print("\nTest eksik veriler:\n", test_df.isna().sum())
print("\nTrain Sex sütunu (ilk 10 satır):\n", train_df['Sex'].iloc[:10])

Train sütunları: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
Test sütunları: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

Train eksik veriler:
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

Test eksik veriler:
 PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

Train Sex sütunu (ilk 10 satır):
 0      male
1    female
2    female
3    female
4      male
5      male
6      male
7      male
8    female
9    female
Name: Sex, dtype: object


In [9]:
# Preprocessing — Age: fill missing values with the median.
# The median is computed on the training frame only and reused for the test
# frame, so both frames are imputed with the same statistic and nothing
# learned from the test set influences preprocessing.
age_median = train_df['Age'].median()
train_df['Age'] = train_df['Age'].fillna(age_median)
test_df['Age'] = test_df['Age'].fillna(age_median)

In [10]:
# Fare: fill missing values with the median.
# The median comes from the training frame and is applied to both frames so
# that train and test go through the same transformation.
fare_median = train_df['Fare'].median()
train_df['Fare'] = train_df['Fare'].fillna(fare_median)
test_df['Fare'] = test_df['Fare'].fillna(fare_median)

In [11]:
# Sex: fill missing values with the column mode, then convert to numeric codes
# (male -> 0, female -> 1).
# NOTE: re-running this cell maps the already-encoded 0/1 values to NaN,
# because Series.map leaves values absent from the dict as NaN.
if train_df['Sex'].isna().any():
    print("Sex sütununda eksik veri var, mod ile dolduruluyor.")
    train_df['Sex'] = train_df['Sex'].fillna(train_df['Sex'].mode()[0])
if test_df['Sex'].isna().any():
    test_df['Sex'] = test_df['Sex'].fillna(test_df['Sex'].mode()[0])

train_df['Sex'] = train_df['Sex'].map({'male': 0, 'female': 1})
test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1})

In [12]:
# Embarked: fill missing ports and one-hot encode them.
# Both frames share the fill value (the training mode) and the category
# levels (the ports seen in training). Encoding each frame independently with
# drop_first=True would produce different or differently-meaning dummy
# columns whenever a port is absent from one of the frames; a fixed
# categorical level set makes get_dummies emit the same columns for both.
# Test values outside the training levels become NaN in the categorical and
# therefore encode as all-zero dummies (the dropped baseline level).
embarked_mode = None
embarked_levels = None

if 'Embarked' in train_df.columns:
    embarked_mode = train_df['Embarked'].mode()[0]
    embarked_levels = sorted(train_df['Embarked'].dropna().unique())
    train_df['Embarked'] = pd.Categorical(
        train_df['Embarked'].fillna(embarked_mode),
        categories=embarked_levels,
    )
    train_df = pd.get_dummies(train_df, columns=['Embarked'], drop_first=True)
else:
    print("Train'de Embarked sütunu yok, dummy değişkenler kullanılıyor.")

if 'Embarked' in test_df.columns:
    if embarked_levels is None:
        # No Embarked column in train to learn from: fall back to the test
        # frame's own mode and levels, as the original cell did.
        embarked_mode = test_df['Embarked'].mode()[0]
        embarked_levels = sorted(test_df['Embarked'].dropna().unique())
    test_df['Embarked'] = pd.Categorical(
        test_df['Embarked'].fillna(embarked_mode),
        categories=embarked_levels,
    )
    test_df = pd.get_dummies(test_df, columns=['Embarked'], drop_first=True)
else:
    print("Test'de Embarked sütunu yok, dummy değişkenler kullanılıyor.")

In [13]:
# Drop columns that are not used as model inputs; errors='ignore' keeps the
# call from raising when a column is already absent.
train_df = train_df.drop(columns=['Cabin', 'Name', 'Ticket'], errors='ignore')
test_df = test_df.drop(columns=['Cabin', 'Name', 'Ticket'], errors='ignore')

In [14]:
# Feature columns fed to the model
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
# Add the port dummies only when both are present in the training frame
if {'Embarked_Q', 'Embarked_S'}.issubset(train_df.columns):
    features.extend(['Embarked_Q', 'Embarked_S'])

In [15]:
# Training inputs and target, plus the test inputs restricted to the same columns
X, y = train_df[features], train_df['Survived']
X_test = test_df[features]

In [16]:
# Model: fit a 100-tree random forest with a fixed seed, then predict on the test inputs
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)
predictions = model.predict(X_test)

In [17]:
# Pair each test PassengerId with its predicted label and write the submission file
submission = test_df[['PassengerId']].assign(Survived=predictions)
submission.to_csv('submission.csv', index=False)
print("submission.csv oluşturuldu!")
    

submission.csv oluşturuldu!
