In [68]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


df = sns.load_dataset("titanic")


print(df.head())
print(df.info())
print(df.isnull().sum())
print(df.describe(include="all"))


   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-nu

In [70]:
df["age"].fillna(df["age"].median(), inplace = True)
df["embark_town"].fillna(df["embark_town"].mode()[0], inplace = True)
df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)
df.drop(columns = ['deck'], inplace = True,errors = "ignore")
print(df.isnull().sum())

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64


In [72]:
df['sex'] = df['sex'].map({'male': 1, 'female': 0})

df = pd.get_dummies(df, columns=['embarked', 'class', 'who', 'embark_town', 'alone'], drop_first=True)

print(df.head())


   survived  pclass  sex   age  sibsp  parch     fare  adult_male alive  \
0         0       3    1  22.0      1      0   7.2500        True    no   
1         1       1    0  38.0      1      0  71.2833       False   yes   
2         1       3    0  26.0      0      0   7.9250       False   yes   
3         1       1    0  35.0      1      0  53.1000       False   yes   
4         0       3    1  35.0      0      0   8.0500        True    no   

   embarked_Q  embarked_S  class_Second  class_Third  who_man  who_woman  \
0       False        True         False         True     True      False   
1       False       False         False        False    False       True   
2       False        True         False         True    False       True   
3       False        True         False        False    False       True   
4       False        True         False         True     True      False   

   embark_town_Queenstown  embark_town_Southampton  alone_True  
0                   False  

In [74]:
from sklearn.preprocessing import StandardScaler

num_cols = ['age', 'fare', 'sibsp', 'parch']

scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

print(df.head())


   survived  pclass  sex       age     sibsp     parch      fare  adult_male  \
0         0       3    1 -0.565736  0.432793 -0.473674 -0.502445        True   
1         1       1    0  0.663861  0.432793 -0.473674  0.786845       False   
2         1       3    0 -0.258337 -0.474545 -0.473674 -0.488854       False   
3         1       1    0  0.433312  0.432793 -0.473674  0.420730       False   
4         0       3    1  0.433312 -0.474545 -0.473674 -0.486337        True   

  alive  embarked_Q  embarked_S  class_Second  class_Third  who_man  \
0    no       False        True         False         True     True   
1   yes       False       False         False        False    False   
2   yes       False        True         False         True    False   
3   yes       False        True         False        False    False   
4    no       False        True         False         True     True   

   who_woman  embark_town_Queenstown  embark_town_Southampton  alone_True  
0      False    

In [76]:
y = df['survived']

X = df.drop(columns=['survived'])
X = X.drop(columns=['alive'], errors='ignore')
print("Bağımsız değişkenlerin boyutu:", X.shape)
print("Bağımlı değişkenin boyutu:", y.shape)


Bağımsız değişkenlerin boyutu: (891, 16)
Bağımlı değişkenin boyutu: (891,)


In [78]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print("Eğitim seti boyutu:", X_train.shape)
print("Test seti boyutu:", X_test.shape)


Eğitim seti boyutu: (712, 16)
Test seti boyutu: (179, 16)


In [80]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)

y_pred = log_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Lojistik Regresyon Doğruluk Skoru: {accuracy:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix)
print("\nSınıflandırma Raporu:\n", class_report)


Lojistik Regresyon Doğruluk Skoru: 0.8324

Karışıklık Matrisi:
 [[98 12]
 [18 51]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.84      0.89      0.87       110
           1       0.81      0.74      0.77        69

    accuracy                           0.83       179
   macro avg       0.83      0.82      0.82       179
weighted avg       0.83      0.83      0.83       179



## KNN MODELİ

In [82]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors=5)

knn_model.fit(X_train, y_train)

y_pred_knn = knn_model.predict(X_test)

accuracy_knn = accuracy_score(y_test, y_pred_knn)
conf_matrix_knn = confusion_matrix(y_test, y_pred_knn)
class_report_knn = classification_report(y_test, y_pred_knn)

print(f"KNN Doğruluk Skoru: {accuracy_knn:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix_knn)
print("\nSınıflandırma Raporu:\n", class_report_knn)


KNN Doğruluk Skoru: 0.8268

Karışıklık Matrisi:
 [[98 12]
 [19 50]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.84      0.89      0.86       110
           1       0.81      0.72      0.76        69

    accuracy                           0.83       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.83      0.83      0.82       179



## SVM

In [84]:
 from sklearn.svm import SVC

svm_model = SVC(kernel='linear')

svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)
class_report_svm = classification_report(y_test, y_pred_svm)

print(f"SVM Doğruluk Skoru: {accuracy_svm:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix_svm)
print("\nSınıflandırma Raporu:\n", class_report_svm)


SVM Doğruluk Skoru: 0.8156

Karışıklık Matrisi:
 [[98 12]
 [21 48]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.82      0.89      0.86       110
           1       0.80      0.70      0.74        69

    accuracy                           0.82       179
   macro avg       0.81      0.79      0.80       179
weighted avg       0.81      0.82      0.81       179



## Desicion Tree

In [86]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42)

dt_model.fit(X_train, y_train)

y_pred_dt = dt_model.predict(X_test)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)
class_report_dt = classification_report(y_test, y_pred_dt)

print(f"Karar Ağacı Doğruluk Skoru: {accuracy_dt:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix_dt)
print("\nSınıflandırma Raporu:\n", class_report_dt)


Karar Ağacı Doğruluk Skoru: 0.8045

Karışıklık Matrisi:
 [[94 16]
 [19 50]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.83      0.85      0.84       110
           1       0.76      0.72      0.74        69

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179



## Random Forest

In [88]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
class_report_rf = classification_report(y_test, y_pred_rf)

print(f"Random Forest Doğruluk Skoru: {accuracy_rf:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix_rf)
print("\nSınıflandırma Raporu:\n", class_report_rf)


Random Forest Doğruluk Skoru: 0.8324

Karışıklık Matrisi:
 [[98 12]
 [18 51]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.84      0.89      0.87       110
           1       0.81      0.74      0.77        69

    accuracy                           0.83       179
   macro avg       0.83      0.82      0.82       179
weighted avg       0.83      0.83      0.83       179



## XGBoost

In [90]:
import xgboost as xgb
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42)

xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
conf_matrix_xgb = confusion_matrix(y_test, y_pred_xgb)
class_report_xgb = classification_report(y_test, y_pred_xgb)

print(f"XGBoost Doğruluk Skoru: {accuracy_xgb:.4f}")
print("\nKarışıklık Matrisi:\n", conf_matrix_xgb)
print("\nSınıflandırma Raporu:\n", class_report_xgb)


XGBoost Doğruluk Skoru: 0.7989

Karışıklık Matrisi:
 [[92 18]
 [18 51]]

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.84      0.84      0.84       110
           1       0.74      0.74      0.74        69

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179



## Linear Regression

In [92]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

linear_reg = LinearRegression()

linear_reg.fit(X_train, y_train)

y_pred_linear = linear_reg.predict(X_test)

r2_linear = r2_score(y_test, y_pred_linear)
mse_linear = mean_squared_error(y_test, y_pred_linear)

print("Lineer Regresyon R² Skoru:", r2_linear)
print("Lineer Regresyon Mean Squared Error (MSE):", mse_linear)


Lineer Regresyon R² Skoru: 0.41860331357139946
Lineer Regresyon Mean Squared Error (MSE): 0.1377235682404756
