### Titanic - Machine Learning
This project explores the tragic sinking of the Titanic using machine learning. It aims to build a model that predicts passenger survival based on historical data, including demographics, socioeconomic factors, and other relevant variables.

Data : https://www.kaggle.com/competitions/titanic/data 

In [1]:
# Imports 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Read the Titanic train and test CSV files (Kaggle input paths) into DataFrames
train = pd.read_csv("/kaggle/input/titanic/train.csv")
test = pd.read_csv("/kaggle/input/titanic/test.csv")

## 1- Explore Data


In [None]:
# used for viewing the first few rows of the DataFrame.
train.head(5)

In [None]:
# shows the last 5 rows by default
train.tail(5)

In [None]:
# provides information about the dimensions of your DataFrame
train.shape

In [None]:
# provide a concise summary of your DataFrame 
train.info()

In [None]:
# explore your DataFrame
train.describe()

In [None]:
# check for missing values 
train.isnull().sum()

In [None]:
# visualizes missing values
sns.heatmap(train.isnull())

## 2- Clean Data
- Drop the columns that we know will not affect the result
- Fill in the missing data

In [None]:
# Clean function
def clean(d):
    """Clean a Titanic passenger DataFrame in place and return it.

    Steps:
      1. Drop the columns this notebook does not model on
         (Name, Ticket, Cabin, Embarked, Fare).
      2. Fill missing ``Age`` values with the median age of ``d``.
      3. Drop every row that still contains a missing value.

    Args:
        d: DataFrame with an ``Age`` column. It is modified in place.

    Returns:
        The same DataFrame object ``d`` (so ``clean(df)`` and
        ``df = clean(df)`` are equivalent).
    """
    unused_columns = ['Name', 'Ticket', 'Cabin', 'Embarked', 'Fare']
    # errors="ignore" keeps the cell re-runnable: a second call on an
    # already-cleaned frame no longer raises KeyError for the missing columns.
    d.drop(columns=unused_columns, inplace=True, errors="ignore")

    # Item assignment always targets the column (attribute assignment would
    # silently create a plain attribute if the column did not exist).
    d["Age"] = d["Age"].fillna(d["Age"].median())

    # dropna() returns a new frame by default, so the result must be applied
    # in place for callers that ignore the return value (e.g. `clean(train)`).
    d.dropna(inplace=True)
    return d

In [None]:
#clean train dataSet
clean(train)

In [None]:
#clean test dataSet
clean(test)

In [None]:
# visualizes missing values
sns.heatmap(train.isnull())

## 3- Data Analysis

In [None]:
# Correlate the numeric columns only: `Sex` is still a string column at this
# point, and calling train.corr() on the full frame fails with
# "could not convert string to float: 'male'".
co = train.select_dtypes(include="number").corr()
co

In [None]:
sns.heatmap(co,annot=True,fmt=".1f")

In [None]:
train.Survived.value_counts()

In [None]:
train.Sex.value_counts().plot.pie(autopct="%0.2f%%")

In [None]:
sns.countplot(x="Sex", hue="Survived", data=train)

In [None]:
sns.countplot(x ='Pclass', hue = "Survived", data = train)


In [None]:
sns.histplot(train.Age)

## 4- Transform Data

In [None]:
# One-hot encode `Sex` in both frames; drop_first=True keeps a single
# indicator column, which replaces the original string column.
for passengers in (train, test):
    sex_indicator = pd.get_dummies(passengers["Sex"], drop_first=True)
    passengers["Sex"] = sex_indicator.astype('uint8')


In [None]:
train

## 5- Create model

In [None]:
# Target is the `Survived` column; every remaining column is a feature.
y = train["Survived"]
x = train.drop(columns=["Survived"])

In [None]:
# Fixed random_state so the 80/20 split -- and the accuracy comparison built
# on it -- is reproducible from one run of the notebook to the next.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=42
)

In [None]:
# Accuracy of each evaluated model; all() appends one entry per call, so the
# order follows the order in which the model cells are run.
accuracies= []
accuracies

In [None]:
# Model Function
def all(model):
    """Fit ``model`` on the training split and record its hold-out accuracy.

    Uses the notebook globals ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``. The accuracy is printed and appended to the global
    ``accuracies`` list; nothing is returned.

    NOTE: this name shadows the builtin ``all`` for the rest of the notebook.
    It is kept because the model cells call ``all(modelN)``.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    """
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    # accuracy_score takes (y_true, y_pred); the true labels go first.
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy Score= ", accuracy)
    accuracies.append(accuracy)

In [None]:
model1= LogisticRegression()
all(model1)

In [None]:
accuracies

In [None]:
model2= RandomForestClassifier()
all(model2)

In [None]:
model3= GradientBoostingClassifier()
all(model3)

In [None]:
model4= DecisionTreeClassifier()
all(model4)

In [None]:
model5= KNeighborsClassifier()
all(model5)

In [None]:
model6= GaussianNB()
all(model6)

In [None]:
model7= SVC()
all(model7)

In [None]:
print(len(accuracies))

In [None]:
Algorithms = ["LogisticRegression","RandomForestClassifier","GradientBoostingClassifier","DecisionTreeClassifier","KNeighborsClassifier","GaussianNB","SVC"]

In [None]:
Algorithms

In [None]:
new= pd.DataFrame({"Algorithms":Algorithms,"accuracies":accuracies})

In [None]:
new

In [None]:
# Final model: a fresh GradientBoostingClassifier fitted on the training split.
# NOTE: the choice is hard-coded here rather than derived from `accuracies`.
bestModel= GradientBoostingClassifier()
bestModel.fit(x_train,y_train)

In [None]:
last_prediction = bestModel.predict(test)

In [None]:
idP = test.PassengerId

In [None]:
# Column names must match the dataset's own `PassengerId` / `Survived`
# spelling; the previous "PassengerID" / "Survivrd" headers were typos.
dataframe = pd.DataFrame({"PassengerId": idP, "Survived": last_prediction})

In [None]:
dataframe

In [None]:
dataframe.to_csv("submission.csv",index=False)