In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Data Collection and Processing

In [None]:
# Load the CSV file "train.csv" (path is relative to the current working
# directory) into a pandas DataFrame named titanic_data
titanic_data = pd.read_csv("train.csv")

In [None]:
# Preview the first five rows of the DataFrame
titanic_data.head(n=5)

In [None]:
# number of rows and columns, as a (rows, columns) tuple
titanic_data.shape

In [None]:
# getting some information about the data (columns, non-null counts, dtypes)
titanic_data.info()

In [None]:
# Count the missing entries in every column
titanic_data.isna().sum()

### Handling Missing Values

In [None]:
# Remove the "Cabin" column; the resulting frame is rebound to titanic_data
titanic_data = titanic_data.drop(columns=["Cabin"])

In [None]:
# Replace the missing values in the "Age" column with the column mean.
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: that chained in-place form acts
# on an intermediate object, emits a FutureWarning in pandas 2.x, and leaves
# the DataFrame unchanged once Copy-on-Write is enabled.
titanic_data["Age"] = titanic_data["Age"].fillna(titanic_data["Age"].mean())

In [None]:
# finding the mode value(s) of the "Embarked" column
# (mode() returns a Series because several values can tie for most frequent)
print(titanic_data["Embarked"].mode())

In [None]:
# take the first entry of the mode Series to get a single value
print(titanic_data["Embarked"].mode()[0])

In [None]:
# Replace the missing values in the "Embarked" column with its mode (the first
# mode if several values tie).
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: that chained in-place form acts
# on an intermediate object, emits a FutureWarning in pandas 2.x, and leaves
# the DataFrame unchanged once Copy-on-Write is enabled.
titanic_data["Embarked"] = titanic_data["Embarked"].fillna(
    titanic_data["Embarked"].mode()[0]
)

In [None]:
# Re-count the missing entries per column after the clean-up steps
titanic_data.isna().sum()

In [None]:
# getting some statistical measures about the data
titanic_data.describe()

In [None]:
# finding the number of people who Survived and did not Survive
# (row count for each distinct value of the "Survived" column)
titanic_data["Survived"].value_counts()

## Data Visualization

In [None]:
# apply seaborn's default styling to the plots drawn below
sns.set()

In [None]:
# Bar chart of how many rows fall under each "Survived" value
sns.countplot(data=titanic_data, x="Survived")

In [None]:
# row count for each distinct value of the "Sex" column
titanic_data["Sex"].value_counts()

In [None]:
# Bar chart of how many rows fall under each "Sex" value
sns.countplot(data=titanic_data, x="Sex")

In [None]:
# Number of survivors gender-wise: "Sex" counts split by the "Survived" value
sns.countplot(data=titanic_data, x="Sex", hue="Survived")

In [None]:
# Bar chart of how many rows fall under each "Pclass" value
sns.countplot(data=titanic_data, x="Pclass")

In [None]:
# "Pclass" counts split by the "Survived" value
sns.countplot(data=titanic_data, x="Pclass", hue="Survived")

#### Encoding the Categorical Columns

In [None]:
# inspect the distinct values of "Sex" (and their counts) before encoding them
titanic_data["Sex"].value_counts()

In [None]:
# inspect the distinct values of "Embarked" (and their counts) before encoding them
titanic_data["Embarked"].value_counts()

In [None]:
# Encode the categorical columns as integers:
#   Sex:      male -> 0, female -> 1
#   Embarked: S -> 0, C -> 1, Q -> 2
# The result is reassigned instead of using inplace=True, and both columns are
# cast explicitly to int64. Relying on replace() to silently downcast the
# object columns is deprecated in pandas 2.2+, which would otherwise leave
# them as object dtype. The cast also fails loudly if any unmapped or missing
# value is still present in either column.
titanic_data = titanic_data.replace(
    {"Sex": {"male": 0, "female": 1}, "Embarked": {"S": 0, "C": 1, "Q": 2}}
).astype({"Sex": "int64", "Embarked": "int64"})

In [None]:
# Preview the first five rows again to check the encoded columns
titanic_data.head(n=5)

#### Separating Features & Target

In [None]:
# Features: every column except PassengerId, Name, Ticket and the target
X = titanic_data.drop(columns=["PassengerId", "Name", "Ticket", "Survived"])
# Target: the "Survived" column
Y = titanic_data["Survived"]

In [None]:
# show the feature matrix as plain text
print(X)

In [None]:
# show the target column as plain text
print(Y)

### Splitting the data into Training & Testing

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full, training and test feature sets, in that order
print(*(frame.shape for frame in (X, X_train, X_test)))

### Model Training

##### LogisticRegression

In [None]:
# Logistic regression classifier.
# max_iter is raised from scikit-learn's default of 100: with unscaled
# features the default solver can stop before converging and emit a
# ConvergenceWarning, leaving the coefficients short of the optimum.
model = LogisticRegression(max_iter=1000)

In [None]:
# training the LogisticRegression model with the training data
# (fit() updates `model` in place)
model.fit(X_train, Y_train)

#### Model Evaluation
##### Accuracy score

In [None]:
# accuracy on training data: first predict labels for the rows the model was fitted on
X_train_prediction = model.predict(X_train)

In [None]:
# show the predicted labels for the training rows
print(X_train_prediction)

In [None]:
# Fraction of training rows whose predicted label matches the true label
training_data_Accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)
print("Accuracy Score of training data: ", training_data_Accuracy)

In [None]:
# accuracy on test data: first predict labels for the held-out rows
X_test_prediction = model.predict(X_test)

In [None]:
# show the predicted labels for the test rows
print(X_test_prediction)

In [None]:
# Fraction of held-out test rows whose predicted label matches the true label
testing_data_Accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)
print("Accuracy Score of test data: ", testing_data_Accuracy)

In [None]:
# Wrap the training accuracy in a one-row DataFrame with a single
# 'Accuracy' column
accuracy_df = pd.DataFrame([training_data_Accuracy], columns=['Accuracy'])

# Save it as 'training_accuracy.csv', omitting the index column
accuracy_df.to_csv('training_accuracy.csv', index=False)