## Import Libraries

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

import pandas as pd
import numpy as np

## Load Dataset

In [28]:
# Read the training and test tables; the relative paths resolve against the working directory.
train_path = 'train.csv'
test_path = 'test.csv'
data_X_train = pd.read_csv(train_path)
data_X_test = pd.read_csv(test_path)

In [29]:
# Preview the first five rows of the raw training table.
data_X_train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Clean and Pre-process Data

In [30]:
# Encode the categorical columns as integers and fill missing values in both frames.
#
# Every encoder and fill value is learned from the training frame only and then
# applied to the test frame, so both frames share one category -> integer mapping
# and one set of imputation values (the ones the model is fitted on).
categorical_cols = ['Embarked', 'Sex']
numeric_cols = ['Age', 'Parch', 'Fare', 'SibSp']

for col in categorical_cols:
    # Fill gaps with the most frequent training category so that a missing value
    # is not handed to the encoder as if it were a category of its own.
    most_common = data_X_train[col].mode().iloc[0]

    le = LabelEncoder()
    data_X_train[col] = le.fit_transform(data_X_train[col].fillna(most_common))
    # transform() (not fit_transform()) reuses the training mapping; it raises
    # ValueError if the test frame holds a category never seen in training,
    # instead of silently assigning it a conflicting code.
    data_X_test[col] = le.transform(data_X_test[col].fillna(most_common))

# Assign the filled columns back rather than calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated in pandas and does
# not modify the frame when Copy-on-Write is enabled.
train_means = data_X_train[numeric_cols].mean()
data_X_train[numeric_cols] = data_X_train[numeric_cols].fillna(train_means)
data_X_test[numeric_cols] = data_X_test[numeric_cols].fillna(train_means)

In [31]:
# Preview the first five rows of the training table after pre-processing.
data_X_train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,0
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,2
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,C123,2
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,,2


## Drop irrelevant columns from the dataset and create the `y` label

In [32]:
# Columns that are dropped from both frames before modelling.
non_feature_cols = ['PassengerId', 'Name', 'Cabin', 'Ticket']

# Training features exclude the target column as well; the target becomes `y`.
X = data_X_train.drop(columns=non_feature_cols + ['Survived'])
y = data_X_train['Survived']

# The test frame is only stripped of the shared non-feature columns.
X_test = data_X_test.drop(columns=non_feature_cols)

In [37]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,22.0,1,0,7.25,2
1,1,0,38.0,1,0,71.2833,0
2,3,0,26.0,0,0,7.925,2
3,1,0,35.0,1,0,53.1,2
4,3,1,35.0,0,0,8.05,2


In [36]:
# Preview the first five target labels.
y.head(n=5)

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

## Split Data into Train and Validation Sets

In [7]:
# Hold out 20% of the labelled rows for validation; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=10)
X_train, X_val, y_train, y_val = split

In [8]:
# Sanity check: total number of rows across the training and validation splits.
len(X_train) + len(X_val)

891

## Using the LogisticRegression model to fit data

In [9]:
# Build the classifier with a fixed seed, then fit it on the training split.
# fit() returns the estimator itself, so `regr` is the fitted model.
classifier = LogisticRegression(random_state=0)
regr = classifier.fit(X_train, y_train)

In [10]:
# For a classifier such as LogisticRegression, score() returns the mean accuracy
# on the given data and labels, not an R-squared value, so label it accordingly.
print('Training data accuracy:', regr.score(X_train, y_train))

Training data r-squared: 0.797752808988764


In [11]:
# Predict labels for the validation rows, then show the shape of the prediction array.
predict_X_val = regr.predict(X_val)
np.shape(predict_X_val)

(179,)

In [12]:
# Predict labels for the test features.
# NOTE(review): predict_X_test is not read again in the visible notebook; the same
# call is repeated later and stored as final_pred.
predict_X_test = regr.predict(X_test)

In [13]:
# Fraction of validation rows whose predicted label matches the true label.
accuracy_score(y_true=y_val, y_pred=regr.predict(X_val))

0.8212290502793296

In [14]:
# Keep the test passengers' ids so they can be paired with the predictions.
test_ID = data_X_test.loc[:, 'PassengerId']
test_ID

0       892
1       893
2       894
3       895
4       896
       ... 
413    1305
414    1306
415    1307
416    1308
417    1309
Name: PassengerId, Length: 418, dtype: int64

In [15]:
# Predicted labels for the test features; these feed the results table built below.
final_pred = regr.predict(X_test)

## Results

In [16]:
# Pair each test passenger id with its predicted label.
# NOTE(review): the id column is written as 'Passenger ID' (with a space) while the
# input column is named 'PassengerId' — confirm which header consumers of the
# results file expect.
result_columns = {'Passenger ID': test_ID, 'Survived': final_pred}
result = pd.DataFrame(result_columns)

In [17]:
# Display the results table as the cell output.
result

Unnamed: 0,Passenger ID,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [18]:
# Write the results table to disk, leaving out the row index.
output_path = 'results.csv'
result.to_csv(output_path, index=False)