**Importing Libraries**

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
#Listing every file available under the Kaggle input directory
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        file_path = os.path.join(folder, file_name)
        print(file_path)


**Preprocessing Data**

In [None]:
#Loading the Titanic train and test splits from the Kaggle input directory
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("/kaggle/input/titanic/train.csv", "/kaggle/input/titanic/test.csv")
)

#Summary statistics for every column (numeric and non-numeric) to spot anything unusual
train_data.describe(include='all')

In [None]:
#Checking each column's dtype and non-null count to see which columns have missing values
train_data.info()

In [None]:
#Cleaning the Data

#Dropping Cabin due to few values in data; Name, Fare, and Ticket are too unique in my opinion;
#Embarked decreased the score for both models so it was removed
Clean_Train_Data = train_data.drop(['Cabin','Name','Fare','Ticket','Embarked'], axis = 1)

#Filling in missing Age values with the mean age truncated to a whole number.
#The result is assigned back to the column instead of calling fillna(inplace=True) on the
#column selection: that chained in-place form is deprecated in recent pandas and does not
#update the DataFrame once Copy-on-Write is enabled.
mean_train_age = int(Clean_Train_Data['Age'].mean())
Clean_Train_Data['Age'] = Clean_Train_Data['Age'].fillna(mean_train_age)

#Checking to see if dtype and null values changed
Clean_Train_Data.info()


In [None]:
#Repeating process for the test data as well

#Dropping the same columns as for the training data (including Embarked) so both cleaned
#frames have the same feature columns
Clean_Test_Data = test_data.drop(['Cabin','Name','Fare','Ticket','Embarked'], axis = 1)

#Filling missing Age values with the truncated mean age of the cleaned *training* data, so
#the imputation value is not derived from the test set.
#Assigning the result back (rather than fillna(inplace=True) on the column selection) keeps
#this working under pandas Copy-on-Write, where the chained in-place form is a no-op.
Clean_Test_Data['Age'] = Clean_Test_Data['Age'].fillna(int(Clean_Train_Data["Age"].mean()))

Clean_Test_Data.info()

In [None]:
#Overall survival counts in the training data
#0=No and 1=Yes

fig, ax = plt.subplots()
sns.countplot(data=train_data, x='Survived', hue='Survived', ax=ax)
ax.set(xlabel='Survived', ylabel='Count')
plt.show()

In [None]:
#Survival counts split by sex
#Where 0=No and 1=Yes
fig, ax = plt.subplots()
sns.countplot(data=train_data, x="Sex", hue='Survived', ax=ax)
ax.set(title="Survival of different Gender", xlabel='Sex', ylabel='Count')
plt.show()

In [None]:
#Survival counts for 1st, 2nd and 3rd class passengers
#1st = Upper Class, 2nd = Middle Class, 3rd = Lower

fig, ax = plt.subplots()
sns.countplot(data=train_data, x='Pclass', hue='Survived', ax=ax)
ax.set(xlabel='Pclass', ylabel='Count', title="Survival of different classes")
plt.show()

In [None]:
# Survival counts by number of siblings / spouses aboard the Titanic
fig, ax = plt.subplots()
sns.countplot(data=train_data, x='SibSp', hue='Survived', ax=ax)
ax.set(xlabel='SibSp', ylabel='Count', title="Survival of SibSp")
plt.show()

In [None]:
# Survival counts by number of parents / children aboard the Titanic
# Some children traveled only with a nanny, therefore parch=0 for them.
fig, ax = plt.subplots()
sns.countplot(data=train_data, x='Parch', hue="Survived", ax=ax)
ax.set(xlabel='Parch', ylabel='Count', title="Survival of families")
plt.show()

**Trying Different Models**
*Fitting Random Forest and SVM models on the training data, then using cross-validation scores to estimate their accuracy*

In [None]:
#Random Forest Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

#Target column
y = Clean_Train_Data['Survived']

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']

#One-hot encoding the non-numeric columns among the selected features
X = pd.get_dummies(Clean_Train_Data[features])
#get_dummies is applied to the train and test frames independently, so the test matrix is
#forced onto the training columns (same set, same order); an indicator column that does not
#occur in the test data is filled with zeros
X_test = pd.get_dummies(Clean_Test_Data[features]).reindex(columns=X.columns, fill_value=0)

#random_state is fixed so the fitted forest is reproducible between runs
RandomForest = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
RandomForest.fit(X, y)
predictions = RandomForest.predict(X_test)

#Cross-validation scores on the training data, using cross_val_score's default splitting and scoring
scores = cross_val_score(RandomForest, X, y)
print('Random Forest Scores:',scores)
print('Random Forest:',scores.mean())

#Writing the test-set predictions in the PassengerId/Survived submission format
output = pd.DataFrame({'PassengerId': Clean_Test_Data.PassengerId, 'Survived': predictions})
output.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")


In [None]:
#SVM Model
#No `from os import readv` (unused, and os.readv only exists on Unix so the import fails
#elsewhere) and no `from pandas import *` (wildcard import that floods the notebook
#namespace); pandas and numpy are used through the `pd` / `np` aliases imported at the top.
from sklearn import svm
from sklearn import preprocessing  #not used in this cell; kept in case another cell relies on it
from sklearn.model_selection import cross_val_score


#Target column
y = Clean_Train_Data['Survived']

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']

#One-hot encoding the non-numeric columns among the selected features
X = pd.get_dummies(Clean_Train_Data[features])
#get_dummies is applied to the train and test frames independently, so the test matrix is
#forced onto the training columns (same set, same order); an indicator column that does not
#occur in the test data is filled with zeros
X_test = pd.get_dummies(Clean_Test_Data[features]).reindex(columns=X.columns, fill_value=0)

#NOTE(review): the features are passed to the RBF kernel unscaled; standardising them
#(e.g. with sklearn.preprocessing) may help, but would change the reported scores
SVM = svm.SVC(kernel='rbf', gamma=.15)

#Converting to plain arrays with an explicit float dtype so the model always receives a
#numeric matrix, whatever dtype get_dummies used for the indicator columns
X = np.asarray(X, dtype=float)
X_test = np.asarray(X_test, dtype=float)
y = np.asarray(y)
SVM.fit(X, y)
predictions = SVM.predict(X_test)

#Cross-validation scores on the training data, using cross_val_score's default splitting and scoring
scores = cross_val_score(SVM, X, y)
print('SVM Scores:',scores)
print('SVM:',scores.mean())

#The SVM predictions are intentionally not written to submission.csv; the file saved by the
#Random Forest cell is left in place.
