Skip to content
Fetching contributors… Cannot retrieve contributors at this time
43 lines (34 sloc) 1.5 KB
 import pandas as pd from sklearn.metrics import confusion_matrix from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split # Importing the dataset train = pd.read_csv("train.csv") test = pd.read_csv("test.csv") # Encode Sex string to numeric train["Sex"] = pd.factorize(train["Sex"]) test["Sex"] = pd.factorize(test["Sex"]) # Fill in the missing values train["Fare"] = train["Fare"].fillna((train["Fare"].mean())) test["Fare"] = test["Fare"].fillna((test["Fare"].mean())) train["Age"] = train["Age"].fillna((train["Age"].mean())) test["Age"] = test["Age"].fillna((test["Age"].mean())) # Prepare train/test set. columns = ["Fare", "Sex", "Pclass", "Age"] X_all = train[columns] y_all = train["Survived"] X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size = 0.2, random_state = 0) # Fit and predict on splitted train data. classifier = LogisticRegression(random_state = 0, solver="lbfgs", max_iter = 100) classifier.fit(X_train, y_train) prediction = prediction = classifier.predict(X_test) cm = confusion_matrix(y_test, prediction) print(f"Confusion matrix:\n{cm}") print(f"Accuracy: {(cm + cm) / len(y_test)}") # Fit and predict on all data. classifier.fit(X_all, y_all) prediction = classifier.predict(test[columns]) # Prepare the submission data submission_df = {"PassengerId": test["PassengerId"],"Survived": prediction} submission = pd.DataFrame(submission_df) submission.to_csv(f"predictions/titanic_simplest.csv", index=False)
You can’t perform that action at this time.