In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import os

# Report where the notebook is running so file paths can be sanity-checked.
print("Current working directory:", os.getcwd())

# Read the training data; the CSV is expected at this absolute path.
df = pd.read_csv("/content/train.csv")

# Show the available columns before any of them are removed.
print("Columns in dataset:", df.columns)

# Discard the identifier and free-text columns; they are not used as features.
df = df.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

# Impute gaps: the median for Age, the most frequent value for Embarked.
fill_values = {
    "Age": df["Age"].median(),
    "Embarked": df["Embarked"].mode()[0],
}
for column, value in fill_values.items():
    df[column] = df[column].fillna(value)

# Encode the categorical columns as integer codes.
# Each column gets its own fitted encoder, stored in `encoders`, so that the
# learned category -> code mapping of every column stays available afterwards
# (e.g. for `inverse_transform` or for encoding another dataset the same way).
# Re-fitting one shared encoder would silently discard the "Sex" mapping.
encoders = {}
for column in ("Sex", "Embarked"):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Kept for backward compatibility: `le` previously ended up fitted on
# "Embarked", so it still refers to that encoder.
le = encoders["Embarked"]

# Separate the target column from the remaining feature columns.
y = df["Survived"]
X = df.drop(columns=["Survived"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a logistic-regression classifier (iteration cap of 1000) on the
# training rows; `fit` returns the estimator itself.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict the held-out rows and report the F1 score against the true labels.
y_pred = model.predict(X_test)
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print("✅ F1 Score:", f1)




Current working directory: /content
Columns in dataset: Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
✅ F1 Score: 0.7638888888888888


# New Section

pip install kaggle
