# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the raw churn data and discard CustomerID, which is only an
# identifier and is not needed for prediction.
df = pd.read_csv("./telecom_churn_data.csv")
df = df.drop(columns=["CustomerID"])

# Fit one LabelEncoder per categorical column and keep it in a dict so the
# same string -> integer mapping can be reused (or inverted) later.
categorical_columns = ["Contract", "InternetService"]
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_columns}

# Overwrite each categorical column with its integer codes.
for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

# Split dataset into features (X) and target variable (y)
X = df.drop(columns=["Churn"])  # Features
y = df["Churn"]  # Target variable

# Split into training (80%) and testing (20%) sets BEFORE scaling, so the
# scaler's mean/std are learned from training rows only. Fitting the scaler
# on the full dataset would leak test-set statistics into training and make
# the evaluation below optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies: the split returns subsets of X, and assigning
# columns to them directly can trigger pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features: fit on the training set, then apply the same
# (train-derived) transformation to the test set. X itself is left unscaled.
numeric_features = ["Tenure", "MonthlyCharges", "TotalCharges", "SupportTickets"]
scaler = StandardScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

# Fit a logistic regression in which errors on the churn class (1) are
# weighted three times as heavily as errors on the non-churn class (0).
model = LogisticRegression(class_weight={0: 1, 1: 3}).fit(X_train, y_train)

# Predicted class label for every row of the held-out test set.
y_pred = model.predict(X_test)

# Test features with the model's prediction appended as an extra column
# (assign returns a new DataFrame, so X_test itself is not modified).
predictions_df = X_test.assign(**{"Predicted Churn": y_pred})

# Preview the first 10 rows.
print(predictions_df.head(10))

# Overall accuracy on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Per-class metrics only: the report is requested as a dict so that the
# accuracy / macro-average / weighted-average entries can be left out.
report = classification_report(
    y_test,
    y_pred,
    target_names=["No Churn", "Churn"],
    output_dict=True,
)
print("\nClassification Report:")
for label in ("No Churn", "Churn"):
    metrics = report[label]
    summary_lines = [
        f"{label}:",
        f"  Precision: {metrics['precision']:.2f}",
        f"  Recall: {metrics['recall']:.2f}",
        f"  F1-Score: {metrics['f1-score']:.2f}",
        f"  Support: {int(metrics['support'])}",
    ]
    print("\n".join(summary_lines))

# Confusion matrix of true labels (y_test) against predictions (y_pred).
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))