In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [37]:
# Step 1: Load and Preprocess the Telco Customer Churn Dataset

In [64]:
# Load and preprocess the dataset
def load_data(file_path):
    """Read a CSV file and return a DataFrame with missing values imputed.

    The ``'Customer ID'`` column is dropped when present. Missing values in
    numeric columns are replaced by the column mean; missing values in
    object-dtype columns are replaced by the column's most frequent value.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the imputation described above applied.
    """
    df = pd.read_csv(file_path)

    # Drop columns not needed for preference analysis
    if 'Customer ID' in df.columns:
        df = df.drop(columns='Customer ID')

    # Handle missing values separately for numeric and categorical columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    categorical_cols = df.select_dtypes(include=['object']).columns

    # Fill missing values for numeric columns with mean
    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

    # Fill missing values for categorical columns with mode.
    # Assign the result back instead of calling fillna(inplace=True) on
    # df[col]: the in-place form operates on a temporary column object and is
    # not guaranteed to update df (it does nothing under Copy-on-Write).
    for col in categorical_cols:
        modes = df[col].mode()
        if modes.empty:
            # No non-missing value exists, so there is nothing to fill with.
            continue
        df[col] = df[col].fillna(modes.iloc[0])

    return df

In [52]:
# Convert qualitative preferences to numerical values

In [53]:
def quantify_preferences(df):
    """Replace the four textual preference columns with integer-coded ones.

    Each source column is mapped through a fixed label -> code table into a
    new ``Pref_*`` column, after which the source columns are removed. Labels
    that are not in a table become NaN. ``df`` is modified in place and also
    returned.
    """
    # (source column, encoded column, label -> code)
    encodings = (
        ('Preferred Communication Channel', 'Pref_Comm_Channel',
         {'Email': 1, 'Phone': 2, 'SMS': 3}),
        ('Preferred Communication Time', 'Pref_Comm_Time',
         {'Morning': 1, 'Afternoon': 2, 'Evening': 3, 'Anytime': 4}),
        ('Preferred Services/Features', 'Pref_Services_Features',
         {'International Calling': 1, 'Data Rollover': 2, 'Family Plan': 3}),
        ('Preferred Payment Method', 'Pref_Payment_Method',
         {'Bank Transfer': 1, 'Debit Card': 2, 'Online': 3}),
    )

    # Add every encoded column first so the new columns keep their order.
    for source_col, encoded_col, codes in encodings:
        df[encoded_col] = df[source_col].map(codes)

    # The textual originals are no longer needed once encoded.
    df.drop(columns=[source_col for source_col, _, _ in encodings], inplace=True)

    return df

In [54]:
# Normalize the data
def normalize_data(df):
    """Rescale every numeric column of ``df`` with a default ``MinMaxScaler``.

    The scaled values overwrite the numeric columns in place; non-numeric
    columns are left untouched. The same ``df`` object is returned.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    scaled_values = MinMaxScaler().fit_transform(df[numeric_columns])
    df[numeric_columns] = scaled_values
    return df

In [61]:
# Define the TOPSIS method with customer preferences and non-preferences
def topsis_with_preferences(df, non_pref_criteria, weights):
    """Score and rank the rows of ``df`` with TOPSIS over the given criteria.

    Each criterion column is divided by its Euclidean norm, multiplied by its
    weight, and compared against the per-column maximum (ideal) and minimum
    (negative-ideal). Every criterion is therefore treated as
    "larger is better".

    Parameters
    ----------
    df : pandas.DataFrame
        One row per alternative. Modified in place: the columns
        ``distance_to_ideal``, ``distance_to_negative_ideal``,
        ``relative_closeness`` and ``rank`` are added or overwritten.
    non_pref_criteria : list of str
        Names of the columns in ``df`` used as criteria.
    weights : sequence of float
        One weight per criterion, in the same order as ``non_pref_criteria``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. ``rank`` is 1 for the row closest to the
        ideal solution; tied rows share their average rank.

    Raises
    ------
    ValueError
        If ``weights`` is a list, tuple or 1-D array whose length differs
        from the number of criteria.
    """
    if (
        isinstance(weights, (list, tuple, np.ndarray))
        and np.ndim(weights) == 1
        and len(weights) != len(non_pref_criteria)
    ):
        raise ValueError(
            f"Expected {len(non_pref_criteria)} weights (one per criterion), "
            f"got {len(weights)}"
        )

    criteria = df[non_pref_criteria]

    # Normalize the decision matrix (vector normalization per column).
    column_norms = np.sqrt((criteria ** 2).sum())
    # An all-zero column has norm 0; dividing by 1 instead keeps it at zero
    # rather than producing 0/0 = NaN. Such a column cannot discriminate
    # between rows, so it contributes nothing to either distance.
    safe_norms = column_norms.where(column_norms > 0, 1.0)
    df_normalized = criteria / safe_norms

    # Apply weights
    df_weighted = df_normalized * weights

    # Determine ideal and negative-ideal solutions
    ideal_solution = df_weighted.max()
    negative_ideal_solution = df_weighted.min()

    # Calculate separation measures
    distance_to_ideal = np.sqrt(((df_weighted - ideal_solution) ** 2).sum(axis=1))
    distance_to_negative_ideal = np.sqrt(
        ((df_weighted - negative_ideal_solution) ** 2).sum(axis=1)
    )

    # Calculate the relative closeness to the ideal solution.
    # When both distances are zero (every row is identical on the criteria)
    # the ratio is undefined; use the neutral value 0.5 so the rows tie
    # instead of propagating NaN into the ranking.
    total_distance = distance_to_ideal + distance_to_negative_ideal
    relative_closeness = (
        distance_to_negative_ideal / total_distance.where(total_distance > 0)
    ).fillna(0.5)

    df['distance_to_ideal'] = distance_to_ideal
    df['distance_to_negative_ideal'] = distance_to_negative_ideal
    df['relative_closeness'] = relative_closeness

    # Rank the service plans (highest closeness gets rank 1)
    df['rank'] = df['relative_closeness'].rank(ascending=False)

    return df

In [74]:
# Implement the decision-making process with preferences and non-preferences
def decision_making_with_preferences(df, non_pref_criteria, weights):
    """Rank the rows with TOPSIS and assign each one a service plan.

    The ``rank`` column produced by ``topsis_with_preferences`` is split into
    four equal-width bins. The bin index (0-3) is stored in ``BestService``
    and its plan name in ``BestServiceName``. Returns the frame returned by
    ``topsis_with_preferences`` with those two columns added.

    NOTE(review): bin 0 holds the smallest rank values, so the rows ranked
    closest to the ideal solution receive 'Basic Plan' and the farthest
    receive 'Ultimate Plan' - confirm this direction is intended.
    """
    # Position in this tuple is the bin index written to 'BestService'.
    plan_names = ('Basic Plan', 'Standard Plan', 'Premium Plan', 'Ultimate Plan')

    ranked = topsis_with_preferences(df, non_pref_criteria, weights)

    ranked['BestService'] = pd.cut(
        ranked['rank'],
        bins=len(plan_names),
        labels=False,
        include_lowest=True,
    )
    ranked['BestServiceName'] = ranked['BestService'].map(dict(enumerate(plan_names)))

    return ranked

In [75]:
# Define non-preference criteria and their weights
non_pref_criteria = [
    'Data Usage (GB)',
    'Voice Minutes Usage',
    'SMS Usage',
]
# One weight per criterion, listed in the same order as non_pref_criteria.
weights = [0.3, 0.3, 0.4]

In [76]:
# Load and preprocess dataset
# Read the CSV, encode the preference columns, then scale the numeric columns.
df = (
    load_data('dataset.csv')
    .pipe(quantify_preferences)
    .pipe(normalize_data)
)

In [77]:
# Apply decision-making model
df_ranked = decision_making_with_preferences(
    df,
    non_pref_criteria=non_pref_criteria,
    weights=weights,
)

In [80]:
# Preview the plan index and plan name assigned to the first rows.
print(df_ranked.loc[:, ['BestService', 'BestServiceName']].head())

   BestService BestServiceName
0            1   Standard Plan
1            1   Standard Plan
2            3   Ultimate Plan
3            2    Premium Plan
4            0      Basic Plan
