In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [26]:
def load_data(file_path):
    df = pd.read_csv(file_path)
    
    df_computation = df.copy()

    if 'Customer ID' in df_computation.columns:
        df_computation.drop('Customer ID', axis=1, inplace=True)
    
    numeric_cols = df_computation.select_dtypes(include=[np.number]).columns
    categorical_cols = df_computation.select_dtypes(include=['object']).columns
    
    df_computation[numeric_cols] = df_computation[numeric_cols].fillna(df_computation[numeric_cols].mean())
    
    for col in categorical_cols:
        df_computation[col].fillna(df_computation[col].mode()[0], inplace=True)
    
    return df, df_computation

In [7]:
def quantify_preferences(df):
    df['Pref_Comm_Channel'] = df['Preferred Communication Channel'].map({'Email': 1, 'Phone': 2, 'SMS': 3})
    df['Pref_Comm_Time'] = df['Preferred Communication Time'].map({'Morning': 1, 'Afternoon': 2, 'Evening': 3, 'Anytime': 4})
    df['Pref_Services_Features'] = df['Preferred Services/Features'].map({'International Calling': 1, 'Data Rollover': 2, 'Family Plan': 3})
    df['Pref_Payment_Method'] = df['Preferred Payment Method'].map({'Bank Transfer': 1, 'Debit Card': 2, 'Online': 3, 'Credit Card': 4})
    
    df.drop(['Preferred Communication Channel', 'Preferred Communication Time', 'Preferred Services/Features', 'Preferred Payment Method'], axis=1, inplace=True)
    
    return df

In [8]:
def normalize_data(df):
    scaler = MinMaxScaler()
    df[df.select_dtypes(include=[np.number]).columns] = scaler.fit_transform(df.select_dtypes(include=[np.number]))
    return df

In [9]:
def topsis_with_preferences(df, non_pref_criteria, weights):
    df_normalized = df[non_pref_criteria] / np.sqrt((df[non_pref_criteria]**2).sum())
    
    df_weighted = df_normalized * weights
    
    ideal_solution = df_weighted.max()
    negative_ideal_solution = df_weighted.min()
    
    df['distance_to_ideal'] = np.sqrt(((df_weighted - ideal_solution) ** 2).sum(axis=1))
    df['distance_to_negative_ideal'] = np.sqrt(((df_weighted - negative_ideal_solution) ** 2).sum(axis=1))
    
    df['relative_closeness'] = df['distance_to_negative_ideal'] / (df['distance_to_negative_ideal'] + df['distance_to_ideal'])
    
    df['rank'] = df['relative_closeness'].rank(ascending=False)
    
    return df

In [10]:
def decision_making_with_preferences(df, non_pref_criteria, weights):
    df_ranked = topsis_with_preferences(df, non_pref_criteria, weights)
    
    num_plans = 4
    df_ranked['BestService'] = pd.cut(df_ranked['rank'], bins=num_plans, labels=False, include_lowest=True)
    
    service_names = {0: 'Basic Plan', 1: 'Standard Plan', 2: 'Premium Plan', 3: 'Ultimate Plan'}
    df_ranked['BestServiceName'] = df_ranked['BestService'].map(service_names)
    
    return df_ranked

In [11]:
non_pref_criteria = ['Data Usage (GB)', 'Voice Minutes Usage', 'SMS Usage']
weights = [0.3, 0.3, 0.4]  

In [12]:
df = load_data('dataset.csv')
df = quantify_preferences(df)
df = normalize_data(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


In [13]:
df_ranked = decision_making_with_preferences(df, non_pref_criteria, weights)

In [14]:
print(df_ranked[['BestService', 'BestServiceName']].head())

   BestService BestServiceName
0            1   Standard Plan
1            1   Standard Plan
2            3   Ultimate Plan
3            2    Premium Plan
4            0      Basic Plan


In [15]:
print(df_ranked[['BestService', 'BestServiceName']].head())

   BestService BestServiceName
0            1   Standard Plan
1            1   Standard Plan
2            3   Ultimate Plan
3            2    Premium Plan
4            0      Basic Plan


In [16]:
def save_predicted_plans(df_original, df_ranked, output_file='plans_dataset.csv'):
    columns_to_save = ['Customer ID', 'Age', 'Gender', 'Location', 'Education Level', 'BestServiceName']
    
    df_plans = pd.concat([df_original[['Customer ID', 'Age', 'Gender', 'Location', 'Education Level']], df_ranked['BestServiceName']], axis=1)
    
    df_plans.to_csv(output_file, index=False)
    print(f"Predicted plans dataset saved to {output_file}")

In [29]:
def save_predicted_plans(df_original, df_ranked, output_file='plans_dataset.csv'):
    columns_to_save = ['Customer ID', 'Age', 'Gender', 'Location', 'Education Level',  'BestServiceName']
    
    df_plans = pd.concat([df_original[['Customer ID', 'Age', 'Gender', 'Location', 'Education Level']], df_ranked['BestServiceName']], axis=1)
    
    df_plans.to_csv(output_file, index=False)
    print(f"Predicted plans dataset saved to {output_file}")

In [30]:
def process_and_save_predictions(file_path):
    df_original, df_computation = load_data(file_path)
    
    df = quantify_preferences(df_computation)
    
    df = normalize_data(df)
    
    non_pref_criteria = ['Pref_Comm_Channel', 'Pref_Comm_Time', 'Pref_Services_Features', 'Pref_Payment_Method']
    weights = [0.3, 0.3, 0.4,0.3]  
    df_ranked = decision_making_with_preferences(df, non_pref_criteria, weights)
    
    save_predicted_plans(df_original, df_ranked)

In [31]:
process_and_save_predictions('dataset.csv')

Predicted plans dataset saved to plans_dataset.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_computation[col].fillna(df_computation[col].mode()[0], inplace=True)
