In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/Predict-the-Customer-Satisfaction-CSE-22/sample_submission.csv
/kaggle/input/Predict-the-Customer-Satisfaction-CSE-22/train_dataset.csv
/kaggle/input/Predict-the-Customer-Satisfaction-CSE-22/test_dataset.csv


In [2]:

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Load datasets
train_df = pd.read_csv('/kaggle/input/Predict-the-Customer-Satisfaction-CSE-22/train_dataset.csv')
test_df = pd.read_csv('/kaggle/input/Predict-the-Customer-Satisfaction-CSE-22/test_dataset.csv')

# Handling missing values using recommended assignment approach
train_df['loyalty_tier'] = train_df['loyalty_tier'].fillna(train_df['loyalty_tier'].mode()[0])
test_df['loyalty_tier'] = test_df['loyalty_tier'].fillna(test_df['loyalty_tier'].mode()[0])

train_df['Received_tier_discount_percentage'] = train_df['Received_tier_discount_percentage'].fillna(
    train_df['Received_tier_discount_percentage'].median()
)
test_df['Received_tier_discount_percentage'] = test_df['Received_tier_discount_percentage'].fillna(
    test_df['Received_tier_discount_percentage'].median()
)

train_df['Received_card_discount_percentage'] = train_df['Received_card_discount_percentage'].fillna(
    train_df['Received_card_discount_percentage'].median()
)
test_df['Received_card_discount_percentage'] = test_df['Received_card_discount_percentage'].fillna(
    test_df['Received_card_discount_percentage'].median()
)

# Encode categorical variables
# Encode Gender column (M: 1, F: 0, O: 2)
train_df['Gender'] = train_df['Gender'].map({'M': 1, 'F': 0, 'O': 2})
test_df['Gender'] = test_df['Gender'].map({'M': 1, 'F': 0, 'O': 2})

# Encode loyalty program membership (YES: 1, NO: 0)
train_df['Is_current_loyalty_program_member'] = train_df['Is_current_loyalty_program_member'].map({'YES': 1, 'NO': 0})
test_df['Is_current_loyalty_program_member'] = test_df['Is_current_loyalty_program_member'].map({'YES': 1, 'NO': 0})

# Encode target variable 'customer_experience'
label_encoder = LabelEncoder()
train_df['customer_experience'] = label_encoder.fit_transform(train_df['customer_experience'])

# Convert date columns to datetime and extract year, month, day
date_columns = ['Date_Registered', 'payment_datetime', 'purchased_datetime', 
                'released_date', 'estimated_delivery_date', 'received_date']

for col in date_columns:
    train_df[col] = pd.to_datetime(train_df[col], errors='coerce')
    test_df[col] = pd.to_datetime(test_df[col], errors='coerce')
    
    train_df[f'{col}_year'] = train_df[col].dt.year
    train_df[f'{col}_month'] = train_df[col].dt.month
    train_df[f'{col}_day'] = train_df[col].dt.day

    test_df[f'{col}_year'] = test_df[col].dt.year
    test_df[f'{col}_month'] = test_df[col].dt.month
    test_df[f'{col}_day'] = test_df[col].dt.day

# Drop original date columns
train_df.drop(columns=date_columns, inplace=True)
test_df.drop(columns=date_columns, inplace=True)

# Standardize the 'final_payment' column
scaler = StandardScaler()
train_df[['final_payment']] = scaler.fit_transform(train_df[['final_payment']])
test_df[['final_payment']] = scaler.transform(test_df[['final_payment']])

# Drop unnecessary columns
columns_to_drop = ['user_id', 'transaction_id', 'order_id', 'tracking_number']
train_df.drop(columns=columns_to_drop, errors='ignore', inplace=True)
test_df.drop(columns=columns_to_drop, errors='ignore', inplace=True)

# One-hot encode categorical columns
categorical_columns = ['product_category', 'payment_method', 'shipping_method', 'purchase_medium']
train_df = pd.get_dummies(train_df, columns=categorical_columns, drop_first=True)
test_df = pd.get_dummies(test_df, columns=categorical_columns, drop_first=True)

# Align test dataset columns with train dataset
test_df = test_df.reindex(columns=train_df.columns, fill_value=0)

# Fill any remaining missing values with 0
train_df.fillna(0, inplace=True)
test_df.fillna(0, inplace=True)

# Split features and target
train_x = train_df.drop(columns=['customer_experience'])  # Features
train_y = train_df['customer_experience']  # Target variable

# Ensure test set doesn't contain the target column
test_x = test_df.drop(columns=['customer_experience'], errors='ignore')



# Train Random Forest classifier model
model = RandomForestClassifier(random_state=42)
model.fit(train_x, train_y)

# Predict on test data
predictions = model.predict(test_x)



In [3]:
# Count occurrences of each label
count_zeros = np.count_nonzero(predictions == 0)
count_ones = np.count_nonzero(predictions == 1)
count_twos = np.count_nonzero(predictions == 2)

print(f"Count of 0s: {count_zeros}")  
print(f"Count of 1s: {count_ones}")   
print(f"Count of 2s: {count_twos}")   


Count of 0s: 56588
Count of 1s: 72938
Count of 2s: 8445


In [4]:
# Convert predictions back to original labels for interpretation
decoded_predictions = label_encoder.inverse_transform(predictions)


# Export predictions if needed
output = pd.DataFrame({ 'id': range(len(predictions)),
    'customer_experience': decoded_predictions})
# Save the DataFrame to a CSV file with the specified format
output.to_csv('customer_satisfaction_predictions.csv', index=False)

print("CSV file saved successfully!")

CSV file saved successfully!
