<a href="https://colab.research.google.com/github/mhakashif/Swiggy-Churn-And-Analytics/blob/main/Churn%20Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every Python warning for the rest of the session to keep cell output tidy.
# NOTE(review): this also hides pandas/scikit-learn deprecation and input-validation
# warnings raised by later cells — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Read the restaurant listing CSV from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='swiggy.csv')
df.head(n=5)

Unnamed: 0,ID,Area,City,Restaurant,Price,Avg ratings,Total ratings,Food type,Address,Delivery time
0,211,Koramangala,Bangalore,Tandoor Hut,300.0,4.4,100,"Biryani,Chinese,North Indian,South Indian",5Th Block,59
1,221,Koramangala,Bangalore,Tunday Kababi,300.0,4.1,100,"Mughlai,Lucknowi",5Th Block,56
2,246,Jogupalya,Bangalore,Kim Lee,650.0,4.4,100,Chinese,Double Road,50
3,248,Indiranagar,Bangalore,New Punjabi Hotel,250.0,3.9,500,"North Indian,Punjabi,Tandoor,Chinese",80 Feet Road,57
4,249,Indiranagar,Bangalore,Nh8,350.0,4.0,50,"Rajasthani,Gujarati,North Indian,Snacks,Desser...",80 Feet Road,63


In [None]:
# Show the column labels of the loaded frame.
df.columns

Index(['ID', 'Area', 'City', 'Restaurant', 'Price', 'Avg ratings',
       'Total ratings', 'Food type', 'Address', 'Delivery time'],
      dtype='object')

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder



# --- Churn labels -----------------------------------------------------------
# Thresholds used to derive the two binary churn targets.
threshold_rating = 3  # churn1: average rating strictly below this value
threshold_time = 30   # churn2: delivery time strictly above this value (presumably minutes — confirm)

# churn1 is derived from the *average* rating. The previous version compared
# 'Total ratings' (a count of reviews such as 50/100/500) against the rating
# threshold, which yields an (almost) constant label and a trivially perfect model.
# pd.to_numeric(..., errors='coerce') turns any non-numeric rating into NaN;
# NaN comparisons are False, so such rows are labelled 0.
df['churn1'] = (
    pd.to_numeric(df['Avg ratings'], errors='coerce') < threshold_rating
).astype(int)
df['churn2'] = (df['Delivery time'] > threshold_time).astype(int)

# --- Feature encoding -------------------------------------------------------
# One LabelEncoder per categorical column; the fitted encoders are kept so the
# same mapping can be applied to user-supplied city/area values later on.
le_city = LabelEncoder()
le_area = LabelEncoder()
df['city_encoded'] = le_city.fit_transform(df['City'])
df['area_encoded'] = le_area.fit_transform(df['Area'])

# Independent variables (features)
X = df[['city_encoded', 'area_encoded']]

# Dependent variables (targets) for churn1 and churn2
y_churn1 = df['churn1']
y_churn2 = df['churn2']


In [None]:
# Split features and both targets in a single call so every array is partitioned
# on exactly the same rows. (Two separate calls only stayed aligned because they
# happened to share the same random_state, and the second silently overwrote
# X_train / X_test.)
(
    X_train, X_test,
    y_train_churn1, y_test_churn1,
    y_train_churn2, y_test_churn2,
) = train_test_split(X, y_churn1, y_churn2, test_size=0.2, random_state=42)

# Build Random Forest model for churn1.
# random_state is fixed so a fresh "Restart & Run All" reproduces the same forest.
model_churn1 = RandomForestClassifier(random_state=42)
model_churn1.fit(X_train, y_train_churn1)

# Build Random Forest model for churn2 (same features, different target).
model_churn2 = RandomForestClassifier(random_state=42)
model_churn2.fit(X_train, y_train_churn2)


In [None]:
from sklearn.metrics import accuracy_score

# Generate held-out predictions for both churn models.
y_pred_churn1, y_pred_churn2 = (
    fitted_model.predict(X_test) for fitted_model in (model_churn1, model_churn2)
)

# Fraction of test rows whose predicted label matches the true label, per target.
accuracy_churn1, accuracy_churn2 = (
    accuracy_score(y_true, y_pred)
    for y_true, y_pred in (
        (y_test_churn1, y_pred_churn1),
        (y_test_churn2, y_pred_churn2),
    )
)

# Report both scores as percentages.
print(f"Accuracy for churn1: {accuracy_churn1 * 100:.2f}%")
print(f"Accuracy for churn2: {accuracy_churn2 * 100:.2f}%")


Accuracy for churn1: 100.00%
Accuracy for churn2: 96.54%


In [None]:
def safe_label_transform(label_encoder, input_value):
    """Encode ``input_value`` with a fitted encoder, tolerating unseen labels.

    Parameters
    ----------
    label_encoder : fitted encoder exposing ``classes_`` and ``transform``.
    input_value : the raw label to encode.

    Returns
    -------
    The encoded value when ``input_value`` is one of ``label_encoder.classes_``;
    otherwise ``-1`` after printing a warning.
    """
    if input_value in label_encoder.classes_:
        return label_encoder.transform([input_value])[0]
    print(f"Warning: '{input_value}' is not in the training data. Using a default value.")
    return -1  # Default encoding for unknown input


# Get user input dynamically (surrounding whitespace is stripped)
user_city = input("Enter the city: ").strip()
user_area = input("Enter the area: ").strip()

# Check if inputs are valid (non-empty)
if not user_city or not user_area:
    print("Error: City and area inputs cannot be empty.")
else:
    # Safely encode the user input using the label encoders fitted on the training data
    user_city_encoded = safe_label_transform(le_city, user_city)
    user_area_encoded = safe_label_transform(le_area, user_area)

    # Only predict when both values were seen during training
    if user_city_encoded != -1 and user_area_encoded != -1:
        # The models were fitted on a DataFrame, so predict on a DataFrame with the
        # same column names/order instead of a bare nested list; this avoids
        # scikit-learn's feature-name mismatch warning.
        user_input = pd.DataFrame(
            [[user_city_encoded, user_area_encoded]],
            columns=X.columns,
        )

        # Predict churn1 and churn2
        churn1_prediction = model_churn1.predict(user_input)[0]
        churn2_prediction = model_churn2.predict(user_input)[0]

        # Output the result
        print(f"In {user_city} ({user_area}), Churn1 prediction: {churn1_prediction}, Churn2 prediction: {churn2_prediction}")
    else:
        print("Unable to make predictions due to unknown city or area.")


Enter the city: Hyderabad
Enter the area: Begum Bazar
In Hyderabad (Begum Bazar), Churn1 prediction: 0, Churn2 prediction: 1
