In [3]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import joblib

class AutoMotoAIServiceReminder:
    """Service-reminder agent: urgency classifier, customer clustering and messaging.

    Typical flow::

        agent.load_data(df)            # derive sentiment + rule-based label
        agent.train_model()            # or agent.load_model(path)
        agent.cluster_segments()
        agent.generate_reminder_table()

    Attributes:
        numeric_features: Numeric columns fed to the classifier and clustering.
        categorical_features: Categorical columns fed to both as one-hot.
        text_feature: Free-text feedback column (TF-IDF, classifier only).
        cluster_features: ``numeric_features + categorical_features`` as built
            at construction time.
        n_clusters: Number of KMeans segments.
        model: Fitted ``Pipeline`` (preprocess + RandomForest), or ``None``.
        clustering: Fitted ``KMeans``, or ``None``.
        cluster_preprocessor: Fitted ``ColumnTransformer`` used for clustering,
            or ``None``.
        preprocessor: ``ColumnTransformer`` used inside ``model``, or ``None``.
        df: Working DataFrame set by ``load_data``, or ``None``.
    """

    # Columns produced by this agent. If the incoming data already carries them
    # (e.g. a CSV exported by an earlier run) they are stale and must not
    # short-circuit the "if not yet done" checks in generate_reminder_table.
    _DERIVED_COLUMNS = ('predicted_urgency', 'customer_cluster')
    # Channels used in messages when a row has no usable 'preferred_channels'.
    _DEFAULT_CHANNELS = ('WhatsApp', 'Email', 'SMS')

    def __init__(self, n_clusters=3):
        """Configure feature lists and the number of customer segments.

        Args:
            n_clusters: Number of clusters for ``cluster_segments``.
        """
        # Features required for model and clustering
        self.numeric_features = [
            'feedback_score',
            'feedback_sentiment',
            'last_service_cost',
            'days_since_last_service',
            'next_service_due_days',
            'age_of_vehicle',
            'odometer_reading'
        ]
        self.categorical_features = [
            'customer_type',
            'AMC_status'
        ]
        self.text_feature = 'customer_feedback'
        self.cluster_features = self.numeric_features + self.categorical_features
        self.n_clusters = n_clusters
        self.model = None
        self.clustering = None
        self.cluster_preprocessor = None
        self.preprocessor = None
        self.df = None

    # ------------------------------------------------------------------ helpers

    def _require_data(self):
        """Return the working DataFrame or raise if ``load_data`` was never called."""
        if self.df is None:
            raise RuntimeError("No data loaded; call load_data(df) first.")
        return self.df

    def _model_inputs(self):
        """Return a copy of the classifier's input columns with text made NaN-free."""
        columns = self.numeric_features + self.categorical_features + [self.text_feature]
        features = self._require_data()[columns].copy()
        # TfidfVectorizer rejects NaN/None documents, so missing feedback
        # becomes an empty string (which contributes no TF-IDF terms).
        features[self.text_feature] = features[self.text_feature].fillna('').astype(str)
        return features

    def _tabular_transformers(self):
        """Build the (name, transformer, columns) specs shared by model and clustering."""
        numeric = Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ])
        categorical = Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])
        return [
            ('num', numeric, self.numeric_features),
            ('cat', categorical, self.categorical_features)
        ]

    def _normalize_channels(self, channels):
        """Coerce a row's ``preferred_channels`` value into a list of strings.

        Accepts a real list/tuple, a list that was stringified by a CSV round
        trip (``"['Phone', 'Email']"``), a single channel name, or a missing
        value (``None``/NaN/empty), which yields the default channels.
        """
        if channels is None:
            return list(self._DEFAULT_CHANNELS)
        if isinstance(channels, float) and pd.isna(channels):
            return list(self._DEFAULT_CHANNELS)
        if isinstance(channels, str):
            text = channels.strip()
            if not text:
                return list(self._DEFAULT_CHANNELS)
            if text.startswith('['):
                import ast  # stdlib; literal_eval only parses literals, never executes code
                try:
                    parsed = ast.literal_eval(text)
                except (ValueError, SyntaxError):
                    parsed = None
                if isinstance(parsed, (list, tuple)):
                    return [str(item) for item in parsed]
            # A plain string is one channel; joining it would split it per character.
            return [text]
        return [str(item) for item in channels]

    def _extract_sentiment(self, text):
        """Return TextBlob polarity in [-1, 1]; 0.0 for missing or blank text."""
        # Sentiment polarity extraction [-1,1]
        if pd.isna(text) or not str(text).strip():
            return 0.0
        return TextBlob(str(text)).sentiment.polarity

    # --------------------------------------------------------------- data / fit

    def load_data(self, df):
        """Copy ``df``, derive sentiment and the rule-based label, and store it.

        A row is labelled ``service_urgent = 1`` when the next service is due
        within 120 days, the feedback score is at most 2, or the feedback text
        mentions "poor", "unresponsive" or "delay" (case-insensitive).

        Note: this label is a deterministic function of columns the classifier
        also receives as inputs, so validation scores reflect how well the rule
        is re-learned rather than real-world predictive power.

        Args:
            df: Input DataFrame; it is not modified.

        Returns:
            The prepared copy (also stored on ``self.df``).
        """
        df = df.copy()
        # Drop outputs of a previous run so they are recomputed for this data.
        df = df.drop(columns=[col for col in self._DERIVED_COLUMNS if col in df.columns])
        # Sentiment feature
        df['feedback_sentiment'] = df[self.text_feature].apply(self._extract_sentiment)
        # Normalise only for matching, so an all-missing (non-string) column still works.
        feedback_text = df[self.text_feature].fillna('').astype(str)
        # Target (example logic, can be replaced by real label)
        df['service_urgent'] = np.where(
            (df['next_service_due_days'] <= 120) |
            (df['feedback_score'] <= 2) |
            (feedback_text.str.contains('poor|unresponsive|delay', case=False, na=False)), 1, 0)
        self.df = df
        return df

    def build_preprocessor(self):
        """Create (unfitted) preprocessing for the classifier and store it.

        Returns:
            ``ColumnTransformer`` with scaled numerics, one-hot categoricals and
            a TF-IDF encoding (at most 100 terms) of the feedback text.
        """
        # Text vectorizer for feedback (100 features)
        text_transformer = TfidfVectorizer(max_features=100)
        self.preprocessor = ColumnTransformer(
            self._tabular_transformers() + [('text', text_transformer, self.text_feature)]
        )
        return self.preprocessor

    def train_model(self):
        """Fit the urgency classifier and write ``predicted_urgency`` for every row.

        Holds out 20% of the rows, prints a classification report for them, and
        then predicts on the full data set (training rows included).

        Returns:
            The fitted ``Pipeline``.

        Raises:
            RuntimeError: If ``load_data`` has not been called.
        """
        X = self._model_inputs()
        y = self.df['service_urgent']
        self.build_preprocessor()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model = Pipeline([
            ('preprocess', self.preprocessor),
            ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
        ])
        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)
        print("\nClassification Report (validation):\n", classification_report(y_test, y_pred))
        self.df['predicted_urgency'] = self.model.predict(X)
        return self.model

    def cluster_segments(self):
        """Cluster customers on numeric + categorical features (no text).

        Writes ``customer_cluster`` to ``self.df``, prints the cluster sizes and
        keeps the fitted preprocessor on ``self.cluster_preprocessor``.

        Returns:
            The fitted ``KMeans`` instance.

        Raises:
            RuntimeError: If ``load_data`` has not been called.
        """
        # Process data for clustering (no text)
        data = self._require_data()
        X_cluster = data[self.numeric_features + self.categorical_features]
        # Simple preprocessing (scale + onehot)
        self.cluster_preprocessor = ColumnTransformer(self._tabular_transformers())
        feat_mat = self.cluster_preprocessor.fit_transform(X_cluster)
        # n_init is pinned because its default differs between scikit-learn releases.
        self.clustering = KMeans(n_clusters=self.n_clusters, n_init=10, random_state=42)
        data['customer_cluster'] = self.clustering.fit_predict(feat_mat)
        print(f"\nCluster counts:\n{data['customer_cluster'].value_counts()}\n")
        return self.clustering

    # ---------------------------------------------------------------- messaging

    def generate_message(self, row):
        """Render the reminder text for one customer row.

        Args:
            row: Mapping-like row (``pd.Series`` or ``dict``) that must contain
                ``customer_cluster``; other fields fall back to defaults.

        Returns:
            The message; an apology/discount suffix is appended when the row is
            predicted urgent and its feedback score is at most 2.
        """
        templates = {
            0: "Hi {type} customer! We've lined up special attention for your {make} {model}. Service due in {due} days. Book via AutoMoto AI for an extra 10% off!",
            1: "Hello {type} customer! Your {make} {model} needs maintenance in {due} days. Our team is ready on {channel}. Let AutoMoto AI make it easy!",
            2: "Dear valued {type} customer: Keep your {make} {model} running smooth—service is due in {due} days. Tap here to confirm your spot with AutoMoto AI!"
        }
        # Unknown cluster ids fall back to template 0.
        temp = templates.get(row['customer_cluster'], templates[0])
        msg = temp.format(
            type=row.get('customer_type', 'Valued'),
            make=row.get('make', ''),
            model=row.get('model', ''),
            due=row.get('next_service_due_days', 'N/A'),
            channel='/'.join(self._normalize_channels(row.get('preferred_channels')))
        )
        # Add feedback-conditional text
        if row.get('predicted_urgency') == 1 and row.get('feedback_score', 5) <= 2:
            msg += " (Apology: You're eligible for our priority care and a 15% recovery discount.)"
        return msg

    def recommend_channels(self, row):
        """Return the contact channels for a row, chosen by its cluster id."""
        # Learn or use historical best channels—here, segmented by cluster
        cluster = row['customer_cluster']
        if cluster == 0:
            return ['WhatsApp', 'Phone', 'Email']
        if cluster == 1:
            return ['WhatsApp', 'Email']
        return ['Email', 'SMS']

    def generate_reminder_table(self):
        """Build the table of customers whose predicted urgency is 1.

        Predicts urgency and clusters customers first if those columns are
        missing, then (re)computes channels and messages on ``self.df``.

        Returns:
            A new DataFrame with vehicle, customer, feedback, channel, message
            and cluster columns for the urgent rows.

        Raises:
            RuntimeError: If no data is loaded, or urgency must be predicted
                but no model has been trained or loaded.
        """
        data = self._require_data()
        # Predict urgency if not yet done
        if 'predicted_urgency' not in data.columns:
            if self.model is None:
                raise RuntimeError(
                    "No trained model available; call train_model() or load_model() first.")
            data['predicted_urgency'] = self.model.predict(self._model_inputs())
        # Cluster segments if not yet done
        if 'customer_cluster' not in data.columns:
            self.cluster_segments()
        # Channel and message (channels first: the message template reads them)
        data['preferred_channels'] = data.apply(self.recommend_channels, axis=1)
        data['personalized_message'] = data.apply(self.generate_message, axis=1)
        # Return those needing reminder
        columns = [
            'make', 'model', 'customer_type', 'next_service_due_days',
            'customer_feedback', 'feedback_score', 'preferred_channels',
            'personalized_message', 'customer_cluster'
        ]
        # .copy() so callers can edit the result without touching self.df.
        return data.loc[data['predicted_urgency'] == 1, columns].copy()

    # -------------------------------------------------------------- persistence

    def save_model(self, path='automotoai_reminder_model.pkl'):
        """Persist the fitted classifier pipeline with joblib.

        Raises:
            RuntimeError: If there is no model to save.
        """
        if self.model is None:
            raise RuntimeError("No trained model to save; call train_model() first.")
        joblib.dump(self.model, path)
        print(f"Model saved as {path}")

    def load_model(self, path='automotoai_reminder_model.pkl'):
        """Load a classifier pipeline previously written by ``save_model``."""
        # SECURITY: joblib.load unpickles arbitrary objects; only load trusted files.
        self.model = joblib.load(path)
        print(f"Model loaded from {path}")

    def save_reminder_table(self, path='automotoai_reminders.csv'):
        """Generate the reminder table and write it to ``path`` as CSV (no index)."""
        reminder_table = self.generate_reminder_table()
        reminder_table.to_csv(path, index=False)
        print(f"Reminder table saved as {path}")

# ------------ Usage Example -------------

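# 1. Load your customer/service/feedback DataFrame. It must contain the columns
#    the agent reads: customer_feedback, feedback_score, last_service_cost,
#    days_since_last_service, next_service_due_days, age_of_vehicle,
#    odometer_reading, customer_type, AMC_status, make and model.
# For a demo, build or simulate a DataFrame named 'df' here, or load one from CSV: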
# df = pd.read_csv("your_service_customer_data.csv")

# 2. Setup the agent and run all steps:
# agent = AutoMotoAIServiceReminder(n_clusters=3)
# agent.load_data(df)
# agent.train_model()
# agent.cluster_segments()
# reminders = agent.generate_reminder_table()
# agent.save_reminder_table('service_reminders.csv')
# agent.save_model('automotoai_reminder_model.pkl')
# print(reminders.head())


In [6]:
# Load the service-reminder dataset from a CSV file in the working directory.
df = pd.read_csv("ai_service_reminders.csv")

In [7]:
# Show the loaded frame as the cell output (the recorded display is truncated by pandas).
df

Unnamed: 0,location,customer_type,preferred_language,make,model,year_of_purchase,age_of_vehicle,fuel_type,transmission,odometer_reading,...,days_since_follow_up,days_since_feedback,days_since_last_call,alert_due,feedback_sentiment,service_urgent,predicted_urgency,customer_segment,personalized_message,preferred_channels
0,OMR,Retail,Tamil,Ford,Aspire,2019,6,Electric,Automatic,59174,...,0.0,268,34,True,-0.4,1,1,Critical,"Dear Retail Customer,\n\nWe sincerely apologiz...","['Phone', 'WhatsApp', 'Email']"
1,T Nagar,Fleet,Hindi,Honda,City,2015,10,Electric,Manual,77667,...,114.0,212,33,True,0.0,1,1,Critical,"Dear Fleet Customer,\n\nWe sincerely apologize...","['Phone', 'WhatsApp', 'Email']"
2,Anna Nagar,Fleet,Tamil,Ford,Figo,2016,9,Diesel,Manual,110023,...,0.0,182,40,True,-0.4,1,1,Critical,"Dear Fleet Customer,\n\nWe sincerely apologize...","['Phone', 'WhatsApp', 'Email']"
3,T Nagar,Fleet,Hindi,Toyota,Innova,2021,4,Electric,Manual,96833,...,17.0,409,-69,True,-0.4,1,1,Critical,"Dear Fleet Customer,\n\nWe sincerely apologize...","['Phone', 'WhatsApp', 'Email']"
4,Anna Nagar,Fleet,Tamil,Honda,Amaze,2020,5,Diesel,Automatic,68402,...,0.0,94,114,False,-0.4,1,1,Critical,"Dear Fleet Customer,\n\nWe sincerely apologize...","['Phone', 'WhatsApp', 'Email']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
671,Velachery,Retail,Tamil,Ford,Figo,2021,4,Petrol,Automatic,93909,...,8.0,111,12,False,0.0,1,1,Critical,"Dear Retail Customer,\n\nWe sincerely apologiz...","['Phone', 'WhatsApp', 'Email']"
672,Anna Nagar,Retail,Hindi,Hyundai,i20,2015,10,Diesel,Manual,101418,...,30.0,160,37,False,0.0,1,1,Critical,"Dear Retail Customer,\n\nWe sincerely apologiz...","['Phone', 'WhatsApp', 'Email']"
673,T Nagar,Retail,Tamil,Toyota,Innova,2021,4,Electric,Automatic,52739,...,16.0,106,-69,False,0.0,1,1,Critical,"Dear Retail Customer,\n\nWe sincerely apologiz...","['Phone', 'WhatsApp', 'Email']"
674,OMR,Fleet,Tamil,Hyundai,i10,2015,10,Petrol,Automatic,66442,...,0.0,337,34,True,0.0,1,1,Critical,"Dear Fleet Customer,\n\nWe sincerely apologize...","['Phone', 'WhatsApp', 'Email']"


In [9]:
# End-to-end run. The order matters: data must be loaded before training,
# and the reminder table needs both predictions and clusters.
agent = AutoMotoAIServiceReminder(n_clusters=3)
# Adds feedback_sentiment and the rule-based service_urgent label to a copy of df.
agent.load_data(df)
# Fits the classifier and prints the validation classification report.
# NOTE(review): the recorded report below lists only class 1, i.e. every
# validation row is labelled urgent, so the 1.00 scores are not informative.
agent.train_model()
# Assigns customer_cluster and prints the cluster sizes.
agent.cluster_segments()
reminders = agent.generate_reminder_table()
# save_reminder_table() calls generate_reminder_table() again before writing the CSV.
agent.save_reminder_table('ai_agent_service_reminders.csv')
agent.save_model('automotoai_reminder_model.pkl')
print(reminders.head())


Classification Report (validation):
               precision    recall  f1-score   support

           1       1.00      1.00      1.00       136

    accuracy                           1.00       136
   macro avg       1.00      1.00      1.00       136
weighted avg       1.00      1.00      1.00       136


Cluster counts:
2    277
1    227
0    172
Name: customer_cluster, dtype: int64

Reminder table saved as ai_agent_service_reminders.csv
Model saved as automotoai_reminder_model.pkl
     make   model customer_type  next_service_due_days customer_feedback  \
0    Ford  Aspire        Retail                    357      Poor Service   
1   Honda    City         Fleet                    180      Unresponsive   
2    Ford    Figo         Fleet                    180      Poor Service   
3  Toyota  Innova         Fleet                    357      Poor Service   
4   Honda   Amaze         Fleet                    180      Poor Service   

   feedback_score preferred_channels  \
0         