In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the read-only Kaggle input
# tree, then echo them one per line in the order os.walk discovers them.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/customer-support-on-twitter/sample.csv
/kaggle/input/customer-support-on-twitter/twcs/twcs.csv


# 📦 Step 1: Import libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 📂 Step 2: Load dataset

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Read the sample CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/customer-support-on-twitter/sample.csv"
)

# Sanity check: confirm the load, list the column names, preview three rows.
print("âœ… Data loaded successfully")
print("Columns:", df.columns)
print(df.iloc[:3])


âœ… Data loaded successfully
Columns: Index(['tweet_id', 'author_id', 'inbound', 'created_at', 'text',
       'response_tweet_id', 'in_response_to_tweet_id'],
      dtype='object')
   tweet_id     author_id  inbound                      created_at  \
0    119237        105834     True  Wed Oct 11 06:55:44 +0000 2017   
1    119238  ChaseSupport    False  Wed Oct 11 13:25:49 +0000 2017   
2    119239        105835     True  Wed Oct 11 13:00:09 +0000 2017   

                                                text response_tweet_id  \
0  @AppleSupport causing the reply to be disregar...            119236   
1  @105835 Your business means a lot to us. Pleas...               NaN   
2  @76328 I really hope you all change but I'm su...            119238   

   in_response_to_tweet_id  
0                      NaN  
1                 119239.0  
2                      NaN  


# 🧹 Step 3: Clean + Keep useful columns

In [5]:
# Keep only the tweet text and its author, discard rows missing either value,
# then cap the frame at its first 5000 rows.
# NOTE(review): `author_id` is kept for every row regardless of `inbound`, so
# it presumably mixes customer IDs with company handles - confirm this is the
# label the downstream model is meant to learn.
df = df.loc[:, ['text', 'author_id']].dropna(axis=0, how='any').iloc[:5000]
print("âœ… Data cleaned and trimmed:", df.shape)

âœ… Data cleaned and trimmed: (93, 2)


# 🧠 Step 4: Split data

In [6]:
# Features are the raw tweet text; the label is the tweet's author.
X, y = df['text'], df['author_id']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print("âœ… Data split done")
print("Train size:", len(X_train), " | Test size:", len(X_test))

âœ… Data split done
Train size: 74  | Test size: 19


# 🔤 Step 5: Vectorize text

In [7]:
# TF-IDF features capped at 2000 terms, with English stop words removed.
vectorizer = TfidfVectorizer(max_features=2000, stop_words='english')

# Fit on the training text only, then reuse the fitted vectorizer for the
# held-out text so both matrices share the same columns.
X_train_tfidf, X_test_tfidf = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)
print("âœ… Vectorization complete")


âœ… Vectorization complete


# 🤖 Step 6: Train model


In [8]:
# Build the classifier (iteration cap raised to 500) and fit it on the
# TF-IDF training matrix in one step; `fit` returns the estimator itself.
model = LogisticRegression(max_iter=500).fit(X_train_tfidf, y_train)
print("âœ… Model trained successfully")

âœ… Model trained successfully


# 📊 Step 7: Evaluate performance


In [9]:
# Predict a label for every held-out tweet, then report the fraction that
# exactly match the true author.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"ðŸŽ¯ Model Accuracy: {accuracy:.2f}")

ðŸŽ¯ Model Accuracy: 0.21


# 💬 Step 8: Chatbot test


In [10]:
def chatbot_response(user_input):
    """Classify a message with the trained model and phrase the result as a reply.

    Relies on the notebook-level ``vectorizer`` (fitted TF-IDF vectorizer) and
    ``model`` (fitted classifier) already being defined when it is called.

    Args:
        user_input: The message to classify. ``None`` and blank or
            whitespace-only messages are treated as "no message".

    Returns:
        str: A reply naming the predicted label, or a prompt asking for a
        message when ``user_input`` is ``None`` or blank.
    """
    reply_prefix = "ðŸ¤– Reply: "

    # A blank message has no tokens, so it would vectorize to an all-zero row
    # and the classifier would still emit a label that says nothing about the
    # input; None would fail inside the vectorizer. Answer both with a prompt.
    if user_input is None or not user_input.strip():
        return reply_prefix + "Please type a message so I can help."

    # The vectorizer expects an iterable of documents, hence the one-item list.
    user_tfidf = vectorizer.transform([user_input])
    prediction = model.predict(user_tfidf)[0]

    # NOTE(review): the label is whatever `author_id` value the model was
    # trained on, which may be a customer ID rather than a company - confirm.
    return f"{reply_prefix}This message seems related to company ID {prediction}"
# Example: run one sample customer message through the bot and show the reply.
example = "I need help with my account"
print(chatbot_response(user_input=example))

ðŸ¤– Reply: This message seems related to company ID AppleSupport


# 💾 Step 9: Save model and vectorizer


In [11]:
import joblib

# Persist both fitted objects exactly once. They are saved as a pair because
# inference transforms text with `vectorizer` before calling `model.predict`,
# so the classifier needs this same fitted vectorizer at load time.
# NOTE: joblib files are pickle-based - only load them from trusted sources.
joblib.dump(model, "chatbot_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

print("âœ… Model and vectorizer saved successfully!")

âœ… Model and vectorizer saved successfully!
âœ… Model and vectorizer saved successfully!
