In [None]:
import pandas as pd
import random
from datetime import datetime, timedelta

# ---- Define Data Elements ----
# Cities a feedback entry can be attributed to.
cities = [
    "Mumbai",
    "Delhi",
    "Bangalore",
    "Chennai",
    "Kolkata",
    "Hyderabad",
    "Jaipur",
    "Surat",
]

# Goods a feedback entry can refer to.
items = [
    "Steel Rods",
    "Copper Wires",
    "Cement",
    "Fasteners",
    "Glass Sheets",
    "Electrical Wiring",
    "Plastic Sheets",
    "Wood Panels",
    "Aluminum Plates",
    "Iron Pipes",
]

# Top-level feedback categories; every category has an entry in `subcategories`.
categories = ["Transportation", "Logistics", "Warehouse Management"]
subcategories = {
    "Transportation": [
        "Route Efficiency",
        "Driver Performance",
        "Vehicle Condition",
    ],
    "Logistics": [
        "Delivery Time",
        "Order Accuracy",
        "Packaging Quality",
    ],
    "Warehouse Management": [
        "Stock Organization",
        "Space Utilization",
        "Equipment Efficiency",
    ],
}

# Named routes that can be substituted into feedback text.
routes = [
    "Mumbai-Pune Expressway",
    "NH48",
    "Delhi-Gurgaon Expressway",
    "Chennai-Bangalore Highway",
    "Kolkata-Durgapur Route",
    "",  # NOTE(review): blank entry; looks like a missing route name, confirm
    "Hyderabad-Vijayawada Road",
    "Surat-Ahmedabad Highway",
]

# Industries used for the "nearby industries" cluster text.
industries = [
    "Steel",
    "Textile",
    "Construction",
    "Automobile",
    "Pharmaceutical",
    "Electronics",
    "Plastic",
]

# ---- Expanded Feedback Templates (90 new + 27 existing) ----
# Maps each subcategory name to 13 feedback sentences (3 original + 10 added).
# Sentences are str.format templates; the placeholders that appear are
# {route}, {new_route}, {hours}, {mins} and {item}. Placeholders are not
# limited to one subcategory: e.g. {route} also appears under
# "Driver Performance", "Vehicle Condition", "Delivery Time" and
# "Packaging Quality".
feedback_templates = {
    "Route Efficiency": [
        # Existing
        "Route via {route} is congested; suggest {new_route}.",
        "Best route is {route}; saved {mins} mins.",
        "Avoid {route} due to construction.",
        # New (10 more)
        "Heavy traffic on {route} delayed shipment by {hours} hrs.",
        "{route} has tolls that increase costs; try {new_route}.",
        "Road conditions on {route} are poor, suggest alternate.",
        "{route} is too narrow for large trucks.",
        "Using {route} avoided city traffic, great choice!",
        "Night travel on {route} is faster by {mins} mins.",
        "{route} flooded during rains; reroute needed.",
        "Shortest path via {route} still took {hours} hrs due to signals.",
        "{new_route} could cut travel time significantly.",
        "Accident on {route} halted delivery; need backup plan."
    ],
    "Driver Performance": [
        # Existing
        "Driver was efficient and polite.",
        "Driver took a longer route unnecessarily.",
        "Driver needs better navigation training.",
        # New (10 more)
        "Driver handled {item} with care, impressive!",
        "Driver ignored GPS, added {hours} hrs to trip.",
        "Rude driver upset the client.",
        "Driver’s quick thinking avoided a delay.",
        "Driver sped through {route}, risking safety.",
        "Excellent communication from driver about delays.",
        "Driver forgot to check {item} quantity.",
        "Driver’s overtime caused extra costs.",
        "Driver parked poorly, blocking warehouse access.",
        "Driver suggested {new_route}, worked well."
    ],
    "Vehicle Condition": [
        # Existing
        "Truck broke down midway.",
        "Vehicle was in great condition.",
        "Old truck slowed delivery by {hours} hrs.",
        # New (10 more)
        "Flat tire on {route} delayed us by {hours} hrs.",
        "New truck handled {item} load perfectly.",
        "Brakes failed, nearly lost {item}.",
        "Vehicle’s AC broke, driver struggled.",
        "Fuel efficiency on this truck saved costs.",
        "Leaky truck damaged {item} during rain.",
        "Old suspension rattled {item} loose.",
        "Vehicle’s GPS was outdated, caused confusion.",
        "Truck’s engine overheated on {route}.",
        "Clean vehicle impressed the client."
    ],
    "Delivery Time": [
        # Existing
        "Delivery was {hours} hours late due to traffic.",
        "On-time delivery, great job!",
        "Delayed by {hours} hours, unacceptable.",
        # New (10 more)
        "Early delivery by {hours} hrs, client thrilled!",
        "Missed deadline by {hours} hrs, lost trust.",
        "Traffic on {route} pushed delivery back {hours} hrs.",
        "Same-day delivery for {item} was flawless.",
        "Late by {hours} hrs due to warehouse delay.",
        "Driver’s detour added {hours} hrs to schedule.",
        "Weather delayed {item} by {hours} hrs.",
        "Express delivery option worked perfectly.",
        "Client waited {hours} hrs, needs apology.",
        "Consistent delays on {route}, rethink timing."
    ],
    "Order Accuracy": [
        # Existing
        "Received wrong item, expected {item}.",
        "Order was perfect, no issues.",
        "Missing {item} in shipment.",
        # New (10 more)
        "Extra {item} delivered by mistake.",
        "Order swapped with another client’s {item}.",
        "Spot-on delivery, every {item} accounted for.",
        "Short by 10 units of {item}, disappointing.",
        "Wrong color {item} sent, client rejected.",
        "Mixed up {item} sizes, caused rework.",
        "Perfect count on {item}, well done!",
        "Label error led to wrong {item} shipment.",
        "Over-delivered {item}, inventory issue.",
        "Missing paperwork for {item} delivery."
    ],
    "Packaging Quality": [
        # Existing
        "Packaging damaged, {item} affected.",
        "Excellent packaging, all intact.",
        "Needs better cushioning for {item}.",
        # New (10 more)
        "Waterproof packing saved {item} in rain.",
        "Crushed box ruined half the {item}.",
        "Fragile {item} arrived unbroken, great job!",
        "Tape failed, {item} spilled out.",
        "Eco-friendly packing impressed client.",
        "Too much plastic used for {item}.",
        "Poor stacking damaged {item} edges.",
        "Reinforced corners protected {item} well.",
        "Packaging tore open on {route} bumps.",
        "Minimal padding worked fine for {item}."
    ],
    "Stock Organization": [
        # Existing
        "Inventory misplaced; took {mins} mins to locate.",
        "Well-organized warehouse, quick access.",
        "Stock labels unclear.",
        # New (10 more)
        "Found {item} in wrong bay, confusing.",
        "Color-coded bins sped up {item} retrieval.",
        "No space for {item}, cluttered aisles.",
        "Digital inventory matched {item} perfectly.",
        "Old {item} mixed with new stock.",
        "Missing {item} from listed spot.",
        "Shelf collapsed, delayed {item} pickup.",
        "Clear signage helped locate {item} fast.",
        "Disorganized {item} caused {mins} min delay.",
        "FIFO system worked well for {item}."
    ],
    "Space Utilization": [
        # Existing
        "Warehouse overcrowded, hard to move.",
        "Plenty of space, efficient layout.",
        "Could use vertical storage better.",
        # New (10 more)
        "No room for {item}, stacked unsafely.",
        "Open floor plan eased {item} handling.",
        "Wasted corner space for {item} storage.",
        "Tight layout slowed {item} movement.",
        "High ceilings underused for {item}.",
        "Extra space allowed quick {item} sorting.",
        "Cramped dock delayed {item} unloading.",
        "Smart shelving fit more {item} easily.",
        "Overflow of {item} blocked pathways.",
        "Modular racks optimized {item} space."
    ],
    "Equipment Efficiency": [
        # Existing
        "Forklift down, slowed operations by {hours} hrs.",
        "New equipment sped up loading.",
        "Equipment needs maintenance.",
        # New (10 more)
        "Conveyor jam held up {item} for {hours} hrs.",
        # The next sentence contains the word "mins" but no {mins} placeholder.
        "Fast pallet truck moved {item} in mins.",
        "Old crane struggled with {item} weight.",
        "Automated sorter boosted {item} prep.",
        "Forklift battery died mid-shift.",
        "New hoist cut {item} loading time.",
        "Rusty dolly scratched {item} surface.",
        "Smooth rollers handled {item} well.",
        "Loud equipment disrupted {item} checks.",
        "Upgraded scanner sped up {item} logs."
    ]
}

# Generic operational improvement suggestions.
suggestions = [
    "Improve route planning with GPS.",
    "Train drivers on time management.",
    "Upgrade vehicle fleet.",
    "Use real-time tracking.",
    "Enhance packaging materials.",
    "Implement barcode system.",
    "Schedule maintenance checks.",
    "Add more staff during peak hours.",
    "Switch to eco-friendly packing.",
    "Install better signage in warehouse.",
]

# Cost-reduction suggestions; two of them are str.format templates that take
# a {percent} value.
price_suggestions = [
    "Negotiate bulk discounts with suppliers.",
    "Optimize route to reduce fuel costs by {percent}%.",
    "Use local suppliers to cut shipping by {percent}%.",
    "Switch to lighter packaging materials.",
    "Lease newer vehicles for efficiency.",
    "Reduce overtime with better scheduling.",
]

# ---- Generate Dataset with 200 Rows ----
def generate_feedback_data(n_rows=200):
    """Generate a synthetic feedback dataset.

    Every row is drawn at random from the module-level tables ``categories``,
    ``subcategories``, ``cities``, ``items``, ``routes``, ``industries``,
    ``feedback_templates``, ``suggestions`` and ``price_suggestions``.
    Ratings are random integers from 1 to 5 and are independent of the
    wording of the feedback text.

    Args:
        n_rows: Number of feedback rows to generate.

    Returns:
        pandas.DataFrame with the columns FeedbackID, Date, Category,
        SubCategory, Location, Item, FeedbackText, Rating, RouteSuggested,
        BestReviews, Suggestions, SuggestionsForOptimizedPrices and
        IndustryClusterAnalysis.
    """
    # Imported locally under private aliases: this file also contains a later
    # ``import datetime``, which rebinds the module-level name ``datetime`` to
    # the module and would make a bare ``datetime(2025, 3, 26)`` call fail.
    from datetime import datetime as _datetime, timedelta as _timedelta

    current_date = _datetime(2025, 3, 26)  # fixed reference date for the dataset
    # The routes table contains a blank entry; never substitute it into text.
    valid_routes = [r for r in routes if r]
    competitor_pool = [f"Comp{n}" for n in range(1, 6)]
    data = []

    for i in range(1, n_rows + 1):
        feedback_id = f"F{i:03d}"
        days_ago = random.randint(1, 365)  # feedback from the last year
        date = (current_date - _timedelta(days=days_ago)).strftime("%Y-%m-%d")
        category = random.choice(categories)
        subcategory = random.choice(subcategories[category])
        location = random.choice(cities)
        item = random.choice(items)

        # Feedback text. Route placeholders occur in several subcategories
        # (not only "Route Efficiency"), so a route and a distinct alternate
        # are always prepared; str.format ignores the values a template
        # does not reference.
        template = random.choice(feedback_templates[subcategory])
        route = random.choice(valid_routes) if valid_routes else ""
        alternates = [r for r in valid_routes if r != route]
        new_route = random.choice(alternates) if alternates else ""
        hours = random.randint(1, 5) if "{hours}" in template else ""
        mins = random.randint(10, 60) if "{mins}" in template else ""
        feedback_text = template.format(
            route=route, new_route=new_route, hours=hours, mins=mins, item=item
        )

        # Rating and best reviews
        rating = random.randint(1, 5)
        best_reviews = feedback_text if rating >= 4 else "N/A"

        # Suggestions are only attached to ratings below 4.
        if rating < 4:
            suggestion = random.choice(suggestions)
            price_suggestion = random.choice(price_suggestions).format(
                percent=random.randint(5, 20)
            )
        else:
            suggestion = "Keep up the good work!"
            price_suggestion = "Pricing is optimal."

        # Record a suggested route only when the feedback text actually names
        # the alternate route.
        route_suggested = new_route if "{new_route}" in template else ""

        # Industry cluster analysis; sample() keeps competitor names distinct.
        nearby_industries = random.sample(industries, random.randint(1, 3))
        competitors = random.sample(competitor_pool, random.randint(1, 3))
        cluster_analysis = (
            f"Nearby Industries: {', '.join(nearby_industries)}; "
            f"Competitors: {', '.join(competitors)}"
        )

        data.append([feedback_id, date, category, subcategory, location, item,
                     feedback_text, rating, route_suggested, best_reviews,
                     suggestion, price_suggestion, cluster_analysis])

    columns = ["FeedbackID", "Date", "Category", "SubCategory", "Location", "Item",
               "FeedbackText", "Rating", "RouteSuggested", "BestReviews",
               "Suggestions", "SuggestionsForOptimizedPrices",
               "IndustryClusterAnalysis"]
    return pd.DataFrame(data, columns=columns)

# ---- Feedback Analysis Function ----
def analyze_feedback(item=None, location=None):
    """Build a text report over freshly generated feedback data.

    A new dataset is generated on every call via ``generate_feedback_data()``
    and optionally narrowed by item and/or location (case-insensitive exact
    match).

    Args:
        item: Item name to filter on, or None/empty for all items.
        location: Location name to filter on, or None/empty for all locations.

    Returns:
        A multi-line string with one section per matching feedback row
        followed by summary statistics, or a "No feedback found ..." message
        when nothing matches.
    """
    df = generate_feedback_data()

    # Filter data
    if item:
        df = df[df["Item"].str.lower() == item.lower()]
    if location:
        df = df[df["Location"].str.lower() == location.lower()]

    if df.empty:
        return f"No feedback found for {item if item else 'any item'}{' in ' + location if location else ''}."

    divider = "-" * 70
    # Collect the pieces and join once instead of growing a string in a loop.
    sections = [
        f"\nFeedback Analysis{' for ' + item if item else ''}{' in ' + location if location else ''}:\n{divider}\n"
    ]
    for _, row in df.iterrows():
        sections.append(
            f"Feedback ID: {row['FeedbackID']}\n"
            f"Date: {row['Date']}\n"
            f"Category: {row['Category']}\n"
            f"SubCategory: {row['SubCategory']}\n"
            f"Location: {row['Location']}\n"
            f"Item: {row['Item']}\n"
            f"Feedback: {row['FeedbackText']}\n"
            f"Rating: {row['Rating']}/5\n"
            f"Route Suggested: {row['RouteSuggested'] or 'None'}\n"
            f"Best Review: {row['BestReviews']}\n"
            f"Suggestion: {row['Suggestions']}\n"
            f"Price Optimization Suggestion: {row['SuggestionsForOptimizedPrices']}\n"
            f"Industry Cluster: {row['IndustryClusterAnalysis']}\n"
            f"{divider}\n"
        )

    # Summary stats
    avg_rating = df["Rating"].mean()
    # Rows without a suggestion store "", which would otherwise dominate the
    # mode and make the "most suggested route" print as blank.
    suggested = df.loc[df["RouteSuggested"] != "", "RouteSuggested"]
    best_route = suggested.mode().iloc[0] if not suggested.empty else "None"
    five_star = df.loc[df["Rating"] == 5, "FeedbackText"]
    top_review = five_star.iloc[0] if not five_star.empty else "No 5-star reviews"
    sections.append(
        f"Summary:\n"
        f"Average Rating: {avg_rating:.2f}/5\n"
        f"Most Suggested Route: {best_route}\n"
        f"Top Review: {top_review}\n"
        f"Total Feedback Entries: {len(df)}"
    )

    return "".join(sections)

# ---- Main Tool ----
def _parse_analyze_command(command):
    """Extract ``(item, location)`` from an analyze command.

    Returns None when *command* does not contain the word ``analyze``;
    otherwise a 2-tuple whose members are None when that filter is absent.
    """
    if "analyze" not in command:
        return None

    query = command.split("analyze")[-1].strip()

    # 'analyze in <location>': after stripping, the query starts with "in "
    # and contains no " in " separator, so it needs its own branch.
    if query.startswith("in "):
        return None, (query[3:].strip() or None)

    # Split on the LAST " in " so an item that itself contains " in " still
    # yields a usable item/location pair.
    item, separator, location = query.rpartition(" in ")
    if not separator:
        return (query or None), None
    return (item.strip() or None), (location.strip() or None)


def feedback_tool():
    """Run the interactive command loop of the feedback analyzer.

    Prints the usage banner, then repeatedly reads a command from standard
    input and prints the report produced by ``analyze_feedback``. The loop
    ends on the ``quit`` command or when standard input is exhausted.
    """
    print("""
📊 Transportation & Logistics Feedback Analyzer 📊
- Analyze feedback with ratings, reviews, suggestions, and industry clusters.
- Commands:
  - 'analyze [item] in [location]' (e.g., 'analyze steel rods in Mumbai')
  - 'analyze [item]' (e.g., 'analyze cement')
  - 'analyze in [location]' (e.g., 'analyze in Delhi')
  - 'quit' to exit
""")

    while True:
        try:
            command = input("Enter command: ").strip().lower()
        except EOFError:
            # Input stream closed (e.g. piped input ended): exit cleanly.
            print("Goodbye!")
            break

        if command == "quit":
            print("Goodbye!")
            break

        parsed = _parse_analyze_command(command)
        if parsed is None:
            print("Unrecognized command. Use 'analyze [item] in [location]', 'analyze [item]', or 'analyze in [location]'.")
            continue

        item, location = parsed
        try:
            print(analyze_feedback(item, location))
        except Exception as e:
            # Top-level boundary of the interactive loop: report and keep going.
            print(f"Error: {e}. Please check your input.")
import pandas as pd
import random
import datetime
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
import joblib

# Data Generation
# Lookup tables for the training-data generator defined below.
cities = [
    "Mumbai",
    "Delhi",
    "Bangalore",
    "Chennai",
    "Kolkata",
    "Hyderabad",
    "Ahmedabad",
    "Pune",
]
items = [
    "Steel Rods",
    "Cement",
    "Electronics",
    "Wood",
    "Textiles",
]
categories = ["Transportation", "Logistics", "Warehouse Management"]
subcategories = {
    "Transportation": ["Route Efficiency", "Delivery Time"],
    "Logistics": ["Shipment Tracking", "Inventory Management"],
    "Warehouse Management": ["Stock Organization", "Space Utilization"],
}
routes = [
    "Mumbai-Delhi",
    "Bangalore-Chennai",
    "Kolkata-Hyderabad",
    "Ahmedabad-Pune",
]
industries = ["Steel", "Textile", "Electronics", "Construction"]

# Only "Route Efficiency" has templates here; both take {route} and one
# also takes {new_route}.
feedback_templates = {
    "Route Efficiency": [
        "The route {route} has frequent delays.",
        "{route} can be optimized using {new_route}.",
    ],
}

def generate_feedback_data(n_rows=500):
    """Generate synthetic feedback rows for model training.

    Category, subcategory, city and industry are drawn at random from the
    module-level tables. The feedback sentence always comes from the
    "Route Efficiency" templates, whatever subcategory was drawn, and the
    rating is a random integer from 1 to 5.

    Args:
        n_rows: Number of rows to generate.

    Returns:
        pandas.DataFrame with the columns Feedback_ID, Date, Category,
        Subcategory, City, Industry, Feedback and Rating. Date holds
        datetime objects between 1 and 365 days before the time of the call.
    """
    now = datetime.datetime.now()  # one reference time for the whole dataset
    templates = feedback_templates["Route Efficiency"]
    data = []
    for i in range(n_rows):
        feedback_id = f"F{i+1:03d}"
        date = now - datetime.timedelta(days=random.randint(1, 365))
        category = random.choice(categories)
        subcategory = random.choice(subcategories[category])
        city = random.choice(cities)
        industry = random.choice(industries)

        # Draw two distinct routes so the proposed alternative is never the
        # same route that is being criticised.
        if len(routes) >= 2:
            route, new_route = random.sample(routes, 2)
        else:
            route = new_route = random.choice(routes)
        feedback_text = random.choice(templates).format(route=route, new_route=new_route)

        rating = random.randint(1, 5)
        data.append([feedback_id, date, category, subcategory, city, industry, feedback_text, rating])
    return pd.DataFrame(
        data,
        columns=["Feedback_ID", "Date", "Category", "Subcategory", "City", "Industry", "Feedback", "Rating"],
    )

# Generate Data
# 1000 synthetic rows; ratings in this data are drawn at random by the generator.
df = generate_feedback_data(1000)

# Data Preprocessing
# Replace each categorical column with integer codes and keep the fitted
# encoder per column so the same mapping can be applied at prediction time.
label_encoders = {}
for col in ["Category", "Subcategory", "City", "Industry"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

X_text = df["Feedback"]
y = df["Rating"]

# TF-IDF features of the feedback sentence (default vectorizer settings).
vectorizer = TfidfVectorizer()
X_text_vec = vectorizer.fit_transform(X_text)

# Feature matrix: the four encoded categorical columns first, followed by the
# densified TF-IDF columns. Prediction code must build inputs in this order.
X_other = df[["Category", "Subcategory", "City", "Industry"]]
X = np.hstack((X_other.values, X_text_vec.toarray()))

# Train Model
# NOTE(review): X_test / y_test are produced here but not used in this
# section, so the classifier is never evaluated.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save Model & Vectorizer
# These three files are the ones predict_feedback_rating() loads.
joblib.dump(model, "feedback_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Prediction Function
def predict_feedback_rating(feedback_text, category, subcategory, city, industry):
    """Predict the rating of one feedback entry with the saved artifacts.

    The model, TF-IDF vectorizer and label encoders are read from
    "feedback_model.pkl", "vectorizer.pkl" and "label_encoders.pkl" in the
    current working directory on every call.

    Args:
        feedback_text: Raw feedback sentence.
        category: Category label (encoded with the saved "Category" encoder).
        subcategory: Subcategory label.
        city: City label.
        industry: Industry label.

    Returns:
        The first element of the model's prediction for this single input.
    """
    clf = joblib.load("feedback_model.pkl")
    tfidf = joblib.load("vectorizer.pkl")
    encoders = joblib.load("label_encoders.pkl")

    # Encode the categorical inputs in the column order used for training.
    raw_values = (
        ("Category", category),
        ("Subcategory", subcategory),
        ("City", city),
        ("Industry", industry),
    )
    encoded = [encoders[column].transform([value])[0] for column, value in raw_values]

    # One feature row: encoded categoricals followed by the TF-IDF vector.
    text_part = tfidf.transform([feedback_text]).toarray()
    features = np.hstack(([encoded], text_part))
    return clf.predict(features)[0]

# Example usage: predict the rating for one hand-written feedback sentence.
example_text = "The route Mumbai-Delhi has frequent delays."
predicted_rating = predict_feedback_rating(
    feedback_text=example_text,
    category="Transportation",
    subcategory="Route Efficiency",
    city="Mumbai",
    industry="Steel",
)
print(f"Predicted Rating: {predicted_rating}")

import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error

# 🔹 Load Dataset (Modify the filename as needed)
# The code below reads the columns 'FeedbackText', 'Category', 'Subcategory',
# 'City', 'Industry' and 'Rating' from this file.
df = pd.read_csv("feedback.csv")  # Ensure dataset contains: 'FeedbackText', 'Category', 'Subcategory', 'City', 'Industry', 'Rating'

# 🔹 Encode Categorical Columns
# One fitted encoder is kept per column so the mapping can be reused later.
label_encoders = {}
for col in ["Category", "Subcategory", "City", "Industry"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Convert text labels to numbers
    label_encoders[col] = le

# 🔹 Convert Text to TF-IDF Features
vectorizer = TfidfVectorizer(max_features=5000)  # Adjust feature size as needed
text_features = vectorizer.fit_transform(df["FeedbackText"]).toarray()

# 🔹 Combine Numerical and Text Features
# Column order: the four encoded categoricals first, then the TF-IDF columns.
X = np.hstack((df[["Category", "Subcategory", "City", "Industry"]].values, text_features))
y = df["Rating"].values  # Target variable

# 🔹 Train-Test Split (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🔹 Fine-Tune Model using GridSearchCV
# 3 values for each of 4 parameters = 81 combinations, each fitted on 3 folds.
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1)
grid_search.fit(X_train, y_train)

# 🔹 Get Best Model from Grid Search
best_model = grid_search.best_estimator_

# 🔹 Evaluate Model on Test Data
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")

# 🔹 Save the Fine-Tuned Model and Transformers
# NOTE(review): these are the same filenames the classifier section writes and
# predict_feedback_rating() reads, so this replaces those artifacts with a
# regressor fitted on feedback.csv; confirm that is intended.
joblib.dump(best_model, "feedback_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

print("✅ Model fine-tuned and saved successfully!")
# 🔹 Hyperparameter grid for the random-forest search.
# The name was previously truncated to `aram_grid`, so the search below only
# worked when an earlier `param_grid` happened to be in scope.
# NOTE(review): this section repeats the preceding grid search step for step.
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1)
grid_search.fit(X_train, y_train)

# 🔹 Get Best Model from Grid Search
best_model = grid_search.best_estimator_

# 🔹 Evaluate Model on Test Data
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")

# 🔹 Save the Fine-Tuned Model and Transformers
joblib.dump(best_model, "feedback_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

print("✅ Model fine-tuned with feature engineering and saved successfully!")
# 🔹 Hyperparameter grid for the random-forest search.
# The name was previously truncated to `aram_grid`, so the search below only
# worked when an earlier `param_grid` happened to be in scope.
# NOTE(review): this section repeats the preceding grid search step for step.
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1)
grid_search.fit(X_train, y_train)

# 🔹 Get Best Model from Grid Search
best_model = grid_search.best_estimator_

# 🔹 Evaluate Model on Test Data
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse:.4f}")

# 🔹 Save the Fine-Tuned Model and Transformers
joblib.dump(best_model, "feedback_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

print("✅ Model fine-tuned with feature engineering and saved successfully!")

# ---- Run the Tool ----
# Start the interactive analyzer only when the file is executed directly.
# NOTE(review): when this file runs top to bottom as one script, the sections
# above have already redefined generate_feedback_data and the lookup tables,
# so the tool would see the 8-column training dataset (no "Item"/"Location"
# columns); confirm the intended cell order.
if __name__ == "__main__":
    feedback_tool()


📊 Transportation & Logistics Feedback Analyzer 📊
- Analyze feedback with ratings, reviews, suggestions, and industry clusters.
- Commands:
  - 'analyze [item] in [location]' (e.g., 'analyze steel rods in Mumbai')
  - 'analyze [item]' (e.g., 'analyze cement')
  - 'analyze in [location]' (e.g., 'analyze in Delhi')
  - 'quit' to exit



Enter command:  analyze steel rods in mumbai 



Feedback Analysis for steel rods in mumbai:
----------------------------------------------------------------------
Feedback ID: F022
Date: 2024-09-14
Category: Transportation
SubCategory: Driver Performance
Location: Mumbai
Item: Steel Rods
Feedback: Rude driver upset the client.
Rating: 5/5
Route Suggested: None
Best Review: Rude driver upset the client.
Suggestion: Keep up the good work!
Price Optimization Suggestion: Pricing is optimal.
Industry Cluster: Nearby Industries: Pharmaceutical, Steel; Competitors: Comp2, Comp2
----------------------------------------------------------------------
Feedback ID: F068
Date: 2024-10-03
Category: Warehouse Management
SubCategory: Stock Organization
Location: Mumbai
Item: Steel Rods
Feedback: Digital inventory matched Steel Rods perfectly.
Rating: 3/5
Route Suggested: None
Best Review: N/A
Suggestion: Train drivers on time management.
Price Optimization Suggestion: Negotiate bulk discounts with suppliers.
Industry Cluster: Nearby Industries: St