<a href="https://colab.research.google.com/github/nahmdd/Arc-GIS-project/blob/main/Recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [2]:
# ============================================================================
# CONFIGURATION
# ============================================================================

# Input CSV that load_data() reads in the main cell.
DATA_FILE = 'customer_support_issues.csv'
# Output prefix. It is concatenated directly with file names (no path join),
# so it must keep its trailing separator.
OUTPUT_DIR = './'

In [3]:
# ============================================================================
# STEP 1: DATA LOADING & EXPLORATION
# ============================================================================

def load_data(filepath):
    """Read the support-ticket CSV and parse its report dates.

    Args:
        filepath: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        DataFrame whose ``date_reported`` column has been converted with
        ``pd.to_datetime``; all other columns are left as read.
    """
    tickets = pd.read_csv(filepath)
    # Convert once here so later date-based resampling works on real timestamps.
    tickets['date_reported'] = pd.to_datetime(tickets['date_reported'])
    return tickets


In [4]:
# ============================================================================
# STEP 2: COMPREHENSIVE ISSUE ANALYSIS
# ============================================================================

def analyze_issues(df):
    """Print a six-section ticket report and return its headline aggregates.

    Reads the columns ``issue_type``, ``severity``, ``resolved``,
    ``resolution_time_hours`` and ``product``.

    Args:
        df: Ticket DataFrame.

    Returns:
        dict with keys ``issue_counts`` (tickets per issue type),
        ``severity_analysis`` (mean/max/min severity per issue type, sorted by
        mean descending), ``resolution_rate`` (percentage of resolved tickets)
        and ``product_counts`` (tickets per product).
    """
    banner = "="*70
    rule = "-" * 50

    def heading(title):
        # Every numbered section opens with its title followed by a dashed rule.
        print(title)
        print(rule)

    ticket_total = len(df)

    print("\n" + banner)
    print("CUSTOMER SUPPORT ISSUE ANALYSIS & INSIGHTS")
    print(banner)

    # 1. Issue frequency
    heading("\n1. ISSUE FREQUENCY DISTRIBUTION:")
    issue_counts = df['issue_type'].value_counts()
    issue_percentage = (issue_counts / ticket_total * 100).round(2)
    for issue, count in issue_counts.items():
        pct = issue_percentage[issue]
        print(f"   {issue:.<40} {count:>3} ({pct:>5}%)")

    # 2. Severity analysis
    heading("\n2. SEVERITY ANALYSIS BY ISSUE TYPE:")
    severity_by_issue = (
        df.groupby('issue_type')['severity']
        .agg(['mean', 'max', 'min'])
        .sort_values('mean', ascending=False)
    )
    for issue, stats in severity_by_issue.iterrows():
        print(f"   {issue:.<35} Avg: {stats['mean']:.2f} | Max: {int(stats['max'])} | Min: {int(stats['min'])}")

    # 3. Resolution metrics
    heading("\n3. RESOLUTION SUCCESS METRICS:")
    resolved_total = df['resolved'].sum()
    resolution_rate = resolved_total / ticket_total * 100
    print(f"   Total Tickets: {ticket_total}")
    print(f"   Resolved: {resolved_total} ({resolution_rate:.1f}%)")
    print(f"   Unresolved: {ticket_total - resolved_total} ({100-resolution_rate:.1f}%)")

    # 4. Resolution time (only tickets flagged as resolved are considered)
    heading("\n4. RESOLUTION TIME METRICS (Hours):")
    resolved_hours = df[df['resolved'] == True]['resolution_time_hours']
    print(f"   Average: {resolved_hours.mean():.1f}h")
    print(f"   Median: {resolved_hours.median():.0f}h")
    print(f"   Max: {resolved_hours.max()}h")
    print(f"   Min: {resolved_hours.min()}h")

    # 5. Most affected products
    heading("\n5. MOST AFFECTED PRODUCTS:")
    product_counts = df['product'].value_counts()
    for product, count in product_counts.items():
        pct = count / ticket_total * 100
        print(f"   {product:.<40} {count:>3} ({pct:>5.1f}%)")

    # 6. High severity issues; percentages are relative to the high-severity subset
    heading("\n6. HIGH SEVERITY ISSUES (Severity >= 4):")
    high_severity = df[df['severity'] >= 4]
    high_total = len(high_severity)
    for issue, count in high_severity['issue_type'].value_counts().items():
        pct = count / high_total * 100
        print(f"   {issue:.<40} {count:>3} ({pct:>5.1f}%)")

    print("\n" + banner)

    return {
        'issue_counts': issue_counts,
        'severity_analysis': severity_by_issue,
        'resolution_rate': resolution_rate,
        'product_counts': product_counts
    }

In [5]:
# ============================================================================
# STEP 3: BUILD RECOMMENDATION ENGINE
# ============================================================================

def build_recommendation_engine(df):
    """Fit label encoders, build the ticket feature matrix and score solutions.

    The input frame is copied before the encoded columns are added, so the
    caller's DataFrame is not modified.

    Args:
        df: Ticket DataFrame with the columns ``issue_type``, ``product``,
            ``recommended_solution``, ``severity``, ``resolved`` and
            ``resolution_time_hours``.

    Returns:
        dict with keys:
            ``feature_matrix``: array with one row per ticket and the columns
                [issue code, product code, severity, resolved as int,
                resolution hours / 100].
            ``encoders``: fitted ``LabelEncoder`` objects under the keys
                ``issue``, ``product`` and ``solution``.
            ``solution_scores``: mapping of solution name to effectiveness
                score.
            ``df``: the copied DataFrame including the ``*_encoded`` columns.
    """

    print("\n" + "="*70)
    print("BUILDING RECOMMENDATION ENGINE")
    print("="*70)

    # Work on a copy: the encoded helper columns belong to the engine, not to
    # the frame the caller keeps using for analysis and plotting.
    df = df.copy()

    # Encode categorical features
    le_issue = LabelEncoder()
    le_product = LabelEncoder()
    le_solution = LabelEncoder()

    df['issue_encoded'] = le_issue.fit_transform(df['issue_type'])
    df['product_encoded'] = le_product.fit_transform(df['product'])
    df['solution_encoded'] = le_solution.fit_transform(df['recommended_solution'])

    # Build feature matrix; the column order here is the layout a query
    # vector has to follow when it is compared against this matrix.
    feature_matrix = np.column_stack([
        df['issue_encoded'],
        df['product_encoded'],
        df['severity'],
        df['resolved'].astype(int),
        df['resolution_time_hours'] / 100
    ])

    # Calculate solution effectiveness from per-solution means
    solution_effectiveness = df.groupby('recommended_solution').agg({
        'resolved': 'mean',
        'resolution_time_hours': 'mean',
        'severity': 'mean'
    }).reset_index()

    # Score = resolution rate in percent, minus 20 points for every 72 hours
    # of mean resolution time.
    solution_effectiveness['effectiveness_score'] = (
        (solution_effectiveness['resolved'] * 100) -
        (solution_effectiveness['resolution_time_hours'] / 72 * 20)
    )

    solution_scores = dict(zip(
        solution_effectiveness['recommended_solution'],
        solution_effectiveness['effectiveness_score']
    ))

    print("\nSOLUTION EFFECTIVENESS SCORES:")
    print("-" * 50)
    # Only the ten best-scoring solutions are printed; all are returned.
    for solution, score in sorted(solution_scores.items(), key=lambda x: x[1], reverse=True)[:10]:
        print(f"   {solution:.<40} Score: {score:>6.2f}")

    return {
        'feature_matrix': feature_matrix,
        'encoders': {
            'issue': le_issue,
            'product': le_product,
            'solution': le_solution
        },
        'solution_scores': solution_scores,
        'df': df
    }

In [6]:
# ============================================================================
# STEP 4: RECOMMENDATION FUNCTION
# ============================================================================

def recommend_products(engine, issue_type, product, severity, top_n=5):
    """
    Suggest solutions for a ticket based on the most similar past tickets.

    The ticket is turned into a vector with the same column layout as the
    engine's feature matrix (with "resolved" fixed to 1 and the time feature
    fixed to 50 / 100), compared to every historical ticket by cosine
    similarity, and the solutions of the closest ``top_n * 3`` tickets are
    de-duplicated and ranked by effectiveness score.

    Args:
        engine: Dict produced by build_recommendation_engine()
        issue_type: Issue label; must be known to the engine's issue encoder
        product: Product label; must be known to the engine's product encoder
        severity: Issue severity (1-5)
        top_n: Maximum number of recommendations to return

    Returns:
        List of dicts sorted by descending ``effectiveness_score``. Each dict
        has the keys ``product`` (the recommended solution name),
        ``effectiveness_score`` and ``frequency`` (number of historical
        tickets carrying that solution).
    """

    issue_encoder = engine['encoders']['issue']
    product_encoder = engine['encoders']['product']
    feature_matrix = engine['feature_matrix']
    scores = engine['solution_scores']
    history = engine['df']

    # Create query vector
    query_vector = np.array([
        issue_encoder.transform([issue_type])[0],
        product_encoder.transform([product])[0],
        severity,
        1,
        50 / 100
    ]).reshape(1, -1)

    # Rank historical tickets from most to least similar and keep a pool that
    # is three times larger than the number of recommendations requested.
    similarities = cosine_similarity(query_vector, feature_matrix)[0]
    nearest = np.argsort(similarities)[::-1][:top_n * 3]

    # Walk the pool in similarity order, keeping the first sighting of each solution
    solution_column = history['recommended_solution']
    seen = set()
    candidates = []
    for solution in solution_column.iloc[nearest].values:
        if solution in seen:
            continue
        seen.add(solution)
        candidates.append({
            'product': solution,
            'effectiveness_score': scores.get(solution, 0),
            'frequency': len(history[history['recommended_solution'] == solution])
        })

    candidates.sort(key=lambda rec: rec['effectiveness_score'], reverse=True)
    return candidates[:top_n]


In [7]:
# ============================================================================
# STEP 5: VISUALIZATION FUNCTIONS
# ============================================================================

def create_analysis_dashboard(df):
    """Render the 3x3 ticket-analysis dashboard and save it as a PNG.

    Reads the columns ``issue_type``, ``severity``, ``resolved``,
    ``resolution_time_hours``, ``product`` and ``date_reported``.

    Args:
        df: Ticket DataFrame; ``date_reported`` must be datetime-like so it
            can be resampled by month.

    Side effects:
        Writes ``analysis_dashboard.png`` under the ``OUTPUT_DIR`` prefix,
        prints progress messages and closes the figure (nothing is shown
        inline).
    """

    print("\nGenerating visualizations...")

    fig = plt.figure(figsize=(18, 14))

    # 1. Issue distribution
    ax1 = plt.subplot(3, 3, 1)
    issue_counts = df['issue_type'].value_counts()
    colors = sns.color_palette("husl", len(issue_counts))
    ax1.pie(issue_counts.values, labels=issue_counts.index, autopct='%1.1f%%', colors=colors)
    ax1.set_title('Issue Type Distribution', fontweight='bold')

    # 2. Issue frequency
    ax2 = plt.subplot(3, 3, 2)
    issue_counts.plot(kind='barh', ax=ax2, color=colors)
    ax2.set_xlabel('Count')
    ax2.set_title('Issue Frequency', fontweight='bold')
    ax2.invert_yaxis()

    # 3. Severity distribution
    ax3 = plt.subplot(3, 3, 3)
    severity_dist = df['severity'].value_counts().sort_index()
    # Colour each bar by its severity level rather than by its position, so a
    # level keeps its colour even when lower levels are missing from the data.
    severity_palette = {
        1: '#2ecc71',
        2: '#f39c12',
        3: '#e74c3c',
        4: '#c0392b',
        5: '#8b0000',
    }
    severity_colors = [severity_palette.get(level, '#7f8c8d') for level in severity_dist.index]
    ax3.bar(severity_dist.index, severity_dist.values, color=severity_colors)
    ax3.set_xlabel('Severity Level')
    ax3.set_ylabel('Count')
    ax3.set_title('Severity Distribution', fontweight='bold')

    # 4. Resolution rate
    ax4 = plt.subplot(3, 3, 4)
    resolution_rate = (df['resolved'].sum() / len(df) * 100)
    ax4.pie([resolution_rate, 100-resolution_rate], labels=['Resolved', 'Unresolved'],
            autopct='%1.1f%%', colors=['#2ecc71', '#e74c3c'])
    ax4.set_title('Resolution Success Rate', fontweight='bold')

    # 5. Resolution time by issue
    ax5 = plt.subplot(3, 3, 5)
    avg_time = df.groupby('issue_type')['resolution_time_hours'].mean().sort_values(ascending=False)
    ax5.barh(range(len(avg_time)), avg_time.values, color=sns.color_palette("RdYlGn_r", len(avg_time)))
    ax5.set_yticks(range(len(avg_time)))
    ax5.set_yticklabels(avg_time.index)
    ax5.set_xlabel('Avg Time (Hours)')
    ax5.set_title('Resolution Time by Issue', fontweight='bold')

    # 6. Products affected
    ax6 = plt.subplot(3, 3, 6)
    product_counts = df['product'].value_counts()
    ax6.bar(range(len(product_counts)), product_counts.values,
            color=sns.color_palette("coolwarm", len(product_counts)))
    ax6.set_xticks(range(len(product_counts)))
    ax6.set_xticklabels(product_counts.index, rotation=45, ha='right')
    ax6.set_ylabel('Issues')
    ax6.set_title('Issues by Product', fontweight='bold')

    # 7. Heatmap
    ax7 = plt.subplot(3, 3, 7)
    pivot_data = pd.crosstab(df['issue_type'], df['severity'])
    sns.heatmap(pivot_data, annot=True, fmt='d', cmap='YlOrRd', ax=ax7)
    ax7.set_title('Issue Type vs Severity', fontweight='bold')

    # 8. Resolution by issue
    ax8 = plt.subplot(3, 3, 8)
    res_by_issue = df.groupby('issue_type')['resolved'].apply(lambda x: x.sum()/len(x)*100).sort_values(ascending=False)
    ax8.barh(range(len(res_by_issue)), res_by_issue.values, color=sns.color_palette("Greens_d", len(res_by_issue)))
    ax8.set_yticks(range(len(res_by_issue)))
    ax8.set_yticklabels(res_by_issue.index)
    ax8.set_xlabel('Resolution Rate (%)')
    ax8.set_title('Success Rate by Issue', fontweight='bold')

    # 9. Temporal trend
    ax9 = plt.subplot(3, 3, 9)
    by_date = df.set_index('date_reported')
    try:
        # 'ME' is the month-end alias from pandas 2.2 on, where 'M' is deprecated.
        issues_monthly = by_date.resample('ME').size()
    except ValueError:
        # Older pandas rejects 'ME' as an invalid frequency and only knows 'M'.
        issues_monthly = by_date.resample('M').size()
    ax9.plot(issues_monthly.index, issues_monthly.values, marker='o', linewidth=2, markersize=8, color='#3498db')
    ax9.fill_between(issues_monthly.index, issues_monthly.values, alpha=0.3, color='#3498db')
    ax9.set_xlabel('Month')
    ax9.set_ylabel('Issues')
    ax9.set_title('Ticket Volume Over Time', fontweight='bold')
    ax9.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig(f'{OUTPUT_DIR}analysis_dashboard.png', dpi=300, bbox_inches='tight')
    print("✓ Dashboard saved: analysis_dashboard.png")
    # Close this specific figure so repeated runs do not accumulate open figures.
    plt.close(fig)


In [8]:
# ============================================================================
# STEP 6: EXPORT RESULTS
# ============================================================================

def export_recommendations_report(engine, test_cases=None):
    """Write recommendations for a set of sample tickets to a text report.

    Args:
        engine: Dict produced by build_recommendation_engine().
        test_cases: Optional iterable of ``(issue_type, product, severity)``
            tuples to report on. Labels must be values the engine's encoders
            were fitted on. Defaults to five built-in sample tickets.

    Side effects:
        Writes ``recommendations_report.txt`` (UTF-8) under the ``OUTPUT_DIR``
        prefix and prints a confirmation line.
    """

    if test_cases is None:
        # Built-in sample tickets
        test_cases = [
            ('Software Bug', 'Cloud Analytics', 5),
            ('Performance Issues', 'API Gateway', 4),
            ('Data Sync Problem', 'Mobile App', 5),
            ('Connectivity Issues', 'API Gateway', 3),
            ('Authentication Error', 'Software Suite Pro', 4),
        ]

    rule = "-"*70 + "\n"

    # Collect the pieces and join once instead of growing a string repeatedly.
    parts = [
        "PRODUCT RECOMMENDATION SYSTEM - RECOMMENDATIONS REPORT\n",
        "="*70 + "\n\n",
    ]

    for i, (issue, product, severity) in enumerate(test_cases, 1):
        recommendations = recommend_products(engine, issue, product, severity)

        parts.append(f"CASE {i}: {issue}\n")
        parts.append(f"Product: {product} | Severity: {severity}/5\n")
        parts.append(rule)
        parts.append(f"{'#':<3} {'Recommended Solution':<40} {'Score':<10} {'Freq':<6}\n")
        parts.append(rule)

        for j, rec in enumerate(recommendations, 1):
            # rec['product'] carries the recommended solution name
            parts.append(f"{j:<3} {rec['product']:<40} {rec['effectiveness_score']:>6.2f}   {rec['frequency']:>4}\n")

        parts.append("\n")

    # Explicit encoding: solution names come from the data and may not be
    # representable in the platform's default encoding.
    with open(f'{OUTPUT_DIR}recommendations_report.txt', 'w', encoding='utf-8') as f:
        f.write("".join(parts))

    print("✓ Recommendations report saved: recommendations_report.txt")

def export_analysis_summary(analysis_results):
    """Write the issue counts, severity statistics and resolution rate to a text file.

    Args:
        analysis_results: Dict as returned by analyze_issues(); the keys
            ``issue_counts``, ``severity_analysis`` and ``resolution_rate``
            are read.

    Side effects:
        Writes ``analysis_summary.txt`` (UTF-8) under the ``OUTPUT_DIR``
        prefix and prints a confirmation line.
    """

    parts = [
        "DATA ANALYSIS SUMMARY\n",
        "="*70 + "\n\n",
        "ISSUE FREQUENCY:\n",
    ]
    for issue, count in analysis_results['issue_counts'].items():
        parts.append(f"  {issue}: {count}\n")

    parts.append("\nSEVERITY ANALYSIS:\n")
    for issue, row in analysis_results['severity_analysis'].iterrows():
        parts.append(f"  {issue}: Avg={row['mean']:.2f}, Max={int(row['max'])}, Min={int(row['min'])}\n")

    parts.append(f"\nRESOLUTION RATE: {analysis_results['resolution_rate']:.1f}%\n")

    # Explicit encoding: issue names come from the data and may not be
    # representable in the platform's default encoding.
    with open(f'{OUTPUT_DIR}analysis_summary.txt', 'w', encoding='utf-8') as f:
        f.write("".join(parts))

    print("✓ Analysis summary saved: analysis_summary.txt")


In [9]:
# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Runs the pipeline up to a sample recommendation. The names `df`,
# `analysis_results` and `engine` bound here are reused by the later
# visualization and export cells, so this cell has to run before them.
if __name__ == "__main__":

    print("\n" + "="*70)
    print("INTELLIGENT PRODUCT RECOMMENDATION SYSTEM")
    print("="*70)

    # Step 1: Load data from the CSV named in the configuration cell
    print("\n[1] Loading data...")
    df = load_data(DATA_FILE)
    print(f"✓ Loaded {len(df)} records")

    # Step 2: Analyze issues (prints the report and keeps the aggregates for export)
    print("\n[2] Analyzing issues...")
    analysis_results = analyze_issues(df)

    # Step 3: Build recommendation engine (encoders, feature matrix, solution scores)
    print("\n[3] Building recommendation engine...")
    engine = build_recommendation_engine(df)
    print("✓ Engine built successfully")

    # Step 4: Generate recommendations for one hard-coded sample ticket
    print("\n[4] Generating recommendations...")
    test_recommendations = recommend_products(
        engine, 'Software Bug', 'Cloud Analytics', 5
    )
    print("\nSample Recommendations for 'Software Bug' on 'Cloud Analytics' (Severity 5):")
    # rec['product'] holds the recommended solution name
    for i, rec in enumerate(test_recommendations, 1):
        print(f"   {i}. {rec['product']} (Score: {rec['effectiveness_score']:.2f})")


INTELLIGENT PRODUCT RECOMMENDATION SYSTEM

[1] Loading data...
✓ Loaded 500 records

[2] Analyzing issues...

CUSTOMER SUPPORT ISSUE ANALYSIS & INSIGHTS

1. ISSUE FREQUENCY DISTRIBUTION:
--------------------------------------------------
   Software Bug............................ 133 ( 26.6%)
   Performance Issues......................  93 ( 18.6%)
   Connectivity Issues.....................  85 ( 17.0%)
   Authentication Error....................  69 ( 13.8%)
   Data Sync Problem.......................  59 ( 11.8%)
   Compatibility Issue.....................  33 (  6.6%)
   Installation Error......................  28 (  5.6%)

2. SEVERITY ANALYSIS BY ISSUE TYPE:
--------------------------------------------------
   Software Bug....................... Avg: 3.93 | Max: 5 | Min: 3
   Data Sync Problem.................. Avg: 3.93 | Max: 5 | Min: 3
   Authentication Error............... Avg: 3.39 | Max: 4 | Min: 3
   Connectivity Issues................ Avg: 3.02 | Max: 4 | Min: 2
   Com

In [11]:
# Step 5: Create visualizations
# Uses the `df` loaded in the main cell. The dashboard is saved to a PNG and
# its figure is closed afterwards, so no plot is rendered inline here.
print("\n[5] Creating visualizations...")
create_analysis_dashboard(df)


[5] Creating visualizations...

Generating visualizations...
✓ Dashboard saved: analysis_dashboard.png


In [13]:
# Step 6: Export results
# Uses `engine` and `analysis_results` created in the main cell.
print("\n[6] Exporting results...")
export_recommendations_report(engine)
export_analysis_summary(analysis_results)

print("\n" + "="*70)
print("EXECUTION COMPLETE!")
print("="*70)
print("\nGenerated files:")
# NOTE(review): customer_support_issues.csv is the input read by load_data();
# no cell shown in this notebook writes it, so listing it as generated
# presumably refers to a separate data-generation step. Confirm.
print("  • customer_support_issues.csv - Dataset")
print("  • analysis_dashboard.png - Comprehensive visualization")
print("  • recommendations_report.txt - Detailed recommendations")
print("  • analysis_summary.txt - Analysis summary")
print("\n" + "="*70)


[6] Exporting results...
✓ Recommendations report saved: recommendations_report.txt
✓ Analysis summary saved: analysis_summary.txt

EXECUTION COMPLETE!

Generated files:
  • customer_support_issues.csv - Dataset
  • analysis_dashboard.png - Comprehensive visualization
  • recommendations_report.txt - Detailed recommendations
  • analysis_summary.txt - Analysis summary

