In [3]:
import pandas as pd
import string
import os

In [6]:

# filler words removed during tokenization so they never count toward a match
STOP_WORDS = (
    "i me my you your he she it we they "
    "is am are was were be been to of and "
    "or but a an the in on at for with "
    "from about just can will need want have do"
).split()

# ====================================================================
# class 1: FidelityServiceDesk
# ====================================================================

class FidelityServiceDesk:
    """one fidelity department plus the keywords that route text to it.

    attributes:
        name: the department name with surrounding whitespace removed.
        keywords: lower-cased, trimmed, de-duplicated keywords in the
            order they were added.
    """

    def __init__(self, name):
        # a fresh desk has no keywords until add_keyword is called
        self.keywords = []
        self.name = name.strip()

    def add_keyword(self, word):
        """store *word* (lower-cased and trimmed) unless blank or already known."""
        normalized = word.lower().strip()
        if not normalized or normalized in self.keywords:
            return
        self.keywords.append(normalized)

    def get_keywords(self):
        """return the desk's keyword list (the live list, not a copy)."""
        return self.keywords

    def score_text(self, text_tokens):
        """return how many of this desk's keywords occur in *text_tokens*.

        each keyword contributes at most one point, no matter how often it
        appears in the tokens.
        """
        return sum(1 for known in self.keywords if known in text_tokens)

# ====================================================================
# class 2: RouterModel
# ====================================================================

class RouterModel:
    """keyword-based router that maps free text to a service desk.

    desks live in ``self.desks`` (name -> FidelityServiceDesk). keywords are
    persisted to ``FILE_PATH`` as one ``name:kw1,kw2,...`` line per desk.
    """

    FILE_PATH = "fidelity_router_config.txt"

    # built once: deletes every ascii punctuation character during cleaning
    _PUNCT_TABLE = str.maketrans('', '', string.punctuation)

    def __init__(self):
        self.desks = {}
        self._initialize_defaults()

    def _initialize_defaults(self):
        """create the built-in desks, then merge in any saved keywords."""
        defaults = {
            "Trading": ["buy", "sell", "stock", "trade", "order", "limit", "market"],
            "Retirement": ["ira", "401k", "roth", "rollover", "distribution", "beneficiary"],
            "Service": ["login", "password", "locked", "address", "profile", "check"],
            "Tax": ["1099", "tax", "form", "deduction", "withholding"]
        }
        for name, kws in defaults.items():
            desk = FidelityServiceDesk(name)
            for kw in kws:
                desk.add_keyword(kw)
            self.desks[name] = desk

        # saved keywords are added on top of the defaults, never replacing them
        self.load_model()

    def predict_department(self, raw_text):
        """return ``(department, score)`` for the best keyword match.

        returns ``("Uncertain", 0)`` when there are no desks or no keyword
        matches. when several desks share the top score, the one that was
        added to ``self.desks`` first wins; there is no further tie-breaking.
        """
        tokens = self._preprocess(raw_text)
        scores = {name: desk.score_text(tokens) for name, desk in self.desks.items()}

        if not scores or max(scores.values()) == 0:
            return "Uncertain", 0

        best_dept = max(scores, key=scores.get)
        return best_dept, scores[best_dept]

    def _preprocess(self, text):
        """lower-case *text*, drop punctuation and stop words, return tokens.

        non-string input (for example a NaN cell) yields an empty list.
        """
        if not isinstance(text, str):
            return []
        cleaned = text.lower().translate(self._PUNCT_TABLE)
        return [w for w in cleaned.split() if w not in STOP_WORDS]

    def modify_keywords(self, desk_name, new_keywords_str):
        """add comma-separated keywords to *desk_name* and persist them.

        returns True when the desk exists and at least one non-blank keyword
        was supplied; False otherwise. nothing is written to disk on False.
        """
        if desk_name not in self.desks:
            return False

        words = [w.strip() for w in new_keywords_str.split(',') if w.strip()]
        if not words:
            # blank input is not an update: report it instead of claiming success
            return False

        for w in words:
            self.desks[desk_name].add_keyword(w)
        self.save_model()
        return True

    def save_model(self):
        """write every desk's keywords to ``FILE_PATH`` (best effort)."""
        try:
            with open(self.FILE_PATH, 'w') as f:
                for name, desk in self.desks.items():
                    f.write(f"{name}:{','.join(desk.keywords)}\n")
        except (OSError, UnicodeError):
            # persistence is optional; only i/o and encoding failures are ignored
            pass

    def load_model(self):
        """merge keywords from ``FILE_PATH`` into the existing desks (best effort).

        a missing or unreadable file is ignored, and so are lines without a
        colon and lines naming a desk that does not exist.
        """
        try:
            with open(self.FILE_PATH, 'r') as f:
                for line in f:
                    if ":" not in line:
                        continue
                    name, kws = line.strip().split(":", 1)
                    if name in self.desks:
                        for kw in kws.split(','):
                            self.desks[name].add_keyword(kw)
        except (OSError, UnicodeError):
            # covers a missing file as well as permission and decoding problems
            pass

# ====================================================================
# class 3: DataFrameProcessor
# ====================================================================

class DataFrameProcessor:
    """bulk reclassification of a third-party routing csv using pandas."""

    def __init__(self, router):
        self.router = router
        # rows whose confidence is below this value get a second opinion
        self.confidence_threshold = 0.60

    def _resolve_row(self, row):
        """return ``(department, status)`` for a single csv row.

        a confidence that cannot be parsed becomes NaN, which never compares
        below the threshold, so such rows keep their original department.
        """
        confidence = pd.to_numeric(row['confidence_level'], errors='coerce')
        original = row['department_routed']

        if not confidence < self.confidence_threshold:
            return original, "Original"

        suggestion, _ = self.router.predict_department(row['customer_statement'])
        if suggestion == "Uncertain":
            # the keyword rules found nothing, so the third-party answer stands
            return original, "Original (Low Conf - No Rule Match)"
        return suggestion, "Reclassified (Low Conf)"

    def process_file(self, filepath):
        """read *filepath* and re-route its low-confidence rows.

        returns ``(dataframe, "success")`` with two added columns,
        ``final_classification`` and ``processing_status``; on failure returns
        ``(None, message)`` describing the missing columns or the error raised.
        """
        needed = ('customer_statement', 'department_routed', 'confidence_level')
        try:
            frame = pd.read_csv(filepath)

            if any(col not in frame.columns for col in needed):
                return None, "error: missing required columns in csv."

            outcomes = [self._resolve_row(row) for _, row in frame.iterrows()]
            frame['final_classification'] = [dept for dept, _ in outcomes]
            frame['processing_status'] = [status for _, status in outcomes]
            return frame, "success"

        except Exception as exc:
            return None, f"file error: {str(exc)}"

# ====================================================================
# class 4: AppInterface
# ====================================================================

class AppInterface:
    """console front end: shows the menu and drives the user input loop."""

    def __init__(self):
        self.router = RouterModel()
        self.processor = DataFrameProcessor(self.router)

    def _safe_input(self, prompt):
        """prompt the user and return the stripped answer.

        typing 'quit', 'exit' or 'q' (any case), or closing the input stream,
        ends the program by raising SystemExit.
        """
        try:
            user_in = input(prompt).strip()
        except EOFError:
            # stdin was closed (piped input ran out): leave cleanly, no traceback
            print("\ninput stream closed. exiting app.")
            raise SystemExit(0) from None
        if user_in.lower() in ['quit', 'exit', 'q']:
            print("\nuser requested quit. exiting app.")
            # SystemExit is always available; the exit() helper comes from the
            # site module and may be missing or behave differently in notebooks
            raise SystemExit(0)
        return user_in

    @staticmethod
    def _match_department(typed, known_names):
        """return the entry of *known_names* matching *typed*, or None.

        an exact match wins; otherwise the first name that matches when case
        is ignored is returned, so 'trading' finds 'Trading'.
        """
        if typed in known_names:
            return typed
        wanted = typed.casefold()
        if not wanted:
            return None
        for candidate in known_names:
            if candidate.casefold() == wanted:
                return candidate
        return None

    def _generate_sample_data(self):
        """write 'third_party_data.csv' with four demo rows for option 1.

        NOTE(review): this overwrites any existing file of that name on every
        call - confirm that is acceptable outside of a classroom demo.
        """
        data = {
            'customer_statement': [
                "I need to reset my password immediately", 
                "i want to buy 100 shares of apple", 
                "what is the limit for my 401k contribution",
                "where is the tax form 1099 for last year"
            ],
            'department_routed': ["Service", "Service", "Service", "Service"],
            'confidence_level': [0.95, 0.40, 0.35, 0.45] 
        }
        # the last 3 rows are deliberately misrouted to 'Service' with low confidence
        pd.DataFrame(data).to_csv("third_party_data.csv", index=False)
        print("\n(system note: created 'third_party_data.csv' for testing)")

    def run_option_1(self):
        """load a csv, reclassify low-confidence rows, then save or adjust keywords."""
        print("\n--- upload & reclassify ---")
        self._generate_sample_data()  # creates file for student convenience

        path = self._safe_input("enter csv filename (default: third_party_data.csv): ")
        if not path:
            path = "third_party_data.csv"

        df, status = self.processor.process_file(path)

        if df is None:
            # status carries the error description when processing failed
            print(status)
            return

        print("\n--- processing complete. preview of results: ---")
        print(df[['customer_statement', 'confidence_level', 'final_classification', 'processing_status']])

        # ask the user to validate; 'n' counts as 'no' so a short answer
        # does not save results the user just rejected
        feedback = self._safe_input("\ndoes this classification look correct? (yes/no): ").lower()
        if feedback in ('no', 'n'):
            print("switching to keyword modification mode...")
            self.run_option_2()
        else:
            save_name = "reclassified_results.csv"
            df.to_csv(save_name, index=False)
            print(f"results saved to {save_name}")

    def run_option_2(self):
        """ask for a department and new keywords, then update the router."""
        print("\n--- modify keywords ---")
        print("available departments:", list(self.router.desks.keys()))

        typed = self._safe_input("enter department to modify: ")
        # accept any casing, e.g. 'trading' for 'Trading'
        dept = self._match_department(typed, self.router.desks)
        if dept is None:
            print("department not found.")
            return

        print(f"current keywords for {dept}: {self.router.desks[dept].get_keywords()}")
        new_kws = self._safe_input("enter new keywords (comma-separated): ")

        # reject blank input here so success is never reported for a no-op
        if not any(part.strip() for part in new_kws.split(',')):
            print("no valid keywords provided.")
            return

        if self.router.modify_keywords(dept, new_kws):
            print("keywords updated successfully.")
        else:
            print("no valid keywords provided.")

    def main_menu(self):
        """show the menu repeatedly until the user picks option 3 or quits."""
        while True:
            print("\n" + "="*30)
            print(" FIDELITY DATA QUALITY APP")
            print("="*30)
            print("1. upload dataframe (fix low confidence)")
            print("2. modify keywords")
            print("3. quit")

            choice = self._safe_input("select option: ")

            if choice == '1':
                self.run_option_1()
            elif choice == '2':
                self.run_option_2()
            elif choice == '3':
                print("exiting application.")
                break
            else:
                print("invalid selection.")

# ====================================================================
# main execution
# ====================================================================

def main():
    """build the console app and hand control to its menu loop."""
    AppInterface().main_menu()

# start the app only when this file is run as a script, not when it is imported
if __name__ == "__main__":
    main()



 FIDELITY DATA QUALITY APP
1. upload dataframe (fix low confidence)
2. modify keywords
3. quit

--- upload & reclassify ---

(system note: created 'third_party_data.csv' for testing)

--- processing complete. preview of results: ---
                           customer_statement  confidence_level  \
0     I need to reset my password immediately              0.95   
1           i want to buy 100 shares of apple              0.40   
2  what is the limit for my 401k contribution              0.35   
3    where is the tax form 1099 for last year              0.45   

  final_classification        processing_status  
0              Service                 Original  
1              Trading  Reclassified (Low Conf)  
2              Trading  Reclassified (Low Conf)  
3                  Tax  Reclassified (Low Conf)  
switching to keyword modification mode...

--- modify keywords ---
available departments: ['Trading', 'Retirement', 'Service', 'Tax']
department not found.

 FIDELITY DATA QUALITY 