In [1]:
import os
import pandas as pd

# Input / output folders for the cleaning step.
# Both paths are relative, so they resolve against the current working
# directory of the kernel.
# RAW_DIR: folder that is scanned for the raw connection ".csv" files.
RAW_DIR = "Assignment 2/Data Connections"
# CLEANED_DIR: folder that receives one cleaned CSV per raw file.
CLEANED_DIR = "Assignment 2/Cleaned Connections"

# Create the cleaned folder (and any missing parents) up front;
# exist_ok=True makes re-running this cell a no-op instead of an error.
os.makedirs(CLEANED_DIR, exist_ok=True)

In [3]:
def extract_student_name_from_filename(filename):
    """Derive a normalized student name from a connections file name.

    The name is taken from the text after the last ``' - '`` separator
    (or the whole file name when there is no separator). A trailing
    ``.csv`` extension is removed, the remaining words are title-cased
    and joined with underscores, e.g.
    ``"Aaditya_Raj - aaditya raj.csv"`` -> ``"Aaditya_Raj"``.

    Args:
        filename: File name (not a full path) as a string.

    Returns:
        The normalized name as a string (possibly empty when the file name
        carries no name text), or ``None`` if ``filename`` is not a string.
    """
    extension = '.csv'
    try:
        name_part = filename.split(' - ')[-1].strip()
        # Remove only the trailing extension, ignoring case. A blanket
        # str.replace would also delete '.csv' occurring inside the name
        # and would miss upper-case extensions such as '.CSV'.
        if name_part.lower().endswith(extension):
            name_part = name_part[:-len(extension)]
        return '_'.join(word.title() for word in name_part.split())
    except (AttributeError, TypeError) as e:
        # Non-string input (None, bytes, numbers, ...): report and signal
        # failure with None rather than aborting the whole run.
        print(f"Error in filename parsing: {filename}, {e}")
        return None

In [4]:
def get_cleaned_full_name(first, last):
    """Build a ``Title_Case`` full name joined by underscores.

    Missing parts (as judged by ``pd.isna``) are ignored. Each present part
    is converted to ``str``, split on whitespace, and every word is
    title-cased before joining with ``'_'``.

    Args:
        first: First-name value; may be missing (None / NaN).
        last: Last-name value; may be missing (None / NaN).

    Returns:
        The cleaned name, or ``None`` when both parts are missing or no
        words remain after stripping whitespace.
    """
    # Nothing to build a name from.
    if pd.isna(first) and pd.isna(last):
        return None

    # Collect the words of every part that is present, first name first.
    tokens = []
    for part in (first, last):
        if pd.notna(part):
            tokens.extend(str(part).split())

    joined = '_'.join(token.title() for token in tokens)
    return joined or None


In [7]:
# Clean every raw connections CSV in RAW_DIR and write a two-column
# ("Full Name", "Company") CSV with the same file name into CLEANED_DIR.
for file in os.listdir(RAW_DIR):
    # Anything that is not a ".csv" file is ignored.
    if not file.endswith(".csv"):
        continue

    file_path = os.path.join(RAW_DIR, file)

    # Read as UTF-8 first; only a decoding failure triggers the
    # ISO-8859-1 retry. Any other failure skips the file.
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
    except UnicodeDecodeError:
        try:
            df = pd.read_csv(file_path, encoding='ISO-8859-1')
        except Exception as e:
            print(f"[Error] Could not read {file} with fallback encoding: {e}")
            continue
        else:
            print(f"[Warning] Used fallback encoding for {file}")
    except Exception as e:
        print(f"[Error] Could not read {file}: {e}")
        continue

    # Pair each row with its cleaned name; rows without a usable name
    # (both name parts missing or blank) are dropped below.
    named_rows = (
        (get_cleaned_full_name(row.get("First Name"), row.get("Last Name")), row)
        for _, row in df.iterrows()
    )
    cleaned_rows = [
        {"Full Name": cleaned_name, "Company": row.get("Company", "")}
        for cleaned_name, row in named_rows
        if cleaned_name
    ]

    # Nothing to write for this file: report it and move on.
    if not cleaned_rows:
        print(f"⚠️ No valid rows in file: {file}")
        continue

    output_path = os.path.join(CLEANED_DIR, file)
    cleaned_df = pd.DataFrame(cleaned_rows)
    cleaned_df.to_csv(output_path, index=False)
    print(f"✅ Saved cleaned file: {output_path}")


✅ Saved cleaned file: Assignment 2/Cleaned Connections\Aaditya_Raj - Aaditya Raj.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Abhishek_Singh - Abhishek Singh.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Aditya_Singh - Aditya NO-LASTNAME.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Afzal_Raza - Afzl Raza.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Ajay Jatav Connections-1 - Ajay Jatav.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Ajit_Yadav - Ajit Yadav.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Akanksha_Kushwaha - Akanksha.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Alok_raj - Alok Raj.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Aman_ Adarsh.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Aman_Singh - Aman Singh.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\amit_kumar - Amit Kumar.csv
✅ Saved cleaned file: Assignment 2/Cleaned Connections\Anamika_K