In [1]:

# --- AI-based LinkedIn Profile Classifier (Final Prototype) ---
# Author: Mudit Agarwal (Inovexus Interview)

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# ---------- 1. Load dataset ----------
# Read the input CSV into a DataFrame. The bare filename is resolved against
# the current working directory. Later steps select the "First Name" and
# "LinkedIn URL" columns from this frame.
df = pd.read_csv(filepath_or_buffer="Test Data - Sheet1 (1) (1).csv")

# ---------- 2. Add mock LinkedIn headlines ----------
# Placeholder headlines standing in for real LinkedIn data. The list does not
# have to match the CSV's row count: it is repeated in order until every row
# has a headline.
mock_headlines = [
    "Founder and CEO of a SaaS startup recently acquired",
    "Angel investor and mentor in AI companies",
    "Ex-McKinsey consultant turned startup operator",
    "CTO at Amazon Web Services",
    "Board member and private equity investor",
    "Leadership coach for startup founders",
    "Partner at venture capital firm and ex-consultant",
    "Chief Product Officer at Google",
    "Serial entrepreneur with multiple exits",
    "Business angel investing in tech startups"
]
# Row i receives headline (i modulo list length), i.e. the list is cycled.
headline_count = len(mock_headlines)
df["Headline"] = [mock_headlines[row % headline_count] for row in range(len(df))]

# ---------- 3. Training data (labels + text examples) ----------
# Each pair is (example headline text, profile category): one labelled example
# per category. Keeping text and label side by side makes it obvious which
# label belongs to which example.
labelled_examples = [
    ("Founder and exited entrepreneur from a SaaS startup", "exited entrepreneurs"),
    ("Angel investor in multiple AI companies", "serial business angels"),
    ("Leadership coach and mentor for founders", "top mentors"),
    ("Chief Technology Officer at Google", "Big Tech C-levels"),
    ("Board member and private equity investor", "board members/private investors"),
    ("Ex-McKinsey consultant turned startup operator", "ex-consulting"),
]

# Column-oriented mapping with parallel "text" / "category" lists, in the
# same order as the pairs above.
train_data = {
    "text": [example_text for example_text, _label in labelled_examples],
    "category": [example_label for _text, example_label in labelled_examples],
}
train_df = pd.DataFrame(train_data)

# ---------- 4. Train classifier ----------
# Target labels come straight from the training frame.
y_train = train_df["category"]

# Learn the TF-IDF vocabulary from the example texts and vectorise them in a
# single call.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_df["text"])

# Multinomial Naive Bayes with default settings; fit() returns the fitted
# estimator, so construction and training are chained.
model = MultinomialNB().fit(X_train, y_train)

# ---------- 5. Predict for your dataset ----------
# Use transform (not fit_transform) so the headlines are encoded with the
# vocabulary the vectorizer was already fitted on.
X_test = vectorizer.transform(df["Headline"])

# One predicted category per row, stored alongside the input data.
predicted_labels = model.predict(X_test)
df["Predicted Profile Type"] = predicted_labels

# ---------- 6. Save output (clean format) ----------
# Keep only the two identifying columns plus the predicted label.
output_columns = ["First Name", "LinkedIn URL", "Predicted Profile Type"]
output_df = df[output_columns]

# index=False leaves the DataFrame's row index out of the written file.
output_df.to_csv("classified_profiles.csv", index=False)

# Confirmation message followed by the exported table, each on its own line.
print(" Classification complete! Saved as 'classified_profiles.csv'", output_df, sep="\n")


 Classification complete! Saved as 'classified_profiles.csv'
   First Name                                       LinkedIn URL  \
0    Benjamin                     https://linkedin.com/in/benmah   
1        Hani   https://www.linkedin.com/in/hani-baramki-1406335   
2       Régis          https://www.linkedin.com/in/regiscazenave   
3       Marco           https://www.linkedin.com/in/marcocaronna   
4    Frédéric      https://www.linkedin.com/in/frederic-queudret   
5        Alex             https://www.linkedin.com/in/avassilev1   
6        Rémi  https://www.linkedin.com/in/r%C3%A9mi-quintin-...   
7    Matthieu    https://www.linkedin.com/in/matthieu-side-side/   
8        Lars             https://www.linkedin.com/in/larsrossen   
9   Guillaume  https://www.linkedin.com/in/guillaume-rodenas-...   
10      Julia    https://www.linkedin.com/in/julia-grab-879a4844   
11      Marie  https://www.linkedin.com/in/marie-renaudin-058...   
12      Sunil     https://www.linkedin.com/in/sunil-dut