In [6]:
import pandas as pd

# Load the two input tables from CSV: the student records and the
# internship opportunities.
# NOTE(review): the absolute "/content/..." paths look Colab-specific, and the
# "opportunities.xlsx.csv" name suggests a spreadsheet export — confirm both
# before running this elsewhere.
students = pd.read_csv("/content/students.csv")
internships = pd.read_csv("/content/opportunities.xlsx.csv")

In [10]:
# Show the column labels of the students table (used below to pick fields).
students.columns


Index(['Timestamp', 'Full Name', 'Your Email', 'Your Field of Interest',
       'university', 'Major / Field of Study', 'Academic Year',
       'email address', 'Have you completed any internship before?',
       'Type of Internship:', 'Internship Field:', 'Company Name (Optional)',
       'Internship Duration:', 'Did you receive a completion certificate?',
       'How would you rate your overall internship experience? (1–5)',
       'What was the most valuable thing you learned during the internship? (Short answer)',
       'What challenges did you face during the internship? (Short answer)',
       'Did the internship help you improve your skills?',
       'Would you like a platform that helps you find the best internship opportunities for your field?',
       'What features would you like to see in our platform?',
       'Would you like to subscribe to receive internship updates and new opportunities?'],
      dtype='object')

In [11]:
# Show the column labels of the internships table (used below to pick fields).
internships.columns

Index(['companys name', 'type of content', 'location', 'description',
       'Requirements', 'application link', 'exp date', 'intern duration',
       '@dropdown'],
      dtype='object')

In [13]:
# Weights of the four component scores in the final match score.
# They sum to 1.0, so the weighted sum stays in [0, 1] before scaling.
SKILL_WEIGHT = 0.50
MAJOR_WEIGHT = 0.20
LOCATION_WEIGHT = 0.15
EXPERIENCE_WEIGHT = 0.15
# Rank cutoff: only matches whose per-student rank is <= TOP_N are kept.
TOP_N = 5

def normalize_list(text):
    """Split a comma-separated cell into a set of cleaned, lower-cased tokens.

    Parameters
    ----------
    text : object
        Raw cell value. Anything that is not missing is converted with ``str``
        before splitting on commas.

    Returns
    -------
    set of str
        Stripped, lower-cased tokens. Missing values (as judged by
        ``pd.isna``) yield an empty set, and blank tokens are discarded.
    """
    if pd.isna(text):
        return set()
    tokens = (piece.strip().lower() for piece in str(text).split(","))
    # Drop blank tokens: "python, sql," or an empty cell must not contribute
    # an empty "skill" that would inflate the denominator in skill_match_score.
    return {token for token in tokens if token}

def skill_match_score(student_skills, internship_skills):
    """Return the fraction of an internship's skills that the student has.

    Both arguments are sets of skill tokens. The result is the size of their
    intersection divided by the number of internship skills, or 0.0 when the
    internship lists no skills.
    """
    if internship_skills:
        shared_skills = student_skills & internship_skills
        return len(shared_skills) / len(internship_skills)
    return 0.0

def major_match_score(student_major, internship_category):
    """Score how well a student's major matches an internship category.

    The comparison ignores case and surrounding whitespace.

    Parameters
    ----------
    student_major : object
        The student's major; converted with ``str`` unless missing.
    internship_category : object
        The internship's category; converted with ``str`` unless missing.

    Returns
    -------
    float
        1.0 for an exact match, 0.5 when one value contains the other,
        and 0.0 otherwise (including when either value is missing or blank).
    """
    if pd.isna(student_major) or pd.isna(internship_category):
        return 0.0

    major = str(student_major).strip().lower()
    category = str(internship_category).strip().lower()

    # An empty string is a substring of every string, so without this guard a
    # blank field would earn a partial match against anything.
    if not major or not category:
        return 0.0

    if major == category:
        return 1.0
    if major in category or category in major:
        return 0.5
    return 0.0

def location_match_score(student_pref, internship_location):
    """Score whether an internship's location suits the student's preference.

    The comparison ignores case and surrounding whitespace.

    Parameters
    ----------
    student_pref : object
        The student's stated preference; converted with ``str`` unless missing.
    internship_location : object
        The internship's location; converted with ``str`` unless missing.

    Returns
    -------
    float
        1.0 when the internship location mentions "remote" or equals the
        student's preference, otherwise 0.0. A missing or blank internship
        location never matches.
    """
    if pd.isna(internship_location):
        return 0.0

    location = str(internship_location).strip().lower()
    # Remote internships count as a match regardless of the preference.
    if "remote" in location:
        return 1.0

    # Two blank values must not be treated as a perfect location match.
    if not location or pd.isna(student_pref):
        return 0.0

    preference = str(student_pref).strip().lower()
    return 1.0 if preference == location else 0.0

def experience_match_score(experience_answer):
    """Turn a yes/no answer into an experience score.

    The answer is converted with ``str`` and compared case-insensitively:
    exactly "yes" scores 1.0, and anything else scores 0.5.
    """
    answered_yes = str(experience_answer).lower() == "yes"
    return 1.0 if answered_yes else 0.5

# Derive a "skills_set" column on each table by tokenising a comma-separated
# text column: the student's field of interest and the internship requirements.
students["skills_set"] = students["Your Field of Interest"].map(normalize_list)
internships["skills_set"] = internships["Requirements"].map(normalize_list)

In [14]:
students["skills_set"]

Unnamed: 0,skills_set
0,{data science}
1,{electronic}
2,{ai& data science}
3,{computer science}
4,{network and cyber security}
5,{technology}
6,{cyber security}
7,{cybersecurity}
8,{ai}
9,{network and cayber security}


In [20]:
# Matching Logic
#
# For every (student, internship) pair that passes the hard filter, combine the
# four component scores into a weighted score on a 0-100 scale and record it.

matches = []

# Clean the internship fields once, instead of once per student.
# Each entry is (internship row, stripped category, stripped location).
internship_records = []
for _, internship in internships.iterrows():
    raw_category = internship["type of content"]
    if pd.isna(raw_category):
        # str(NaN) is "nan", which is a substring of majors such as "finance"
        # and would slip through the hard filter.
        continue
    category = str(raw_category).strip()
    if not category:
        # An empty string is a substring of every major; it carries no signal.
        continue
    raw_location = internship["location"]
    location = "" if pd.isna(raw_location) else str(raw_location).strip()
    internship_records.append((internship, category, location))

for _, student in students.iterrows():

    # Student-level values do not depend on the internship: compute them once.
    raw_major = student["Major / Field of Study"]
    if pd.isna(raw_major):
        continue  # without a major the hard filter can never be satisfied
    student_major = str(raw_major).strip()
    if not student_major:
        continue
    major_key = student_major.lower()

    raw_pref = student["Type of Internship:"]
    location_pref = "" if pd.isna(raw_pref) else str(raw_pref).strip()

    student_skills = student["skills_set"]

    # -------- Experience Matching (student-level) --------
    # NOTE(review): this scores the completion-certificate answer; the survey
    # also has "Have you completed any internship before?" — confirm which
    # question is meant to drive the experience score.
    experience_score = experience_match_score(
        student["Did you receive a completion certificate?"]
    )

    for internship, category, location in internship_records:

        # -------- Step 1: Hard Filter --------
        # Keep the pair only when the internship category appears inside the
        # student's major (case- and whitespace-insensitive).
        if category.lower() not in major_key:
            continue

        # -------- Step 2: Skill Matching --------
        skill_score = skill_match_score(student_skills, internship["skills_set"])

        # -------- Step 3: Major Matching --------
        major_score = major_match_score(student_major, category)

        # -------- Step 4: Location Matching --------
        # A missing internship location is not evidence of a match.
        location_score = (
            location_match_score(location_pref, location) if location else 0.0
        )

        # -------- Step 5: Final Score --------
        final_score = (
            SKILL_WEIGHT * skill_score +
            MAJOR_WEIGHT * major_score +
            LOCATION_WEIGHT * location_score +
            EXPERIENCE_WEIGHT * experience_score
        ) * 100

        matches.append({
            "student_email": student["Your Email"],
            "company": internship["companys name"],
            "internship_category": internship["type of content"],
            "match_score": round(final_score, 2),
            "application_link": internship["application link"]
        })

# Output

# Name the columns explicitly so the frame keeps its schema even when no
# student/internship pair passed the hard filter. An empty list would otherwise
# produce a column-less frame and the groupby below would raise KeyError.
matches_df = pd.DataFrame(
    matches,
    columns=[
        "student_email",
        "company",
        "internship_category",
        "match_score",
        "application_link",
    ],
)

if matches_df.empty:
    # Nothing to rank; still add the column so downstream code sees it.
    matches_df["rank"] = pd.Series(dtype="float64")
else:
    # Dense rank per student, best score first. Tied scores share a rank, so a
    # student can end up with more than TOP_N rows below.
    matches_df["rank"] = (
        matches_df.groupby("student_email")["match_score"]
        .rank(ascending=False, method="dense")
    )

top_matches = matches_df[matches_df["rank"] <= TOP_N]

top_matches.to_csv("final_matching_results.csv", index=False)

print("Matching completed.")
print(top_matches)

Matching completed.
                       student_email    company internship_category  \
0             Seifehab040@gmail.com    WEINTERN                   T   
1       youssefabdallaatya@gmail.com   WEINTERN                   T   
2    abdelrahman240103470@sut.edu.eg  robotesta                 AI    
3    abdelrahman240103470@sut.edu.eg   WEINTERN                   T   
4    s-rahma.sayed@zewailcity.edu.eg  robotesta                 AI    
..                               ...        ...                 ...   
109            hassan37099@gmail.com   vodafone               cyber   
110            hassan37099@gmail.com   unicharm                  IT   
111            hassan37099@gmail.com     geidea               cyber   
112            hassan37099@gmail.com    XEFORT                cyber   
113            hassan37099@gmail.com   WEINTERN                   T   

     match_score                                   application_link  rank  
0           40.0                 https://we-intern.

**Match Score = (0.5 × Skill + 0.2 × Major + 0.15 × Location + 0.15 × Experience) × 100**