In [13]:
import pandas as pd
import datetime

In [62]:
# Load the raw table from "data.csv" (path is relative to the kernel's working directory).
df = pd.read_csv("data.csv")
# Preview the first rows to sanity-check the columns that were read.
df.head()

Unnamed: 0,ID,Name,Age,Gender,Degrees,Skills,Project ID,Deadline,Progress
0,75289-M446-6744,Julie Jenkins,35,Male,Master Data Science and Artificial Intelligenc...,Data Analytics,607,2023-04-09,92
1,17009-D629-0325,Brian Nguyen,34,Female,Doctoral Civil Engineering,Spreadsheet,430,2023-03-27,27
2,19846-H913-8329,Justin Friedman,19,Female,Master Electrical Engineering; Doctoral Mechan...,Python; HTML; Machine Learning; Backend Engine...,242,2023-03-25,17
3,09653-K667-7180,Karen Wright,38,Male,Doctoral Computer Engineering; Master Computer...,Artifical Intelligence; C; C++; DevOps; UI/UX ...,293,2023-03-16,30
4,21127-Q322-3686,Yesenia Smith,20,Female,Doctoral Physical and Mathematical Sciences,PHP; C; HTML; Data Analytics; Spreadsheet; Jav...,540,2023-03-26,51


In [63]:
def filter_by_skills(df, skills):
    """Return the rows of ``df`` whose "Skills" cell lists at least one wanted skill.

    The "Skills" column is expected to hold "; "-separated skill names. A row
    matches when any entry of ``skills`` equals one of its listed skills
    exactly (no substring matching, so "C" does not match "C++").

    Args:
        df: DataFrame with a "Skills" column.
        skills: Iterable of skill names to look for.

    Returns:
        A new DataFrame containing the matching rows with the original index,
        column order and dtypes. When nothing matches, the result is empty but
        still carries every column of ``df``.
    """
    wanted = set(skills)

    def _lists_wanted_skill(cell):
        # Missing values (NaN/None) are not strings and cannot match anything.
        if not isinstance(cell, str):
            return False
        return not wanted.isdisjoint(cell.split("; "))

    # astype(bool) keeps the mask boolean even when the frame has no rows.
    mask = df["Skills"].map(_lists_wanted_skill).astype(bool)

    # Boolean indexing (instead of rebuilding from row Series) preserves the
    # schema, which matters for empty results used by later pipeline steps.
    return df[mask]

def filter_skills(df, skills):
    """Return the rows of ``df`` whose "Skills" cell lists *every* wanted skill.

    The "Skills" column is expected to hold ";"-separated skill names;
    whitespace around each name is ignored, so "Python; C++" lists the skills
    "Python" and "C++".

    Args:
        df: DataFrame with a "Skills" column.
        skills: Iterable of skill names that must all be present.

    Returns:
        A new DataFrame containing the matching rows with the original index
        and columns (empty, but with the same columns, when nothing matches).
    """
    required = set(skills)

    def _lists_all_required(cell):
        # Missing values (NaN/None) are not strings and cannot match anything.
        if not isinstance(cell, str):
            return False
        # Strip each piece: splitting "A; B" on ";" alone leaves " B".
        owned = {piece.strip() for piece in cell.split(";")}
        return required.issubset(owned)

    # One boolean mask replaces the per-row DataFrame.append calls, which were
    # quadratic and no longer exist in pandas >= 2.0.
    mask = df["Skills"].map(_lists_all_required).astype(bool)
    return df[mask]

def calculate_days_left(filtered_df, deadline_col, today=None):
    """Return a copy of ``filtered_df`` with an added "days_left" column.

    "days_left" is the number of whole days from the reference date to each
    row's deadline; it is negative for deadlines already in the past.

    Args:
        filtered_df: DataFrame containing the deadline column. It is not
            modified.
        deadline_col: Name of the column holding deadlines, as "%Y-%m-%d"
            strings (already-parsed datetimes are accepted as well).
        today: Reference date (``datetime.date``, ``datetime.datetime`` or
            anything ``pd.Timestamp`` accepts). Defaults to the current local
            date; pass a fixed date to make results reproducible.

    Returns:
        A new DataFrame with all original columns plus "days_left".
    """
    if today is None:
        today = datetime.date.today()
    # Drop any time-of-day so the difference is counted in whole calendar days.
    reference_day = pd.Timestamp(today).normalize()

    deadlines = pd.to_datetime(filtered_df[deadline_col], format='%Y-%m-%d')

    result_df = filtered_df.copy()
    result_df['days_left'] = (deadlines - reference_day).dt.days
    return result_df

def calculate_score(row):
    """Blend remaining time and progress into one weighted score.

    Reads ``row['days_left']`` and ``row['Progress']``. Progress is divided by
    100; days left are clamped into the range [0, 1]. The result is
    ``0.7 * clamped_days + 0.3 * progress_fraction``.
    """
    completion_fraction = row['Progress'] / 100

    # NOTE(review): the clamp makes every deadline one or more days away count
    # as 1 -- confirm this is the intended "normalisation".
    clamped_days = min(row['days_left'], 1)
    clamped_days = max(clamped_days, 0)

    return 0.7 * clamped_days + 0.3 * completion_fraction

def calculate_score_2(row):
    """Score a row as days left divided by (progress + 1).

    Reads ``row['days_left']`` and ``row['Progress']``; the +1 in the
    denominator keeps a progress of 0 from dividing by zero.
    """
    denominator = row['Progress'] + 1
    return row['days_left'] / denominator

In [58]:
# Keep anyone who lists at least one of these skills.
skills_to_filter = ["Python", "JavaScript", "C++"]
filtered_df = filter_by_skills(df, skills_to_filter)

# Add days remaining until each deadline, then a per-row score.
new_df = calculate_days_left(filtered_df, 'Deadline')
new_df = new_df.assign(score=new_df.apply(calculate_score_2, axis=1))

In [60]:
# Keep rows with progress of at least 98 and at least 20 days left.
new_df = new_df.loc[
    lambda frame: (frame['Progress'] >= 98) & (frame['days_left'] >= 20)
]
new_df.head(5)

Unnamed: 0,ID,Name,Age,Gender,Degrees,Skills,Project ID,Deadline,Progress,days_left,score
58,92991-T038-7485,Brian Miller,30,Female,Doctoral Physical and Mathematical Sciences; D...,Python; Backend Engineer; JavaScript; Artifica...,265,2023-04-05,100,24,0.237624
87,07949-D258-4282,Laurie Spencer,20,Male,Master Computer Engineering; Bachelor Electric...,Artifical Intelligence; Machine Learning; Spre...,147,2023-04-08,99,27,0.27
222,88170-Z157-2316,Jeffrey Flores,27,Female,Master Electrical Engineering; Doctoral Comput...,C++; Cybersecurity,449,2023-04-07,100,26,0.257426
354,27856-K253-8260,Timothy Dunn,32,Male,Master Civil Engineering; Master Mechanical En...,JavaScript; PHP; Frontend Engineer; Data Scien...,951,2023-04-06,100,25,0.247525
524,20728-V642-0880,Zachary Guerra,28,Female,Bachelor Data Science and Artificial Intelligence,JavaScript; Spreadsheet; Data Science; Cyberse...,583,2023-04-05,99,24,0.24


In [61]:
def final(df, skills_to_filter, min_progress=98, min_days_left=20):
    """Run the full pipeline: skill filter, days left, score, threshold filter.

    Args:
        df: DataFrame with "Skills", "Deadline" and "Progress" columns.
        skills_to_filter: Skill names; rows listing at least one are kept.
        min_progress: Minimum "Progress" value (inclusive) a row needs to be
            returned. Defaults to 98.
        min_days_left: Minimum "days_left" value (inclusive) a row needs to be
            returned. Defaults to 20.

    Returns:
        A DataFrame of the rows that pass both filters, with added
        "days_left" and "score" columns.
    """
    filtered_df = filter_by_skills(df, skills_to_filter)
    new_df = calculate_days_left(filtered_df, 'Deadline')
    new_df['score'] = new_df.apply(calculate_score_2, axis=1)

    meets_thresholds = (
        (new_df['Progress'] >= min_progress)
        & (new_df['days_left'] >= min_days_left)
    )
    return new_df[meets_thresholds]

In [64]:
# Run the whole pipeline in one call; this rebinds new_df to the function's result.
new_df = final(df, ["Python", "JavaScript", "C++"])
# Preview the first rows of the result.
new_df.head()

Unnamed: 0,ID,Name,Age,Gender,Degrees,Skills,Project ID,Deadline,Progress,days_left,score
58,92991-T038-7485,Brian Miller,30,Female,Doctoral Physical and Mathematical Sciences; D...,Python; Backend Engineer; JavaScript; Artifica...,265,2023-04-05,100,24,0.237624
87,07949-D258-4282,Laurie Spencer,20,Male,Master Computer Engineering; Bachelor Electric...,Artifical Intelligence; Machine Learning; Spre...,147,2023-04-08,99,27,0.27
222,88170-Z157-2316,Jeffrey Flores,27,Female,Master Electrical Engineering; Doctoral Comput...,C++; Cybersecurity,449,2023-04-07,100,26,0.257426
354,27856-K253-8260,Timothy Dunn,32,Male,Master Civil Engineering; Master Mechanical En...,JavaScript; PHP; Frontend Engineer; Data Scien...,951,2023-04-06,100,25,0.247525
524,20728-V642-0880,Zachary Guerra,28,Female,Bachelor Data Science and Artificial Intelligence,JavaScript; Spreadsheet; Data Science; Cyberse...,583,2023-04-05,99,24,0.24
