In [1]:
import pandas as pd
from rapidfuzz import process, fuzz

In [2]:
# Load the job postings CSV; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer='NYC Fresh Jobs Postings.csv')

In [3]:
# Narrow the frame to the columns used for matching and for display.
# .copy() detaches the result from `df`, so later edits never touch the loaded frame.
df_new = df.loc[
    :,
    [
        'Agency',
        'Business Title',
        'Job Description',
        'Salary Range From',
        'Salary Range To',
        'Work Location',
        'Preferred Skills',
    ],
].copy()


In [4]:
# Preprocess: replace missing values with '' and lowercase every string cell so
# the fuzzy matching below is case-insensitive.
# The result is rebound instead of using inplace=True, which keeps the cell a
# plain "new value from old value" assignment.
df_new = df_new.fillna('')

# DataFrame.applymap is deprecated (recent pandas emits a FutureWarning for it).
# Mapping each column with Series.map applies the same element-wise function and
# works on both old and new pandas versions. Non-string cells (for example
# numeric salaries) are passed through unchanged.
df_new = df_new.apply(
    lambda column: column.map(
        lambda cell: cell.lower() if isinstance(cell, str) else cell
    )
)

  df_new = df_new.applymap(lambda x: x.lower() if isinstance(x, str) else x)


In [5]:
# Flatten the searchable text columns into (text, row_label) pairs. Each row
# contributes one pair per column, in the order Agency, Business Title,
# Job Description, so a position in the corpus can be traced back to its row.
corpus_map = [
    (text, row_label)
    for row_label, texts in zip(
        df_new.index,
        df_new[['Agency', 'Business Title', 'Job Description']].itertuples(index=False, name=None),
    )
    for text in texts
]

# Parallel list holding only the texts; position i corresponds to corpus_map[i].
corpus_texts = [pair[0] for pair in corpus_map]


In [6]:
# Matching function using RapidFuzz
def get_recommendation(search, df, limit=10):
    """Return up to ``limit`` distinct job postings that best match ``search``.

    The query is lowercased and stripped, then scored with RapidFuzz's
    ``token_set_ratio`` against every entry of the module-level
    ``corpus_texts``. The module-level ``corpus_map`` translates each hit's
    position back to the index label of the row it was taken from.

    Several corpus entries can belong to the same row, so hits are
    de-duplicated by row label. The whole corpus is ranked (``limit=None``)
    and iteration stops once ``limit`` distinct rows are collected; truncating
    the raw hits to ``limit`` first would return fewer rows than requested
    whenever one row matches through more than one of its fields.

    Parameters
    ----------
    search : str
        Free-text query.
    df : pandas.DataFrame
        Frame addressed by the labels stored in ``corpus_map``. Must contain
        the columns 'Business Title', 'Agency', 'Salary Range From',
        'Salary Range To', 'Work Location' and 'Preferred Skills'; the title,
        agency and location cells must be strings.
    limit : int or None, default 10
        Maximum number of distinct rows to return. ``None`` returns every
        distinct row in ranked order.

    Returns
    -------
    list of tuple
        ``(Business Title, Agency, Salary Range From, Salary Range To,
        Work Location, Preferred Skills)`` in the order RapidFuzz ranked the
        hits. Empty when the query is blank or ``limit`` is not positive.
    """
    search = search.lower().strip()
    # A blank query scores the same against every entry, so any "ranking"
    # would be arbitrary; report no recommendations instead.
    if not search:
        return []
    if limit is not None and limit <= 0:
        return []

    # limit=None asks RapidFuzz for the full sorted result list. Every entry
    # has to be scored regardless of the limit, so the extra cost is only the
    # sort over the complete corpus.
    matches = process.extract(search, corpus_texts, scorer=fuzz.token_set_ratio, limit=None)

    seen_indices = set()
    recommendations = []

    for _, _, match_index in matches:
        idx = corpus_map[match_index][1]
        if idx in seen_indices:
            continue
        seen_indices.add(idx)
        recommendations.append((
            df.at[idx, 'Business Title'].title(),
            df.at[idx, 'Agency'].title(),
            df.at[idx, 'Salary Range From'],
            df.at[idx, 'Salary Range To'],
            df.at[idx, 'Work Location'].title(),
            df.at[idx, 'Preferred Skills']
        ))
        # Stop as soon as the requested number of distinct rows is reached.
        if limit is not None and len(recommendations) >= limit:
            break
    return recommendations

In [8]:
# Prompt for a query, look up matching postings, and print one tuple per line.
search = input("Enter the keyword for job search: ")
recommendations = get_recommendation(search, df_new)

# Heading and column legend are emitted in one call; sep="\n" puts each on its own line.
print(
    "\nRecommendations:",
    "(Business Title, Agency, Salary From, Salary To, Work Location, Preferred Skills)",
    sep="\n",
)
for rec in recommendations:
    print(rec)

Enter the keyword for job search:  Data Analytics



Recommendations:
(Business Title, Agency, Salary From, Salary To, Work Location, Preferred Skills)
('Business Analyst', 'Nyc Housing Authority', 56013.0, 84862.0, 'Perf Tracking And Analytics', '1.\tdata management skills including constructing datasets for specific analyses and cleaning, aggregating and matching large data sets. 2.\tminimum 2 years of experience with standard query language (sql) and data models. 3.\tability to compile data by querying databases and joining tables from various sources.  4.\texperience developing analytic plans and conducting statistical analyses. 5.\tstrong knowledge of data visualization tools such as microsoft power bi. 6.\tproficiency in microsoft office:  excel, outlook, powerpoint, word, microsoft teams. 7.\thighly organized with the ability to work under stringent deadlines, manage multiple assignments and handle sensitive information. 8.\tmust have experience working with cross-functional teams for project delivery.')
('Executive Director, Off