In [17]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import re

In [18]:
df = pd.read_csv('jobstreet_jobs.csv')

In [19]:
pd.set_option('display.max_columns', None)

In [20]:
df.head()

Unnamed: 0,Date Obtained,Link,Company,Position,Location,Classification,Industry,Salary,Work Arrangement,Day Posted
0,04/05/2025 22:18,https://ph.jobstreet.com/job/83980792?type=sta...,Mapúa University,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,,13h ago
1,04/05/2025 22:18,https://ph.jobstreet.com/job/83981404?type=sta...,Axos Bank,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,Hybrid,1d ago
2,04/05/2025 22:18,https://ph.jobstreet.com/job/83976786?type=sta...,WalterMart Community Mall,Data Analyst | Quezon City,"Quezon City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,,1d ago
3,04/05/2025 22:18,https://ph.jobstreet.com/job/83976299?type=sta...,"INFINITY SPORTS INTERNATIONAL, INC.",Data Analyst,"Makati City, Metro Manila",Analysis & Reporting,Banking & Financial Services,"₱25,000 – ₱30,000 per month",,1d ago
4,04/05/2025 22:18,https://ph.jobstreet.com/job/83970409?type=sta...,CITCO INTERNATIONAL SUPPORT SERVICES LIMITED-P...,Data Analyst (Alabang - Mid-shift),"Alabang, Metro Manila",Other,Accounting,,Hybrid,2d ago


## Transformations to do:

- Get job id from link ('/job/{job id}?') ✅
- Split salary column into currency_salary, min_salary, max_salary ✅
- Fill work arrangement na with 'Others' ✅
- Add date posted from date obtained minus day posted ✅

### Job ID

In [21]:
df['Job ID'] = df['Link'].str.split('/', expand = False).str[4]

In [22]:
df['Job ID'] = df['Job ID'].str.split('?', expand = False).str[0]

In [23]:
df.head()

Unnamed: 0,Date Obtained,Link,Company,Position,Location,Classification,Industry,Salary,Work Arrangement,Day Posted,Job ID
0,04/05/2025 22:18,https://ph.jobstreet.com/job/83980792?type=sta...,Mapúa University,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,,13h ago,83980792
1,04/05/2025 22:18,https://ph.jobstreet.com/job/83981404?type=sta...,Axos Bank,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,Hybrid,1d ago,83981404
2,04/05/2025 22:18,https://ph.jobstreet.com/job/83976786?type=sta...,WalterMart Community Mall,Data Analyst | Quezon City,"Quezon City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,,1d ago,83976786
3,04/05/2025 22:18,https://ph.jobstreet.com/job/83976299?type=sta...,"INFINITY SPORTS INTERNATIONAL, INC.",Data Analyst,"Makati City, Metro Manila",Analysis & Reporting,Banking & Financial Services,"₱25,000 – ₱30,000 per month",,1d ago,83976299
4,04/05/2025 22:18,https://ph.jobstreet.com/job/83970409?type=sta...,CITCO INTERNATIONAL SUPPORT SERVICES LIMITED-P...,Data Analyst (Alabang - Mid-shift),"Alabang, Metro Manila",Other,Accounting,,Hybrid,2d ago,83970409


In [24]:
df['Position'].unique()

array(['Data Analyst', 'Data Analyst | Quezon City',
       'Data Analyst (Alabang - Mid-shift)',
       'Data Analytics Associate - SeaBank', 'Data Engineer Team Leader',
       'Manager, Data & Analytics, Data Engineer',
       'Data Analyst - Contact Center',
       'Full-Stack Engineer (for AI & Automation)',
       'Data Science Specialist', 'Data Architect', 'ACTUARIAL ANALYST',
       'Data Analytics Senior Analyst', 'BI/Data Science Analyst',
       'Marketing Data Analyst', 'Data Engineer',
       'Data Engineer (WFH | DAYSHIFT)', 'DATA ANALYST',
       'Modelling and Data Science', 'Risk Data Science Manager',
       'Senior Software Engineer (Data & Analytics)  Hybrid Set Up - 7 Days a Month',
       'Data Engineer-Business Intelligence (Japanese Bilingual)',
       'Cloud Data Engineer | Pioneer Account | Quezon City',
       'Actuarial Analyst', 'Artificial Intelligence Specialist',
       'Data Engineer | Project-based | Work From Home | Day Shift | Weekends Off',
       

### Work Arrangement

In [25]:
df['Work Arrangement'].unique()

array([nan, 'Hybrid', 'Remote'], dtype=object)

In [26]:
df['Work Arrangement'] = df['Work Arrangement'].str.title().fillna('Other')

In [27]:
df.head()

Unnamed: 0,Date Obtained,Link,Company,Position,Location,Classification,Industry,Salary,Work Arrangement,Day Posted,Job ID
0,04/05/2025 22:18,https://ph.jobstreet.com/job/83980792?type=sta...,Mapúa University,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,Other,13h ago,83980792
1,04/05/2025 22:18,https://ph.jobstreet.com/job/83981404?type=sta...,Axos Bank,Data Analyst,"Manila City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,Hybrid,1d ago,83981404
2,04/05/2025 22:18,https://ph.jobstreet.com/job/83976786?type=sta...,WalterMart Community Mall,Data Analyst | Quezon City,"Quezon City, Metro Manila",Business/Systems Analysts,Information & Communication Technology,,Other,1d ago,83976786
3,04/05/2025 22:18,https://ph.jobstreet.com/job/83976299?type=sta...,"INFINITY SPORTS INTERNATIONAL, INC.",Data Analyst,"Makati City, Metro Manila",Analysis & Reporting,Banking & Financial Services,"₱25,000 – ₱30,000 per month",Other,1d ago,83976299
4,04/05/2025 22:18,https://ph.jobstreet.com/job/83970409?type=sta...,CITCO INTERNATIONAL SUPPORT SERVICES LIMITED-P...,Data Analyst (Alabang - Mid-shift),"Alabang, Metro Manila",Other,Accounting,,Hybrid,2d ago,83970409


### Salary Parsing to Salary Range

In [28]:
df['Salary'].unique()

array([nan, '₱25,000 – ₱30,000 per month', '₱25,000 – ₱35,000 per month',
       '₱80,000 – ₱85,000 per month', '₱130,000 – ₱140,000 per month',
       '₱120,000 – ₱145,000 per month', '₱60,000 – ₱80,000 per month',
       '₱40,000 – ₱50,000 per month', '₱100,000 – ₱150,000 per month',
       '₱50,000 – ₱70,000 per month', '₱38,000 – ₱48,000 per month',
       '₱80,000 – ₱90,000 per month', '₱30,000 – ₱45,000 per month',
       '₱30,000 – ₱35,000 per month', '₱100,000 – ₱130,000 per month',
       '₱100,000 – ₱120,000 per month', '₱59,260 per month',
       '₱70,000 – ₱85,000 per month', 'Budget - 90,000 to 100,000 PHP',
       '₱95,000 – ₱100,000 per month', '₱95,000 – ₱140,000 per month',
       '₱70,000 – ₱100,000 per month', '₱120,000 – ₱180,000 per month',
       '₱24,000 – ₱30,000 per month', 'PHP 50,000 - 70,000',
       '₱150,000 – ₱200,000 per month', '₱80,000 – ₱120,000 per month',
       '₱80,000 – ₱100,000 per month', '₱45,000 – ₱55,000 per month',
       '₱120,000 – ₱160,0

df['Salary'] = df['Salary'].fillna('unknown')

#### Salary format:
- ₱{lower_lim_salary} - ₱{upper_lim_salary} per month
- PHP{lower_lim_salary} to PHP{upper_lim_salary}
- PHP{lower_lim_salary} - PHP{upper_lim_salary}
- ₱{lower_lim_salary / upper_lim_salary} per month
- {lower_lim_salary} to {upper_lim_salary}PHP
- ₱{lower_lim_salary} - {upper_lim_salary}k base

Each format has a common denominator of having an amount which could either indicate the lower salary range or fixed salary amount. If the salary is expressed as a range it could either be followed by "–","-", or "to" and the upper salary range. Currency of the salary can either be indicated before the range or after the range in the form of "PHP" or "₱"

#### Building the general regex structure:
r'(?:php|₱)*\s*([\d,]+)\s*(?:-|–|to)\s*(?:php|₱)*\s*([\d,]+)\s*(?:\s*per\s*(month|year))?\s*'

A general salary format is used to handle conventional expressions of salary. This allows us to capture ranges that do not need further processing to log the correct information. I decided to divide the salary based on their case match to give me better tracking if there are new patterns that are not within the scope of the general regex. 

In [29]:
def parse_salary_test(s):
    #No salary informations is given
    if pd.isna(s):
        return (np.nan, np.nan, 'unknown', True, 0)
    
    s = s.lower()

    # Pattern 3: ₱35-40k base, ₱4K transport, ₱3,6K allowance (ranges expressed in thousands)
    if match := re.search(r'₱\s*([\d]+)\s*[-–]\s*([\d]+)\s*k?', s):
        return (int(match.group(1))*1000, int(match.group(2))*1000, 'unknown', False, 3)
        
    '''
    # Pattern A: ₱25,000 - ₱35,000 per month
    # Pattern B: PHP25,000 -/– PHP35,000
    # Pattern C: PHP25,000 -/– 35,000
    # Pattern D: PHP25,000 to PHP35,000
    # Pattern E: Budget - 20000 to 30000PHP
    '''
    
    #General Pattern (Covers patters A-E)
    if match := re.search(r'(?:php|₱)*\s*([\d,]+)\s*(?:-|–|to)\s*(?:php|₱)*\s*([\d,]+)\s*(?:php|₱)*\s*(?:\s*per\s*(month|year))?\s*', s):
        return (int(match.group(1).replace(',', '')), int(match.group(2).replace(',', '')), str(match.group(3)) +'ly' or 'unknown', False, 1)

    # Pattern 3: ₱25,000 per month (single value)
    if match := re.search(r'₱\s*([\d,]+)\s*per month', s):
        val = int(match.group(1).replace(',', ''))
        return (val, val, 'monthly', False, 4)
    
    # No number is present in salary information
    if match := re.search(r'\d{0}', s):
        return (np.nan, np.nan, 'unknown', False, 2)
    
    return (np.nan, np.nan, 'unknown', True, 0)

In [30]:
df[['salary_min', 'salary_max', 'salary_type', 'salary_unhandled', 'salary_pattern']] = df['Salary'].apply(
    lambda x: pd.Series(parse_salary_test(x))
)

In [31]:
df[(df['Salary'].notna()) & (df['salary_pattern'] == 4)]

Unnamed: 0,Date Obtained,Link,Company,Position,Location,Classification,Industry,Salary,Work Arrangement,Day Posted,Job ID,salary_min,salary_max,salary_type,salary_unhandled,salary_pattern
58,04/05/2025 22:18,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,5d ago,83890794,59260.0,59260.0,monthly,False,4
512,2025-05-05 13:35:21,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,5d ago,83890794,59260.0,59260.0,monthly,False,4
998,2025-05-06 21:34:52,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,7d ago,83890794,59260.0,59260.0,monthly,False,4
1494,2025-05-07 16:47:13,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,7d ago,83890794,59260.0,59260.0,monthly,False,4
2015,2025-05-08 17:30:06,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,9d ago,83890794,59260.0,59260.0,monthly,False,4
2538,2025-05-09 13:20:34,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,9d ago,83890794,59260.0,59260.0,monthly,False,4
3061,2025-05-10 14:43:42,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,10d ago,83890794,59260.0,59260.0,monthly,False,4
3578,2025-05-11 16:59:43,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,11d ago,83890794,59260.0,59260.0,monthly,False,4
4091,2025-05-12 21:06:18,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,13d ago,83890794,59260.0,59260.0,monthly,False,4
4613,2025-05-14 14:19:12,https://ph.jobstreet.com/job/83890794?type=sta...,POLYTECHNIC UNIVERSITY OF THE PHILIPPINES,"Senior Statistical Specialist (SG-19, Job Order)","Santa Mesa, Metro Manila",Customer Service - Call Centre,Call Centre & Customer Service,"₱59,260 per month",Other,14d ago,83890794,59260.0,59260.0,monthly,False,4


### Creating Date posted from Date Obtained and Day Posted

My scraped data only contains the date when it was obtained and the relative time that it was posted on Jobstreet. Our transformation should handle the computation of its estimated post date. Estimated is emphasized since we will be encountering job posts that are posted 30d+ ago. It could range from 31 days up to months. To get these information, it would be best to log unique job posts on a separate table where we will then get the post date. Any succeeding occurence of this job post would add up to the days posted counter since we are sure that only the relative date will change upon its recurrence.

In [32]:
df.dtypes

Date Obtained        object
Link                 object
Company              object
Position             object
Location             object
Classification       object
Industry             object
Salary               object
Work Arrangement     object
Day Posted           object
Job ID               object
salary_min          float64
salary_max          float64
salary_type          object
salary_unhandled       bool
salary_pattern        int64
dtype: object

In [33]:
df['Date Obtained'] = pd.to_datetime(df['Date Obtained'])

In [34]:
df['Day Posted'].unique()

array(['13h ago', '1d ago', '2d ago', '3d ago', '4d ago', '5d ago',
       '6d ago', '8d ago', '9d ago', '10d ago', '11d ago', '12d ago',
       '13d ago', '15d ago', '16d ago', '17d ago', '18d ago', '19d ago',
       '20d ago', '21d ago', '22d ago', '23d ago', '24d ago', '25d ago',
       '26d ago', '27d ago', '29d ago', '30d+ ago', '7m ago', '55m ago',
       '58m ago', '1h ago', '2h ago', '3h ago', '6h ago', '7d ago',
       '14d ago', '28d ago', '5h ago', '8h ago', '10h ago', '11h ago',
       '20h ago', '21h ago', '22h ago', '10m ago', '11m ago', '18m ago',
       '4h ago', '16h ago', '18h ago', '7h ago', '9h ago', '14h ago',
       '19h ago', '23h ago', '15h ago', '17h ago', '38m ago', '35m ago',
       '32m ago', '12h ago', '23m ago', '6m ago', '24m ago', '57m ago',
       '22m ago', '27m ago', '17m ago', '51m ago', '52m ago', '53m ago',
       '42m ago', '14m ago', '20m ago', '59m ago'], dtype=object)

In [35]:
df[df['Day Posted'] == '30d+ ago'].sort_values('Date Obtained', ascending = False)

Unnamed: 0,Date Obtained,Link,Company,Position,Location,Classification,Industry,Salary,Work Arrangement,Day Posted,Job ID,salary_min,salary_max,salary_type,salary_unhandled,salary_pattern
16527,2025-06-04 12:47:46,https://ph.jobstreet.com/job/77220481?type=sta...,FullTeam,Data Engineer,Metro Manila,Developers/Programmers,Information & Communication Technology,,Hybrid,30d+ ago,77220481,,,unknown,True,0
16460,2025-06-04 12:47:46,https://ph.jobstreet.com/job/83618425?type=sta...,Citigroup Business Process Solutions Pte. Ltd.,"AVP, Risk Model Sr. Analyst (Hybrid)","Santa Ana, Metro Manila",Analysis & Reporting,Banking & Financial Services,,Hybrid,30d+ ago,83618425,,,unknown,True,0
16449,2025-06-04 12:47:46,https://ph.jobstreet.com/job/83802999?type=sta...,Accenture,DE027776 - Analytics and Modeling Analyst,"Santa Ana, Metro Manila",Developers/Programmers,Information & Communication Technology,,Hybrid,30d+ ago,83802999,,,unknown,True,0
16450,2025-06-04 12:47:46,https://ph.jobstreet.com/job/83947053?type=sta...,Thakral One Inc.,Data Scientist - Telecommunications,"Manila City, Metro Manila","Mathematics, Statistics & Information Sciences",Science & Technology,,Other,30d+ ago,83947053,,,unknown,True,0
16451,2025-06-04 12:47:46,https://ph.jobstreet.com/job/83813313?type=sta...,Thakral One Inc.,Senior Data Scientist - Financial Services,"Manila City, Metro Manila","Mathematics, Statistics & Information Sciences",Science & Technology,,Other,30d+ ago,83813313,,,unknown,True,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,2025-04-05 22:18:00,https://ph.jobstreet.com/job/78989457?type=sta...,Metropolitan Bank & Trust Company,Data Quality Engineer,"Santa Ana, Metro Manila",Engineering - Software,Information & Communication Technology,,Other,30d+ ago,78989457,,,unknown,True,0
416,2025-04-05 22:18:00,https://ph.jobstreet.com/job/79572930?type=sta...,"KSearch Asia Consulting, Inc.",Principal Data Scientist,"Manila City, Metro Manila","Mathematics, Statistics & Information Sciences",Science & Technology,,Other,30d+ ago,79572930,,,unknown,True,0
417,2025-04-05 22:18:00,https://ph.jobstreet.com/job/78818835?type=sta...,Elevated Hiring,AP-Data Analyst,"Manila City, Metro Manila","Mathematics, Statistics & Information Sciences",Science & Technology,,Other,30d+ ago,78818835,,,unknown,True,0
418,2025-04-05 22:18:00,https://ph.jobstreet.com/job/78759264?type=sta...,Panoptik Global,Data Engineer - Data Warehouse,"Quezon City, Metro Manila",Developers/Programmers,Information & Communication Technology,,Hybrid,30d+ ago,78759264,,,unknown,True,0


In [36]:
df_unique = pd.DataFrame()
df_transformed = df.copy()
df_transformed.rename(columns = {'Job ID': "job_id"}, inplace =True)

In [37]:
def sort_unique_job_id(df_transformed, df_unique):
    
    jobs_by_date = df_transformed.sort_values(by = 'Date Obtained', ascending = True)
    unique_jobs = jobs_by_date.drop_duplicates(subset = 'job_id', keep = 'first')

    if not df_unique.empty:
    
        logged_unique_ids = set(df_unique['job_id'])
        unique_jobs = unique_jobs[~unique_jobs['job_id'].isin(logged_unique_ids)]

    df_unique = pd.concat([df_unique, unique_jobs], ignore_index = True)
    df_unique = df_unique.drop(columns = ['Company','Position','Location','Classification','Industry','Salary','Work Arrangement',
                                          'salary_min','salary_max','salary_type','salary_unhandled','salary_pattern'])

    return df_unique

In [38]:
df_transformed.dtypes

Date Obtained       datetime64[ns]
Link                        object
Company                     object
Position                    object
Location                    object
Classification              object
Industry                    object
Salary                      object
Work Arrangement            object
Day Posted                  object
job_id                      object
salary_min                 float64
salary_max                 float64
salary_type                 object
salary_unhandled              bool
salary_pattern               int64
dtype: object

In [39]:
df_unique = sort_unique_job_id(df_transformed, df_unique)

df_unique['Date Obtained'] = pd.to_datetime(df_unique['Date Obtained'])

In [40]:
def get_job_postdate (day_posted, time_ref):
    time_regex = r'(\d+)(m|d|h)(\+)?'

    relative_date = day_posted.lower().strip()

    match = re.search(time_regex, relative_date)
    if not match or match.group(3):
        return None
    
    if match.group(2) == 'm':
        return time_ref - timedelta(minutes = int(match.group(1)))
    elif match.group(2) == 'h':
        return time_ref - timedelta(hours = int(match.group(1)))
    elif match.group(2) == 'd':
        return time_ref - timedelta(days = int(match.group(1)))
    else:
        return None

In [41]:
df_unique['post_date'] = pd.to_datetime(df_unique.apply(
    lambda x:get_job_postdate (x['Day Posted'], time_ref = x['Date Obtained']),
    axis = 1
))

In [46]:
df_unique[df_unique['Day Posted'] == '30d+ ago']['job_id'].unique()

array(['78989457', '80037393', '79004435', '79151153', '79341544',
       '79526144', '79687091', '79736502', '79815064', '79985682',
       '80218076', '79572930', '80194431', '80256246', '80254607',
       '80401872', '80665449', '82889675', '80946367', '81115059',
       '81366983', '81470641', '81517096', '81938855', '80245355',
       '82493696', '78818835', '78691483', '75699671', '76333561',
       '76594626', '76633426', '76328782', '77220481', '76625639',
       '76627039', '76758796', '76744105', '76759028', '78759264',
       '76935616', '76987283', '79362904', '76987241', '76994034',
       '76997351', '77058756', '77188091', '77478438', '77541455',
       '79665148', '77827947', '77634075', '81861895', '82052309',
       '83180789', '83189978', '83200178', '83200346', '83199380',
       '83208112', '83226061', '83226324', '83240193', '83851760',
       '83301229', '83181036', '83300550', '83301526', '82028870',
       '83151728', '83143862', '83134738', '83851844', '831346

In [42]:
df_unique.head()

Unnamed: 0,Date Obtained,Link,Day Posted,job_id,post_date
0,2025-04-05 22:18:00,https://ph.jobstreet.com/job/83980792?type=sta...,13h ago,83980792,2025-04-05 09:18:00
1,2025-04-05 22:18:00,https://ph.jobstreet.com/job/83403318?type=sta...,25d ago,83403318,2025-03-11 22:18:00
2,2025-04-05 22:18:00,https://ph.jobstreet.com/job/83418034?type=sta...,25d ago,83418034,2025-03-11 22:18:00
3,2025-04-05 22:18:00,https://ph.jobstreet.com/job/83420593?type=sta...,24d ago,83420593,2025-03-12 22:18:00
4,2025-04-05 22:18:00,https://ph.jobstreet.com/job/83420600?type=sta...,24d ago,83420600,2025-03-12 22:18:00


In [43]:
df_unique.to_csv('unique_jobs.csv', index = False)
df_transformed.to_csv('transformed_jobstreet_jobs.csv', index = False, encoding='utf-8-sig')