In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import os

os.makedirs("visualizations", exist_ok=True)


In [4]:
import pandas as pd

df = pd.read_excel("remoteok_jobs.xlsx")


In [5]:
df.to_csv("remoteok_jobs_cleaned.csv", index=False)


In [6]:
df = pd.read_csv("remoteok_jobs_cleaned.csv")


In [11]:

df_skills = df.copy()
df_skills['Skills / Tags'] = df_skills['Skills / Tags'].dropna().str.split(', ')
df_skills = df_skills.explode('Skills / Tags')

# Visualization 1: Top 10 Skills Demand

top_skills = df_skills['Skills / Tags'].value_counts().head(10)

plt.figure(figsize=(12, 6))
top_skills.plot(kind='bar', color='steelblue')
plt.title('Top 10 Most Demanded Skills in Remote Jobs', fontweight='bold')
plt.xlabel('Skill')
plt.ylabel('Number of Job Postings')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig("visualizations/top_skills.png", dpi=300)
plt.close()

# Visualization 2: Job Type Distribution

job_type_counts = df['Job Type'].value_counts()

plt.figure(figsize=(10, 8))
plt.pie(job_type_counts, labels=job_type_counts.index,
        autopct='%1.1f%%', startangle=90)
plt.title('Distribution of Job Types in Remote Jobs', fontweight='bold')
plt.tight_layout()
plt.savefig("visualizations/job_type_distribution.png", dpi=300)
plt.close()

# Visualization 3: Top 10 Job Titles

top_titles = df['Job Title'].value_counts().head(10)

plt.figure(figsize=(12, 8))
top_titles.plot(kind='barh', color='coral')
plt.title('Top 10 Most Common Remote Job Titles', fontweight='bold')
plt.xlabel('Number of Postings')
plt.ylabel('Job Title')
plt.tight_layout()
plt.savefig("visualizations/top_job_titles.png", dpi=300)
plt.close()

# Visualization 4: Skill Frequency Comparison

top_skills_extended = df_skills['Skills / Tags'].value_counts().head(15)

plt.figure(figsize=(12, 10))
top_skills_extended.plot(kind='barh', color='lightgreen')
plt.title('Top 15 Skills Frequency in Remote Job Postings', fontweight='bold')
plt.xlabel('Frequency')
plt.ylabel('Skill')
plt.tight_layout()
plt.savefig("visualizations/skill_frequency_comparison.png", dpi=300)
plt.close()

# Comparative Analysis 1: Contract vs Full-Time

full_time = df[df['Job Type'] == 'Full-Time']
contract = df[df['Job Type'] == 'Contract']

def extract_skills(sub_df):
    temp = sub_df.copy()
    temp['Skills / Tags'] = temp['Skills / Tags'].dropna().str.split(', ')
    return temp.explode('Skills / Tags')

ft_skills = extract_skills(full_time)
ct_skills = extract_skills(contract)

print("\nTop Skills for Full-Time Jobs:")
print(ft_skills['Skills / Tags'].value_counts().head(10))

print("\nTop Skills for Contract Jobs:")
print(ct_skills['Skills / Tags'].value_counts().head(10))

# Comparative Analysis 2: Skill Demand Across Job Titles

top_titles = df['Job Title'].value_counts().head(3).index.tolist()

for title in top_titles:
    subset = df[df['Job Title'] == title]
    temp = subset.copy()
    temp['Skills / Tags'] = temp['Skills / Tags'].dropna().str.split(', ')
    temp = temp.explode('Skills / Tags')

    print(f"\nTop skills for '{title}':")
    print(temp['Skills / Tags'].value_counts().head(5))


# Comparative Analysis 3: Remote Distribution

location_counts = df['Location'].value_counts()
location_percentages = (location_counts / len(df) * 100).round(2)

print("\nTop Locations for Remote Jobs:")
for loc, count in location_counts.head(10).items():
    print(f"{loc}: {count} jobs ({location_percentages[loc]}%)")

print(f"\nTop 5 locations account for {location_percentages.head(5).sum()}% of jobs")



Top Skills for Full-Time Jobs:
Series([], Name: count, dtype: int64)

Top Skills for Contract Jobs:
Series([], Name: count, dtype: int64)

Top skills for 'Senior Data Engineer':
Skills / Tags
senior         2
engineer       2
engineering    2
crypto         1
operational    1
Name: count, dtype: int64

Top skills for 'Espresso':
Series([], Name: count, dtype: int64)

Top skills for 'Staff Software Engineer':
Skills / Tags
software     1
design       1
technical    1
support      1
code         1
Name: count, dtype: int64

Top Locations for Remote Jobs:
Remote: 32 jobs (32.0%)
United States: 12 jobs (12.0%)
Palo Alto: 2 jobs (2.0%)
New York City: 2 jobs (2.0%)
Remote - US: 2 jobs (2.0%)
San Francisco: 2 jobs (2.0%)
Texas: 2 jobs (2.0%)
London: 2 jobs (2.0%)
Remote - North America: 2 jobs (2.0%)
Remote - United States: 2 jobs (2.0%)

Top 5 locations account for 50.0% of jobs
