## ***Job Market Insights***  

In [None]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the job-market CSV into the working DataFrame.
# NOTE(review): this is an absolute path (looks like a Colab upload location);
# adjust DATA_PATH when running the notebook elsewhere.
DATA_PATH = '/content/job_market.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Print a structural summary of the raw data: column names, non-null counts,
# dtypes and memory usage.
df.info()

In [None]:
# Size of the raw data as a (rows, columns) tuple, before any cleaning.
df.shape

# ***Cleaning the Data***

In [None]:
# Count the missing values in every column of the file
df.isna().sum()

In [None]:
# Count the rows that are exact duplicates of an earlier row.
# NOTE(review): the count is only inspected here; none of the visible cells
# drops duplicate rows -- confirm that is intended.
df.duplicated().sum()

In [None]:
# Drop every row that has at least one missing value, modifying df in place
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Re-run the structural summary after dropping null rows; every column should
# now report the same non-null count.
df.info()

In [None]:
# Confirm that no missing values remain after the null rows were removed
df.isna().sum()

In [None]:
# Display the cleaned DataFrame.
df

In [None]:
# First 20 rows of the cleaned data
df.iloc[:20]

In [None]:
# Last 5 rows of the cleaned data
df.iloc[-5:]

In [None]:
# List the column labels available for the analysis below.
df.columns

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns.
df.describe()

In [None]:
# Ten most frequent job titles, drawn as a bar chart
title_counts = df['job_title'].value_counts()
d1 = title_counts.head(10)

fig, ax = plt.subplots(figsize=(10, 4))
sns.barplot(
    x=d1.index,
    y=d1.values,
    hue=d1.index,       # colour each bar by its own title
    palette='viridis',
    ax=ax,
)
ax.set_title('Most common job title', fontweight='bold')
ax.set_xlabel('Job Title', fontweight='bold')
ax.set_ylabel('Number of jobs', fontweight='bold')
plt.xticks(rotation=90)  # titles are long; rotate so they do not overlap
plt.show()

In [None]:
# Companies with the most postings, with a fitted trend line over the ranking.
d3 = df['company'].value_counts().head(10)

# Name both columns explicitly: the default names produced by
# value_counts().reset_index() differ between pandas versions.
d3_df = d3.rename_axis('company').reset_index(name='postings')
d3_df['rank'] = range(1, len(d3_df) + 1)

plt.figure(figsize=(10, 6))
sns.regplot(
    data=d3_df,
    x='rank',
    y='postings',
    scatter_kws={'s': 100, 'color': 'lightgreen'},
    line_kws={'color': 'violet', 'lw': 2},
)
plt.title(f'Top {len(d3_df)} Companies Job Postings Trend', fontweight='bold')
plt.xlabel('Company Rank', fontweight='bold')
plt.ylabel('Number of Postings', fontweight='bold')
# Derive the tick positions from the data instead of a fixed range(1, 11):
# with fewer than 10 companies the tick and label counts would not match.
plt.xticks(d3_df['rank'], d3_df['company'], rotation=90)
plt.grid(True, alpha=0.3)
plt.show()


In [None]:
# Number of distinct locations each company posts jobs in.
# nunique() counts different locations; count() would return the number of
# postings per company, which does not match the 'Number of Locations' axis.
d2 = df.groupby('company')['location'].nunique().sort_values(ascending=True)

plt.figure(figsize=(14, 8))
plt.stem(d2.index, d2.values, basefmt=" ", linefmt='lightgreen', markerfmt='o', bottom=0)
plt.title('Company Wise Location Count', fontweight='bold')
plt.xlabel('Company', fontweight='bold')
plt.ylabel('Number of Locations', fontweight='bold')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

In [None]:
# Company wise job type count: one group of bars per company, one bar per
# job type.
job_type_counts = pd.crosstab(df['company'], df['job_type'])

# Keep the Axes in its own name rather than rebinding d2 (a Series in the
# previous cell), and label the chart like the other figures in the notebook.
ax = job_type_counts.plot(kind='bar', figsize=(10, 6))
ax.set_title('Company wise job type count', fontweight='bold')
ax.set_xlabel('Company', fontweight='bold')
ax.set_ylabel('Number of jobs', fontweight='bold')
plt.show()


In [None]:
# Average salary per job title.
# The midpoint of the posted range is used as each posting's salary; the
# column is stored on df because later cells reuse it.
df['avg_salary'] = (df['salary_min'] + df['salary_max']) / 2
d4 = df.groupby('job_title')['avg_salary'].mean().sort_values(ascending=True)

plt.figure(figsize=(15, 6))
# One bar per job title. histplot(x=..., y=...) would bin the (title, salary)
# pairs into a 2-D histogram instead of showing each title's mean, and it
# ignores `palette` when no `hue` is given.
sns.barplot(x=d4.index, y=d4.values, hue=d4.index, palette='Set2')
plt.title('Average salary for job title', fontweight='bold')
plt.xlabel('job Title', fontweight='bold')
plt.ylabel('Salary', fontweight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Smoothed (KDE) distribution of the experience required across postings
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(
    data=df,
    x='experience_required',
    color='mediumPurple',
    fill=True,
    ax=ax,
)
ax.set(
    title='Distribution of Experience Required',
    xlabel='Experience',
    ylabel='Density',
)
plt.show()


Experience wise number of jobs

In [None]:
# Scatter of required experience against the per-posting average salary
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(data=df, x='experience_required', y='avg_salary', ax=ax)
ax.set_title('Experience Vs salary', fontweight='bold')
ax.set_xlabel('Experience', fontweight='bold')
ax.set_ylabel('Salary', fontweight='bold')
plt.show()

In [None]:
# Salary by job title. lineplot aggregates the repeated job titles itself
# (mean with a confidence band by default), so no separate groupby is needed;
# the per-title sum that used to be computed here was never used.
plt.figure(figsize=(12, 6))
sns.lineplot(data=df, x='job_title', y='avg_salary')
# The x-axis is job title, so the title says so (it previously read
# 'Experience wise Salary').
plt.title('Job title wise Salary', fontweight='bold')
plt.xlabel('Job title', fontweight='bold')
plt.ylabel('Salary', fontweight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Location wise average salary.
# Keep the sorted per-location means (previously computed and discarded) and
# use them to order the violins from lowest to highest average salary.
loc_avg = df.groupby('location')['avg_salary'].mean().sort_values(ascending=True)

plt.figure(figsize=(10, 6))
sns.violinplot(x='location', y='avg_salary', data=df, order=loc_avg.index.tolist())
plt.title('Location wise average salary', fontweight='bold')
plt.xlabel('Location', fontweight='bold')
plt.ylabel('Salary', fontweight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Break the comma-separated skills column into one trimmed skill per row
skills_series = (
    df['skills']
    .dropna()
    .str.split(",")
    .explode()
    .str.strip()
)

# The 15 most frequently listed skills
top_skills = skills_series.value_counts().head(15)
print(top_skills)

slice_colors = sns.color_palette("crest", len(top_skills))
plt.pie(
    x=top_skills.values,
    labels=top_skills.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=slice_colors,
)
plt.title("Top 15 Most Demanded Skills", fontweight='bold')
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

The top 3 most in-demand skills are:
1. Machine Learning
2. Python
3. Go

The 3 least in-demand skills are:
1. MongoDB
2. TensorFlow
3. Java

In [None]:
# Pairwise correlations between the numeric columns, drawn as an annotated heatmap
corr_matrix = df.corr(numeric_only=True)
sns.heatmap(corr_matrix, cmap='inferno', annot=True)

In [None]:
# First 10 postings whose job title is exactly 'Engineering Manager'
is_eng_manager = df['job_title'] == 'Engineering Manager'
df[is_eng_manager].head(10)

In [None]:
# Average salary for each company / job title / experience combination.
# 'mean' is used instead of 'sum': adding up per-posting average salaries
# gives a total that grows with the number of postings, not a comparable
# salary figure.
pd.pivot_table(
    df,
    index=['company', 'job_title', 'experience_required'],
    values=['avg_salary'],
    aggfunc='mean',
)

In [None]:
# Save the cleaned data (including the derived avg_salary column) to CSV.
# index=False: the row index only holds the original row numbers, with gaps
# left by dropna, and would otherwise be written as an unnamed extra column.
df.to_csv('New_Job_Market.csv', index=False)