In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in files)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

 **User Stories**

**1. Number of people hired in different employer industry**

As a student/aspiring employee, I would like to know the number of people hired in different industries, so that I can choose a profession that currently has more opportunities.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Count job placements per employer industry. `employer` is also read by the
# pie-chart cell further down, so it stays a notebook-level name.
gov_data = pd.read_csv("../input/govdata/govhack-job-placements-dataset_20160701-to-20170630.csv")
employer = gov_data["EMPLOYER_INDUSTRY"].value_counts()
width = 0.3  # assigned here but not passed to any plotting call in this cell

# One horizontal bar per industry, bar length = number of placements.
plt.figure(figsize=(20, 10))
plt.barh(y=employer.index, width=employer)
plt.xlabel("Number of people hired")
plt.ylabel("Employer Industry")


**2. Number of people employed with respect to gender**

As a student/aspiring employee, I would like to know how many people of each gender are employed. This helps to educate and encourage the next cohort of students by showing that opportunities are available for all genders.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the gender-by-job-type table and use the job type as the row index,
# so each job type becomes one group of bars.
my_data = pd.read_csv("../input/genderprof/genderjobtype1.csv", header=0).set_index('Job_type')
# The last two rows are excluded from the chart.
my_data = my_data[:-2]

# Grouped bar chart: one bar per remaining column for every job type.
ax = my_data.plot.bar(figsize=(15, 8), ylim=(0, 98000), width=0.8)
ax.set_title("Male and Female working in different industries", fontsize=16)
ax.set_xlabel("Job_Industry", fontsize=12)

**3. Working professions with respect to their age**

As a student/aspiring employee, I would like to know the existing demand for jobs in my age group. That helps me to get prepared accordingly to start my career as early as possible.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the age-group-by-industry table and index the rows by industry.
my_data = pd.read_csv("../input/jobtypevsage/jobtypevsagegroup.csv", header=0).set_index('Job_Industry')
# Keep rows 1..6 only (positional slice; row 0 and everything from row 7 on are left out).
my_data = my_data[1:7]

# Grouped bar chart: one bar per remaining column for every industry.
ax = my_data.plot.bar(figsize=(15, 10), ylim=(0, 13000), width=0.8)
ax.set_title("Different Age groups working in different industries", fontsize=16)
ax.set_xlabel("Job_Industry", fontsize=12)

**4. People employed over last 10 years (2011-till date)**

As a student/aspiring employee, I would like to know the employment rate over the last 10 years in Australia and analyse the effect of COVID on jobs, which helps me to understand today's job market.

In [None]:
# Load the employment trend data and parse its columns.
dataset = pd.read_csv('../input/employementdifferencetrend/Employment_Trend.csv')
# Dates are written as abbreviated month + two-digit year (format '%b-%y').
dataset['Date'] = pd.to_datetime(dataset['Date'], format='%b-%y')
# The second CSV column holds the counts as text; strip the commas before
# converting to float.
dataset['Employed people'] = dataset.iloc[:, 1].str.replace(',', '').astype(float)

# Change between each row and the one before it. Row i of Growth_Decline is
# employed[i + 1] - employed[i], so it has one row fewer than `dataset`.
# Series.diff() is used because DataFrame.append no longer exists in
# pandas >= 2.0 and appending row by row is quadratic.
Growth_Decline = (
    dataset['Employed people']
    .diff()
    .iloc[1:]                  # first diff is NaN: there is no previous row
    .reset_index(drop=True)
    .to_frame('Change_Rate')
)

# Plot the full employment series before trimming, so the most recent
# observation is not lost from this chart.
plt.plot(dataset['Date'], dataset['Employed people'])
plt.title('Year vs Employment in the country')
plt.xlabel('Year')
plt.ylabel('People employed each year')
plt.show()

# Drop the last row so `dataset` and `Growth_Decline` have the same length;
# the growth/decline cell plots dataset['Date'] against Change_Rate.
dataset = dataset.drop(dataset.index[-1:])

**5. Employer industry and their opportunities.**

As a student/aspiring employee, I would like to know which industries offer more opportunities, so that I can choose a suitable career path.

In [None]:
# Share of placements per employer industry. `employer` is the value_counts()
# Series built in the employer-industry bar-chart cell.
# The pie is drawn exactly once: an extra plt.pie(employer) call would paint a
# second, unlabeled set of wedges onto the same axes.
employer.plot(
    kind='pie',
    figsize=(15, 20),
    autopct='%2.2f%%',  # print each wedge's percentage with two decimals
)
# Equal axis scaling keeps the pie circular instead of elliptical.
plt.axis('equal')

**6. Difference in employment (employed – unemployed) over past 10 years**

As a student/aspiring employee, I would like to know the trend in difference in employment over the past decade, so that I can predict the job opportunities by the time I finish my graduation.

In [None]:
# Create the figure at the intended size BEFORE plotting. Calling plt.figure()
# after plt.show() does not resize the chart; it only opens a new, empty figure.
plt.figure(figsize=(20, 10))
# `dataset` and `Growth_Decline` come from the employment-trend cell, which
# trims `dataset` so that both have the same number of rows.
plt.plot(dataset['Date'], Growth_Decline['Change_Rate'])
plt.title('Year vs Growth/Decline of employment in the country')
plt.xlabel('Year')
plt.ylabel('Difference in people employed each year')
plt.show()

**7. Annual population change (%) over the last 10 years.**

As a student/aspiring employee, I would like to know the difference in population over the years to understand the relation between population and employment during the same time period. 

In [None]:
import statistics
# Average the annual population change per Date, then order rows from the
# smallest to the largest change.
# NOTE(review): this ordering only affects the bar order if 'Date' is
# non-numeric (categorical axis) — confirm whether chronological order was intended.
popdataset = (
    pd.read_csv('../input/populationd/Annual population growth rate(a)(b).csv')
    .groupby('Date')[['Annual population change (%)']]
    .mean()
    .reset_index()
    .sort_values('Annual population change (%)', ascending=True)
)

# One bar per Date showing the mean annual change.
plt.figure(figsize=(10, 7))
plt.bar(x=popdataset['Date'], height=popdataset['Annual population change (%)'])
plt.title("Annual population change over years")
plt.xlabel("Year")
plt.ylabel("Change in population (%)")
plt.show()

**8. Number of jobs posted in seek and gumtree for different types of work contract.**

As a final-year student, I would like to know the number of jobs posted under different work contracts on job boards like Seek and Gumtree.

In [None]:
# Tally Seek postings by contract type. `seek_data` is reused by the
# city-level cell later in the notebook.
seek_data = pd.read_csv("../input/seekdata/seek_australia.csv")
job_type = seek_data["job_type"].value_counts()
width = 0.3  # assigned here but not passed to plt.bar

# One magenta bar per contract type.
plt.xlabel("Type of Work Contract")
plt.bar(x=job_type.index, height=job_type, color='m')
plt.ylabel("Number of Jobs posted in Seek")

In [None]:
# Load the Gumtree postings and keep only rows whose contract type is one of
# the four listed values. `Gumtree_data` is reused by the state-level cell later on.
Gumtree_data = pd.read_csv("../input/gumtreejobsdata/Gumtree_australia.csv")
Gumtree_data = Gumtree_data[
    Gumtree_data['job_type'].isin(['Part-time', 'Full-time', 'Casual', 'Contract'])
]
job_type = Gumtree_data["job_type"].value_counts()
width = 0.3  # assigned here but not passed to plt.bar

# One yellow bar per contract type.
plt.xlabel("Type of Work Contract")
plt.bar(x=job_type.index, height=job_type, color='y')
plt.ylabel("Number of Jobs Posted through Gumtree")


**9. Number of jobs posted in seek and gumtree in major cities/states**

As a postgraduate student, I would like to analyse which city/state has the most jobs posted on job boards, so that I can relocate accordingly.

In [None]:
# Restrict the Seek postings (loaded in the job-type cell) to the major cities
# and count postings per city.
# 'Adelaide' must be spelled correctly here: isin() matches exact strings, so a
# misspelled name silently drops every posting for that city.
seek_data = seek_data[seek_data['city'].isin(['Sydney', 'Melbourne', 'Adelaide', 'Brisbane', 'Perth'])]
city = seek_data["city"].value_counts()

# One black bar per city.
plt.xlabel("Major Cities in Australia")
plt.ylabel("Number of Jobs Posted through seek")
plt.bar(city.index, city, color = 'k')


In [None]:
# Narrow the Gumtree postings (already filtered by contract type in the
# previous Gumtree cell) to four states and count postings per state.
Gumtree_data = Gumtree_data[Gumtree_data['state'].isin(['VIC', 'SA', 'QLD', 'NSW'])]
state = Gumtree_data["state"].value_counts()
width = 0.3  # assigned here but not passed to plt.bar

# One cyan bar per state.
plt.bar(x=state.index, height=state, color='c')
plt.xlabel("States in Australia")
plt.ylabel("Number of Jobs posted through Gumtree")

 **10. Average hourly pay rate in different trades.**

As an aspiring employee, I would like to know the average pay rates in different trades to choose the appropriate job.  

In [None]:
# Horizontal bar chart of the 'Average hourly pay' column against 'Trade'.
Hourlypay_data = pd.read_csv("../input/hourly-pay/2004-2017-averagehourlyearningsofmaleandfemaleemployeesbyoccupation-indicator-8-5-1 (2).csv")
plt.barh(Hourlypay_data['Trade'], Hourlypay_data['Average hourly pay'])
# Label the axes and title the chart so it can be read on its own.
plt.xlabel("Average hourly pay")
plt.ylabel("Trade")
plt.title("Average hourly pay by trade")
plt.show()

**11. Difficulties faced in employment**

As a student/aspiring employee, I want to know the types of difficulties faced in the employment market to prepare myself in a better way. 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the "main difficulty in finding work" table. header=1 takes the column
# names from the second line of the file.
my_data = pd.read_csv(
    "../input/difficultiesdata/Graph 3 - Main difficulty in finding work (1) (1).csv",
    header=1,
).set_index('Difficulties')
# The last two rows are excluded from the chart.
my_data = my_data[:-2]

# Bar chart with one group of bars per difficulty; the y-axis is capped at 25.
ax = my_data.plot.bar(figsize=(13, 6), ylim=(0, 25), width=0.8)
ax.set_title("Difficulties %", fontsize=16)
ax.set_ylabel("In %", fontsize=14)
ax.set_xlabel("Difficulties Reasons", fontsize=12)

The following is the link for the decision tree.

https://docs.google.com/forms/d/e/1FAIpQLSfzxuhaUG5yZ6jDUgJVP0O6u0gn_nFqpzezXe80b22h5LUD2g/viewform?usp=sf_link