In [121]:
# Dependencies and Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
from datetime import datetime


In [122]:
# set path 
file_path_underemployment = ('./underemployed_cleaned_df.csv')

# Read file

df = pd.read_csv(file_path_underemployment)

# Creating Year column and setting to index
df['Year'] = pd.to_datetime(df['datetime_col']).dt.year

# set index
# df = df.set_index('Year')


# display csv
df

Unnamed: 0,datetime_col,Total,NSW,VIC,QLD,SA,WA,TAS,NT,ACT,Year
0,2015-01-01 00:00:00,8.522065,7.764878,9.543552,9.094776,8.846687,7.919322,10.512648,4.051508,6.206704,2015
1,2015-02-01 00:00:00,8.534239,8.006742,9.044415,9.019849,9.192436,7.429148,10.690078,4.279800,6.878685,2015
2,2015-03-01 00:00:00,8.156700,7.854217,8.535019,8.686408,8.370271,7.013054,10.236457,5.313185,6.929772,2015
3,2015-04-01 00:00:00,8.321234,8.272812,8.090192,8.918204,8.666951,7.208587,10.534614,4.051614,6.379935,2015
4,2015-05-01 00:00:00,8.380915,8.188289,8.766596,8.508942,9.756769,7.521865,10.898142,4.263507,5.807633,2015
...,...,...,...,...,...,...,...,...,...,...,...
67,2020-08-01 00:00:00,11.274028,10.313800,14.510257,9.967351,11.735906,9.933345,12.291099,7.656487,5.638565,2020
68,2020-09-01 00:00:00,11.402061,10.193139,14.941599,10.680071,11.027452,9.444054,10.741279,5.962201,6.136426,2020
69,2020-10-01 00:00:00,10.363131,9.737277,12.919049,9.478337,10.089841,8.634007,10.476856,7.768383,7.371226,2020
70,2020-11-01 00:00:00,9.291054,8.831102,10.206324,9.351090,10.631538,7.939880,10.118709,5.938504,5.911004,2020


In [123]:
df.columns

Index(['datetime_col', 'Total', 'NSW', 'VIC', 'QLD', 'SA', 'WA', 'TAS', 'NT',
       'ACT', 'Year'],
      dtype='object')

In [124]:
year_average_df = df.groupby('Year').mean()
year_average_df = year_average_df.reset_index()
year_average_df

Unnamed: 0,Year,Total,NSW,VIC,QLD,SA,WA,TAS,NT,ACT
0,2015,8.491477,8.03426,8.952324,8.666097,9.737795,8.005288,10.340877,4.659565,6.67066
1,2016,8.599235,7.991173,9.163412,8.350379,10.132277,9.19152,9.384522,4.998575,5.870782
2,2017,8.622179,8.023125,8.68421,8.6902,9.751985,9.87649,10.268192,4.931026,6.197332
3,2018,8.448469,7.996342,8.119939,8.980506,9.610686,9.399411,9.968685,4.267286,6.235299
4,2019,8.333806,7.781463,8.351959,8.609011,9.091572,9.233481,10.497271,5.684137,5.833325
5,2020,10.554516,10.126741,11.827599,10.095956,11.333123,9.869126,11.986933,7.40554,6.323311


In [125]:
# Generate a line plot of underemployment vs. time point for each state and overall

# locate data for each plot
Total = year_average_df['Total']
NSW = year_average_df['NSW']
VIC = year_average_df['VIC']
QLD = year_average_df['QLD']
SA = year_average_df['SA']
WA = year_average_df['WA']
TAS =year_average_df['TAS']
NT =year_average_df['NT']
ACT = year_average_df['ACT']


# # set x_axis for chart
x_axis = year_average_df['Year']

# plot
plt.plot(x_axis,Total, Label='Total Population')
plt.plot(x_axis,NSW, Label='NSW')
plt.plot(x_axis,VIC, Label='VIC')
plt.plot(x_axis,QLD, Label='QLD')
plt.plot(x_axis,SA, Label='SA')
plt.plot(x_axis,WA, Label='WA')
plt.plot(x_axis,TAS, Label='TAS')
plt.plot(x_axis,NT, Label='NT')
plt.plot(x_axis,ACT, Label='ACT')


# Increase size of plot in jupyter

# plt.rcParams["figure.figsize"] = (10,5)

# # years = [2015, 2016, 2017, 2018, 2019, 2020]
# # plt.xticks(rotation=20)

plt.title('Underemployment based on proportion of labour force')
plt.xlabel("Years")
plt.ylabel("Underemployment Rate")
plt.legend(loc=3)
plt.grid()

# save
plt.savefig('Figures/Underemployment.png', bbox_inches='tight')

# show
plt.show()


<IPython.core.display.Javascript object>

In [107]:
# year_average_df.mean()

In [126]:
####Unemployment

file_path_unemplyoment = ('./unemployment_cleaned_df.csv')
# Read file

df_unemployment = pd.read_csv(file_path_underemployment)

# Creating Year column and setting to index
df_unemployment['Year'] = pd.to_datetime(df_unemployment['datetime_col']).dt.year

df_unemployment

Unnamed: 0,datetime_col,Total,NSW,VIC,QLD,SA,WA,TAS,NT,ACT,Year
0,2015-01-01 00:00:00,8.522065,7.764878,9.543552,9.094776,8.846687,7.919322,10.512648,4.051508,6.206704,2015
1,2015-02-01 00:00:00,8.534239,8.006742,9.044415,9.019849,9.192436,7.429148,10.690078,4.279800,6.878685,2015
2,2015-03-01 00:00:00,8.156700,7.854217,8.535019,8.686408,8.370271,7.013054,10.236457,5.313185,6.929772,2015
3,2015-04-01 00:00:00,8.321234,8.272812,8.090192,8.918204,8.666951,7.208587,10.534614,4.051614,6.379935,2015
4,2015-05-01 00:00:00,8.380915,8.188289,8.766596,8.508942,9.756769,7.521865,10.898142,4.263507,5.807633,2015
...,...,...,...,...,...,...,...,...,...,...,...
67,2020-08-01 00:00:00,11.274028,10.313800,14.510257,9.967351,11.735906,9.933345,12.291099,7.656487,5.638565,2020
68,2020-09-01 00:00:00,11.402061,10.193139,14.941599,10.680071,11.027452,9.444054,10.741279,5.962201,6.136426,2020
69,2020-10-01 00:00:00,10.363131,9.737277,12.919049,9.478337,10.089841,8.634007,10.476856,7.768383,7.371226,2020
70,2020-11-01 00:00:00,9.291054,8.831102,10.206324,9.351090,10.631538,7.939880,10.118709,5.938504,5.911004,2020


In [127]:
year_average_unemployment_df = df_unemployment.groupby('Year').mean()
year_average_unemployment_df = year_average_unemployment_df.reset_index()
year_average_unemployment_df

Unnamed: 0,Year,Total,NSW,VIC,QLD,SA,WA,TAS,NT,ACT
0,2015,8.491477,8.03426,8.952324,8.666097,9.737795,8.005288,10.340877,4.659565,6.67066
1,2016,8.599235,7.991173,9.163412,8.350379,10.132277,9.19152,9.384522,4.998575,5.870782
2,2017,8.622179,8.023125,8.68421,8.6902,9.751985,9.87649,10.268192,4.931026,6.197332
3,2018,8.448469,7.996342,8.119939,8.980506,9.610686,9.399411,9.968685,4.267286,6.235299
4,2019,8.333806,7.781463,8.351959,8.609011,9.091572,9.233481,10.497271,5.684137,5.833325
5,2020,10.554516,10.126741,11.827599,10.095956,11.333123,9.869126,11.986933,7.40554,6.323311


In [131]:
# Generate a line plot of unemployment vs. time point for each state 

# locate data for each plot
Total = year_average_unemployment_df['Total']
NSW = year_average_unemployment_df['NSW']
VIC = year_average_unemployment_df['VIC']
QLD = year_average_unemployment_df['QLD']
SA = year_average_unemployment_df['SA']
WA = year_average_unemployment_df['WA']
TAS =year_average_unemployment_df['TAS']
NT =year_average_unemployment_df['NT']
ACT =year_average_unemployment_df['ACT']

In [133]:
plt.figure(2)

# # set x_axis for chart
x_axis = year_average_unemployment_df['Year']

# plot
plt.plot(x_axis,Total, Label='Total Population')
plt.plot(x_axis,NSW, Label='NSW')
plt.plot(x_axis,VIC, Label='VIC')
plt.plot(x_axis,QLD, Label='QLD')
plt.plot(x_axis,SA, Label='SA')
plt.plot(x_axis,WA, Label='WA')
plt.plot(x_axis,TAS, Label='TAS')
plt.plot(x_axis,NT, Label='NT')
plt.plot(x_axis,ACT, Label='ACT')


# Increase size of plot in jupyter

# plt.rcParams["figure.figsize"] = (10,5)

# # years = [2015, 2016, 2017, 2018, 2019, 2020]
# # plt.xticks(rotation=20)

plt.title('Unemployment Flow 2015-2020')
plt.xlabel("Years")
plt.ylabel("Unemployment")
plt.legend(loc=3)
plt.grid()

# save
plt.savefig('Figures/Unemployment.png', bbox_inches='tight')

# show
plt.show()

<IPython.core.display.Javascript object>