In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

In [None]:
# Read the garment-worker productivity CSV into a DataFrame and show its
# (rows, columns) shape.
DATA_PATH = '../input/productivity-prediction-of-garment-employees/garments_worker_productivity.csv'
df = pd.read_csv(DATA_PATH)
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# True when the 'date' column is still stored with the generic object dtype.
df.dtypes['date'] == 'object'

# **Changing date column type to datetime64**

In [None]:
# Replace the 'date' column with its parsed datetime equivalent.
df = df.assign(date=pd.to_datetime(df['date']))

In [None]:
# Confirm the dtype of 'date' after conversion.
df.dtypes['date']

In [None]:
# Use the 'date' column as the row index (the column is moved into the index).
df = df.set_index('date')

In [None]:
# Preview the frame now that 'date' is the index.
df.head()

# **Filling missing WIP values with time-based interpolation (`interpolate(method='time')`)**

In [None]:
# Fill missing 'wip' values by interpolating along the datetime index.
# The result is assigned back to the column rather than calling
# interpolate(inplace=True) on the df['wip'] selection: that selection is a
# temporary object, so under pandas Copy-on-Write the in-place form leaves df
# untouched (and pandas 2.x emits a chained-assignment FutureWarning for it).
df['wip'] = df['wip'].interpolate(method='time')

In [None]:
# First rows of the 'wip' column, shown as a one-column frame.
df['wip'].to_frame().head()

In [None]:
# Count of 'wip' entries that are still missing.
df['wip'].isnull().sum()

In [None]:
# Distribution of 'idle_time'.
df['idle_time'].plot.hist()
plt.show()

In [None]:
# Distribution of 'idle_men'.
df['idle_men'].plot.hist()
plt.show()

In [None]:
# Distribution of 'no_of_style_change'.
df['no_of_style_change'].plot.hist()
plt.show()

# **Dropping the 'idle_men', 'idle_time' and 'no_of_style_change' columns because their values are overwhelmingly 0**

In [None]:
# Remove the three columns examined in the histograms above, then preview.
df = df.drop(columns=['idle_men', 'idle_time','no_of_style_change'])
df.head()

In [None]:
# List the distinct department labels as stored in the data
# (the following cell removes spaces from them).
df['department'].unique()

In [None]:
# Delete every space character from the department labels, then list the
# distinct labels that remain.
department_no_spaces = df['department'].str.replace(' ','')
df['department'] = department_no_spaces
df['department'].unique()

In [None]:
# Share of rows per department, with percentages printed to two decimals.
department_counts = df['department'].value_counts()
department_counts.plot.pie(autopct="%.2f")
plt.show()

In [None]:
# Share of rows per quarter label.
quarter_counts = df['quarter'].value_counts()
plt.title("Quarters")
quarter_counts.plot.pie()
plt.show()

In [None]:
# Distinct quarter labels; the per-quarter loops further down iterate over this.
quarters = df['quarter'].unique()

# **Department in each Quarter**

In [None]:
# One pie chart per quarter showing how its rows split across departments;
# the x-label carries the total row count for that quarter.
for quarter in quarters:
    in_quarter = df['quarter'] == quarter
    dept_counts = df.loc[in_quarter, 'department'].value_counts()
    plt.pie(dept_counts, labels=dept_counts.index, autopct="%.2f")
    plt.title(f"Department in {quarter}")
    plt.xlabel(f"Total:{dept_counts.sum()}")
    plt.show()
    print("\n")

# **Total working days**

In [None]:
# Horizontal bar chart of how many rows fall on each weekday.
day_counts = df['day'].value_counts()
day_counts.plot.barh()
plt.title("Total working days")
plt.xlabel('Frequency')
plt.show()

# **Work In Progress (WIP) by weekday**

In [None]:
# Histogram of 'wip' for each weekday present in the data.
days = df['day'].unique()

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and the bare 'seaborn' name was later removed, so choose whichever name this
# Matplotlib installation provides. Selecting a style is loop-invariant, so it
# is done once here instead of on every iteration.
# Note: plt.style.use updates the global rcParams, so plots in later cells
# inherit this style as well.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

for day in days:
    plt.title(f"Work In Progress on {day}s")
    wip_day = df['wip'][df['day'] == day]
    wip_day.plot(kind="hist", rwidth=0.95, color='orange')
    plt.show()
    

# **Targeted productivity vs Actual productivity**

In [None]:
# Summary statistics for the targeted and the actual productivity columns.
t_vs_a = ['targeted_productivity', 'actual_productivity']
df.loc[:, t_vs_a].describe()

In [None]:
# Overlaid polygon-style histograms of targeted vs. actual productivity.
productivity = df[['targeted_productivity', 'actual_productivity']]
sns.histplot(data=productivity, element='poly')
plt.show()

# **Analysis on incentives**

In [None]:
# Distribution of all incentive values.
df['incentive'].plot.hist()
plt.show()

# **Incentives > 0**

In [None]:
# Line plot of the strictly positive incentives against the index,
# drawn with a logarithmic y-axis.
positive_incentives = df.loc[df['incentive'] > 0, 'incentive']
positive_incentives.plot.line()
plt.yscale('log')
plt.show()

# **Incentives per Quarter**

In [None]:
# One histogram of strictly positive incentives per quarter label.
for quarter in quarters:
    selected = (df['quarter'] == quarter) & (df['incentive'] > 0)
    plt.title(f"Incentives in {quarter}")
    df.loc[selected, 'incentive'].plot.hist(rwidth=0.95)
    plt.show()
    print('\n')

# **Incentives per weekday**

In [None]:
# One histogram of strictly positive incentives per weekday.
for day in days:
    selected = (df['day'] == day) & (df['incentive'] > 0)
    plt.title(f"Incentives on {day}s")
    df.loc[selected, 'incentive'].plot.hist(rwidth=0.95)
    plt.show()
    print('\n')

# **Incentives for sewing vs incentives for finishing**

In [None]:
# One histogram of strictly positive incentives per department.
depts = df['department'].unique()
for dept in depts:
    plt.title(f"Incentives on {dept}")
    # Filter with "> 0", the same cut-off the per-quarter and per-weekday
    # incentive plots use; this cell previously used "> 1", which made the
    # department view inconsistent with the other two.
    df['incentive'][(df['department'] == dept) & (df['incentive']>0)].plot(kind='hist', rwidth=0.95)
    plt.show()
    print('\n')

# **Result: the finishing department receives higher incentives than the sewing department**

# **Team and overtime**

In [None]:
# Distinct values of the 'team' column, in order of first appearance;
# used as the x coordinates of the overtime plot below.
x_axis = df['team'].unique()

In [None]:
# Mean 'over_time' for each team, listed in the same order as x_axis.
y_axis = [
    df.loc[df['team'] == team_id, 'over_time'].mean()
    for team_id in x_axis
]

In [None]:
# Line plot of the mean overtime (y_axis) for each team (x_axis).
sns.lineplot(x=x_axis, y=y_axis)
plt.title("Team and Overtime")
# x_axis holds the distinct values of the 'team' column. Per the dataset's
# attribute description these are team identifiers, not head-counts, so the
# axis is labelled as the team number rather than "Team Size".
# NOTE(review): confirm against the dataset documentation.
plt.xlabel('Team Number')
plt.ylabel('Average Overtime')
plt.show()