## **Importing required libraries**

In [None]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides library deprecation warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Use seaborn's "darkgrid" style for the figures drawn below.
sns.set(style='darkgrid')


# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, files_in_folder in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in files_in_folder)
    for full_path in full_paths:
        print(full_path)


## **Reading csv file**

In [None]:
# Load the courier dataset from the Kaggle input directory into a DataFrame.
courier_data = pd.read_csv(
    "/kaggle/input/pakistans-courier-companies-dataset/courier_dataset.csv"
)

In [None]:
# Preview the first 10 rows of the freshly loaded dataframe.
courier_data.head(10)

## **Dropping `Unnamed: 7`**

In [None]:
# Remove the "Unnamed: 7" column (presumably an empty artifact of the CSV
# export — verify against the raw file).
# Reassigning the result and passing errors="ignore" keeps this cell
# re-runnable: a second run is a no-op instead of a KeyError for the column
# that is already gone.
courier_data = courier_data.drop(columns="Unnamed: 7", errors="ignore")

## **Analyze meta information about dataframe**

In [None]:
# Report the (rows, columns) dimensions of the dataframe.
frame_shape = courier_data.shape
print(f"Shape of dataframe is: {frame_shape}")

### **Statistical information of dataframe**

In [None]:
# Summary statistics from DataFrame.describe() with its default arguments.
courier_data.describe()

In [None]:
# Data type of each column.
courier_data.dtypes

### **Renaming column**

In [None]:
# Put the unit into the column label; the mapping is applied to column names.
column_renames = {"Delivery Time": "Delivery Time (in days)"}
courier_data.rename(columns=column_renames, inplace=True)

### **Declaring a function that converts the type of `Delivery Time` to `int`**

In [None]:
def change_type(word):
    """Convert a delivery-time value to a whole number of days.

    Parameters
    ----------
    word : str or integer-valued number
        Either text whose first whitespace-separated token is an integer
        (for example ``"3 days"``), or a value that is already a whole
        number. Accepting the latter makes re-applying this function to an
        already converted column a no-op instead of an ``AttributeError``.

    Returns
    -------
    int
        The number of days.

    Raises
    ------
    ValueError
        If the text is blank, its first token is not an integer, or a
        numeric input is not a whole number (including NaN).
    """
    if isinstance(word, str):
        # split() with no separator tolerates leading and repeated whitespace,
        # which split(" ") would turn into empty tokens.
        tokens = word.split()
        if not tokens:
            raise ValueError("cannot parse a delivery time from a blank string")
        return int(tokens[0])

    days = int(word)
    # Refuse to silently truncate fractional values such as 2.5.
    if days != word:
        raise ValueError(f"delivery time {word!r} is not a whole number of days")
    return days

In [None]:
# Replace each delivery-time entry with the integer produced by change_type.
delivery_col = 'Delivery Time (in days)'
courier_data[delivery_col] = courier_data[delivery_col].apply(change_type)

In [None]:
# Preview the first 10 rows of the dataframe.
courier_data.head(10)

## **Missing values**

In [None]:
# Heatmap of the null mask: one heatmap column per dataframe column,
# with cells marked where a value is missing.
null_mask = courier_data.isnull()

fig, ax = plt.subplots(figsize=(8, 5))
ax.set_title("Missing values in each column\n", size=15)
sns.heatmap(null_mask, cbar=False, yticklabels=False, ax=ax);

**So only the `Province` column has `NaN` values**

In [None]:
# Count the missing (NaN) entries in the Province column.
courier_data['Province'].isna().sum()

## **Filling `NaN` values**

In [None]:
colors_list = ['b', 'r', 'g', 'y', 'k']

fig, ax = plt.subplots(figsize=(10, 6))

# Title and axis labels are applied before the pandas plot call, in the same
# order as before.
# NOTE(review): some pandas versions may overwrite the x-label with the index
# name when plotting — confirm the rendered label is the intended one.
ax.set_title("Total counts of Each Province\n", size=15)
ax.set_xlabel("\nProvince")
ax.set_ylabel("Counts")

province_counts = courier_data['Province'].value_counts()
plot = province_counts.plot.bar(rot=1, color=colors_list, ax=ax)

# Write each bar's height, formatted without decimals, just above its top edge.
for bar in plot.patches:
    bar_height = bar.get_height()
    bar_centre_x = bar.get_x() + bar.get_width() / 2.
    plot.annotate(f"{bar_height:1.0f}",
                  (bar_centre_x, bar_height),
                  ha='center', va='center',
                  xytext=(0, 9),
                  textcoords='offset points')

In [None]:
# Number of rows for each Province value.
courier_data['Province'].value_counts()

In [None]:
# First entry of the Province mode, i.e. a most frequent Province value.
courier_data['Province'].mode()[0]

**So we fill the `NaN` values with the mode of `Province`, i.e. Punjab**

In [None]:
# Fill the missing Province entries with "Punjab".
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the `courier_data['Province']` selection: that
# chained in-place form is deprecated in pandas 2.x and leaves `courier_data`
# unchanged once Copy-on-Write is enabled, so the NaNs would silently remain.
courier_data['Province'] = courier_data['Province'].fillna("Punjab")

In [None]:
# Redraw the null-mask heatmap to check which cells are still missing.
remaining_nulls = courier_data.isnull()

fig, ax = plt.subplots(figsize=(8, 5))
ax.set_title("After filling Missing values in Province\n", size=15)
sns.heatmap(remaining_nulls, cbar=False, yticklabels=False, ax=ax);

## **Grouping dataframe by `Company` and `Status`**

In [None]:
# Mean delivery time and mean delivery charge for every (Company, Status)
# pair; as_index=False keeps the group keys as ordinary columns.
mean_by_metric = {
    'Delivery Time (in days)': 'mean',
    'Delivery Charges': 'mean',
}
courier_data.groupby(['Company', 'Status'], as_index=False).agg(mean_by_metric)

### **Summarizing `Delivery Time` and `Delivery Charges` per `Company`**

In [None]:
# Per-company count / min / max / mean of delivery time and delivery charges.
# The columns are selected with a list: selecting several groupby columns with
# a bare tuple (['a', 'b'] without the inner brackets) was deprecated in
# pandas 1.0 and raises in pandas 2.0+.
metric_columns = ['Delivery Time (in days)', 'Delivery Charges']
courier_data.groupby("Company")[metric_columns].agg(['count', 'min', 'max', 'mean'])

In [None]:
# Bar chart of the per-company count and mean of delivery time and charges.
# The columns are selected with a list; the bare-tuple form used previously
# raises in pandas 2.0+.
company_summary = (
    courier_data
    .groupby("Company")[['Delivery Time (in days)', 'Delivery Charges']]
    .agg(['count', 'mean'])
)
company_summary.plot(kind='bar', rot=45, figsize=(16, 6))

# 'upper left' is the named form of matplotlib's numeric location code 2.
plt.legend(loc='upper left')
plt.show()

## **Now it's your turn. Use the <a href="https://www.kaggle.com/muhammadshahrayar/pakistans-courier-companies-dataset">dataset</a> to extract different insights.**