## Importing Libraries

In [None]:
import pandas as pd
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

## Set Parameters

In [None]:
# Default canvas for every figure in the notebook: 12 wide by 5 tall (inches).
plt.rcParams.update({'figure.figsize': (12, 5)})

# Shared text styling; the plotting cells pass this to plt.title(fontdict=...).
font = dict(
    family='serif',
    color='darkred',
    weight='normal',
    size=16,
)

## Load Data

In [None]:
# Location of the orders export inside the Kaggle input directory.
orders_csv = "../input/gufhtugu-publications-dataset-challenge/GP Orders - 4.csv"

# Read the file and discard every row that has a missing value in any column,
# so the cells below can treat each field as present.
df = pd.read_csv(orders_csv).dropna()
df.head()

## Preprocessing Book Names

In [None]:
# Tally how many order rows mention each title.
# A 'Book Name' cell may list several titles separated by '/'; str.split
# returns a one-element list when there is no '/', so single-title rows
# go through the same loop without a special case.
book_counts = {}
for order_books in df['Book Name']:
    for title in order_books.split('/'):
        book_counts[title] = book_counts.get(title, 0) + 1

## Top Selling Books

In [None]:
# Order the titles by their tally in book_counts, highest first, and keep five.
ranked = sorted(book_counts.items(), key=lambda pair: pair[1], reverse=True)
top = dict(ranked[:5])

# barh draws its first entry at the bottom, so reverse the order to put the
# highest count on the top bar.
titles = list(top)[::-1]
tallies = [top[title] for title in titles]

plt.barh(titles, tallies)
plt.title('Top 5 Selling Books', fontdict=font)
plt.margins(0.01)
plt.show()

## Least Selling Books

In [None]:
# Order the titles by their tally in book_counts, lowest first, and keep five.
ranked = sorted(book_counts.items(), key=lambda pair: pair[1])
least = dict(ranked[:5])

# barh draws its first entry at the bottom, so reverse the order to put the
# lowest count on the top bar.
titles = list(least)[::-1]
tallies = [least[title] for title in titles]

plt.barh(titles, tallies)
plt.title('Least 5 Selling Books', fontdict=font)
plt.margins(0.01)
plt.show()

## Visualizing Order Status

In [None]:
# Number of orders per status, most frequent first (value_counts sorts
# in descending order by default).
# Named `status_counts` rather than `os`: binding a notebook-level variable
# called `os` would shadow the standard-library module name and break any
# later cell that imports and uses `os`.
status_counts = df['Order Status'].value_counts()

# barh draws its first entry at the bottom, so reverse the order to put the
# most frequent status on the top bar.
plt.barh(status_counts.index.tolist()[::-1], status_counts.tolist()[::-1])
plt.title('Books Order Status', fontdict=font)
plt.margins(0.01)
plt.show()

## Exploring Returned Orders

In [None]:
# Keep only the rows whose status is exactly 'Returned'.
returned_mask = df['Order Status'] == 'Returned'
df_new = df[returned_mask]
df_new.head()

In [None]:
# Tally how many returned order rows mention each title, and count how many
# returned orders list more than one title ('/'-separated in 'Book Name').
returned_counts = {}
multi_book_orders = 0
for order_books in df_new['Book Name']:
    titles = order_books.split('/')
    # More than one piece after splitting means the cell contained a '/'.
    if len(titles) > 1:
        multi_book_orders += 1
    for title in titles:
        returned_counts[title] = returned_counts.get(title, 0) + 1

# Share of returned orders that contain several titles. Guard the division:
# with no returned orders the bare ratio would raise ZeroDivisionError.
returned_total = len(df_new)
print(multi_book_orders / returned_total if returned_total else 0.0)

In [None]:
# (title, count) pairs ordered by count, highest first; the sort is stable,
# so titles with equal counts keep their first-seen order.
top = list(returned_counts.items())
top.sort(key=lambda pair: pair[1], reverse=True)

# Display the most frequently returned title together with its count.
top[0]

In [None]:
# Number of returned orders per billing city, most frequent city first.
returned_cities = df_new['City (Billing)']
returned_cities.value_counts()

### Findings:
<b>*</b> <b><i>21%</i></b> of returned orders contained more than one book.<br>
<b>*</b> The most returned book is <b><i>Internet Se Paise Kamayen</i></b>, which appears in returned orders exactly <b><i>481</i></b> times.<br>
<b>*</b> <b><i>Karachi</i></b> accounts for more returned orders than any other city.<br>

## Order Frequency by Day

In [None]:
# Parse the order timestamps, then derive each order's weekday name.
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Day'] = df['Order Date'].dt.day_name()

# value_counts lists the busiest day first; reverse it so that day ends up
# on the top bar of the horizontal chart.
orders_per_day = df['Day'].value_counts()
orders_per_day.iloc[::-1].plot.barh()
plt.title('Order Frequency by Day', fontdict=font)
plt.margins(0.01)
plt.show()