# Importing libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
import matplotlib.pyplot as plt
import seaborn as sns


# Load data

In [None]:
# Read the supermarket sales sheet into a DataFrame and print its column summary.
df = pd.read_csv(
    '/kaggle/input/supermarket-sales/supermarket_sales - Sheet1.csv'
)
df.info()

In [None]:
# Preview the first three rows of the dataset.
df.head(n=3)

# **Questions & Graphs**

### 1. Sales volume by different payments methods 


In [None]:
# Sum of 'Total' and 'Quantity' per payment method, largest 'Total' first.
# GroupBy.sum() is called directly: handing the Python builtin `sum` to
# .apply() only works through pandas' builtin-to-grouped-sum aliasing, which
# recent pandas versions deprecate.
payments = (
    df.groupby(['Payment'])[['Total', 'Quantity']]
    .sum()
    .reset_index()
    .sort_values(by=['Total'], ascending=False)
)
payments.head()

### Graph # 1

In [None]:
# Bar chart of the summed 'Total' for each payment method.
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
# No global palette is configured here: sns.color_palette() merely returns a
# palette object, and the plot below supplies its own colours via `palette`.
# NOTE(review): newer seaborn releases may warn when `palette` is passed
# without `hue` — confirm against the installed version.
sns.barplot(x='Payment', y='Total', data=payments, palette='Pastel1')

#### As you can see, there is no preferred payment method, but customers choose cash payments slightly more.

### 2. Best-selling product lines

In [None]:
# Sum of 'Total' and 'Quantity' per product line, largest 'Total' first.
# GroupBy.sum() is called directly: handing the Python builtin `sum` to
# .apply() only works through pandas' builtin-to-grouped-sum aliasing, which
# recent pandas versions deprecate.
products = (
    df.groupby(['Product line'])[['Total', 'Quantity']]
    .sum()
    .reset_index()
    .sort_values(by=['Total'], ascending=False)
)
products.head()

### Graph # 2

In [None]:
# Bar chart of the summed 'Total' for each product line.
ax = sns.barplot(data=products, x='Product line', y='Total', palette='Pastel1')

# Tilt the category labels and right-align them so the long names stay readable.
tick_labels = ax.get_xticklabels()
ax.set_xticklabels(tick_labels, rotation=40, ha="right")

plt.tight_layout()
plt.show()

#### Again, no single product line clearly stands out from the rest.

### 3. In relation to questions 1 and 2, according to the product line, does any payment method stand out?

In [None]:
# Count of 'Total' entries for every product line / payment method pair.
pivot_payments = df.pivot_table(
    index='Product line',
    columns='Payment',
    values='Total',
    aggfunc='count',
)
pivot_payments

### Graph # 3

In [None]:
# Row counts per payment method, split by product line.
sns.countplot(x='Payment', hue='Product line', data=df, palette='Pastel1')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

#### Here is some important data to analyze:

#### a - Cash payments on electronic accessories have the highest number of transactions. This could be a good opportunity to offer discounts for cash payment.
#### b - Fashion accessories stand out in all payment methods (mainly ewallet payments), which also indicates that it is a niche to be exploited.

### 4. Type of customer according to gender and city

In [None]:
# Count of 'Total' entries per customer type, broken down by gender and city.
pivot_customer = df.pivot_table(
    index='Customer type',
    columns=['Gender', 'City'],
    values='Total',
    aggfunc='count',
)
pivot_customer

### Graph # 4 Type of Customer by Gender

In [None]:
# Row counts per customer type, split by gender.
sns.countplot(x='Customer type', hue='Gender', data=df, palette='Pastel1')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

#### In this case, female customers appear to be the most committed to the brand.

### Graph # 4.1 Type of Customer by City

In [None]:
# Row counts per customer type, split by city.
sns.countplot(x='Customer type', hue='City', data=df, palette='Pastel1')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.0)

#### In the second graph, most of the customers in the city of Yangon are normal customers, which suggests running advertising campaigns there to build greater loyalty.

### 5. What is the best time to offer online discounts?

In [None]:
# The "Time" column is an object-typed column, so it is parsed to datetime
# first; the hour of each transaction is then stored in a new 'Hour' column.
df['Hour'] = df['Time'].pipe(pd.to_datetime).dt.hour

In [None]:
# Keep only the rows whose payment method is 'Ewallet' and preview them.
ewallet = df['Payment'].eq('Ewallet')
ewallet_sales = df.loc[ewallet]
ewallet_sales.head(n=3)

In [None]:
# Summed 'Quantity' per hour of day, largest quantity first.
# GroupBy.sum() is called directly: handing the Python builtin `sum` to
# .apply() only works through pandas' builtin-to-grouped-sum aliasing, which
# recent pandas versions deprecate.
# NOTE(review): this aggregates every row of `df`; the Ewallet-only frame
# `ewallet_sales` is not used here — confirm which data set was intended.
hours = (
    df.groupby(['Hour'])[['Quantity']]
    .sum()
    .reset_index()
    .sort_values(by=['Quantity'], ascending=False)
)
hours.head(10)

In [None]:
# Bar chart of the summed 'Quantity' for each hour of the day.
sns.barplot(data=hours, x='Hour', y='Quantity', palette='Pastel1')

#### The best hours to promote online offers are 13:00 (1 pm) or 19:00 (7 pm).