# **Diwali Sales Analysis**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### **Data Cleaning**

In [None]:
# Read the Diwali sales CSV into a DataFrame.
# 'unicode_escape' decoding is used to avoid an encoding error on this file.
df = pd.read_csv(
    'Data/Diwali_Sales_Data.csv',
    encoding = 'unicode_escape',
)

In [None]:
df.shape # shape gives the dimensions of the DataFrame as (rows, columns)

In [None]:
df.info() # info() prints a summary of the DataFrame: columns, non-null counts and dtypes

In [None]:
# Drop the columns that are not used in the analysis.
# errors = 'ignore' keeps this cell re-runnable: once the columns are gone,
# running it again is a no-op instead of raising KeyError.
df = df.drop(columns = ['Status', 'unnamed1'], errors = 'ignore')

In [None]:
# Number of null values in each column of the DataFrame.
df.isnull().sum()

In [None]:
# Remove every row that contains at least one null value, modifying df in place.
df.dropna(axis = 0, how = 'any', inplace = True)

In [None]:
# Convert the 'Amount' column to an integer dtype; all other columns are left as they are.
df = df.astype({'Amount': 'int'})

In [None]:
df.columns # columns holds the labels of the columns present in the DataFrame

In [None]:
df.describe() # describe() returns summary statistics for the data in the DataFrame

### **Exploratory Data Analysis**

#### **Gender**

In [None]:
# Number of records per gender; every bar is annotated with its count.
# NOTE(review): seaborn 0.13+ reportedly warns when `palette` is passed without
# `hue` -- confirm against the installed seaborn version.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.countplot(data = df, x = 'Gender', palette = 'autumn', ax = ax)

# One container per plotted series; label each bar with its height.
for container in ax.containers:
    ax.bar_label(container)

ax.set_xlabel('Sex', fontdict = label_font)
ax.set_ylabel('Person Count', fontdict = label_font)
ax.set_title("Person Count - Sex Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per gender, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

amount_by_gender = (
    df.groupby(['Gender'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.barplot(data = amount_by_gender, x = 'Gender', y = 'Amount', palette = 'autumn_r', ax = ax)

ax.set_xlabel('Sex', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("Amount - Sex Graph", fontdict = title_font)

plt.show()

**_The analysis above leads us to the conclusion that most consumers are women, and that women also account for a higher total purchase amount than men, indicating greater purchasing power._**

#### **Age**

In [None]:
# Number of records per age group, split by gender; every bar is annotated
# with its count.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.countplot(data = df, x = 'Age Group', hue = 'Gender', palette = 'spring', ax = ax)

# With `hue` there is one bar container per gender; label them all.
for container in ax.containers:
    ax.bar_label(container)

ax.set_xlabel('Age Group', fontdict = label_font)
ax.set_ylabel('Person Count', fontdict = label_font)
ax.set_title("Person Count - Age Group Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per age group, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

amount_by_age = (
    df.groupby(['Age Group'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.barplot(data = amount_by_age, x = 'Age Group', y = 'Amount', palette = 'Accent', ax = ax)

ax.set_xlabel('Age Group', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("Amount - Age Group Graph", fontdict = title_font)

plt.show()

**_The analysis above leads us to the conclusion that women between the ages of 26 and 35 make up the largest group of consumers._**

#### **State**

In [None]:
# Total number of orders per state, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

orders_by_state = (
    df.groupby(['State'], as_index = False)['Orders']
    .sum()
    .sort_values(by = 'Orders', ascending = False)
)

# dpi = 100 instead of 500: a 27x7 in figure at 500 dpi is rasterised at
# 13500x3500 px, which is slow and memory-hungry. Font sizes are in points,
# so the layout is unchanged.
fig, ax = plt.subplots(figsize = (27, 7), dpi = 100)
sns.barplot(data = orders_by_state, x = 'State', y = 'Orders', palette = 'Set1', ax = ax)

ax.set_xlabel('States', fontdict = label_font)
ax.set_ylabel('Orders', fontdict = label_font)
ax.set_title("States - Orders Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per state, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

amount_by_state = (
    df.groupby(['State'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

# dpi = 100 instead of 500: a 27x7 in figure at 500 dpi is rasterised at
# 13500x3500 px, which is slow and memory-hungry. Font sizes are in points,
# so the layout is unchanged.
fig, ax = plt.subplots(figsize = (27, 7), dpi = 100)
sns.barplot(data = amount_by_state, x = 'State', y = 'Amount', palette = 'Set1', ax = ax)

ax.set_xlabel('States', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("States - Amount Graph", fontdict = title_font)

plt.show()

**_From the charts above we conclude that most orders come from Uttar Pradesh, Maharashtra, and Karnataka, in that order. The same states also account for the highest total order amounts._**

#### **Marital Status**

In [None]:
# Number of records per marital-status value; every bar is annotated with
# its count.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.countplot(data = df, x = "Marital_Status", palette = 'cool', ax = ax)

for container in ax.containers:
    ax.bar_label(container)

ax.set_xlabel('Marital Status', fontdict = label_font)
ax.set_ylabel('Person Count', fontdict = label_font)
ax.set_title("Person Count - Marital Status Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per (marital status, gender) pair, largest total first;
# gender is drawn as the hue within each marital-status group.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

amount_by_marital_gender = (
    df.groupby(['Marital_Status', 'Gender'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

fig, ax = plt.subplots(figsize = (10, 6), dpi = 100)
sns.barplot(
    data = amount_by_marital_gender,
    x = 'Marital_Status',
    y = 'Amount',
    hue = 'Gender',
    palette = 'cool_r',
    ax = ax,
)

ax.set_xlabel('Marital Status', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("Amount - Marital Status Graph", fontdict = title_font)

plt.show()

**_The analysis above leads us to the conclusion that the majority of consumers are married women with substantial purchasing power._**

#### **Occupation**

In [None]:
# Number of records per occupation; every bar is annotated with its count.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

# dpi = 100 instead of 500: a 25x5 in figure at 500 dpi is rasterised at
# 12500x2500 px, which is slow and memory-hungry. Font sizes are in points,
# so the layout is unchanged.
fig, ax = plt.subplots(figsize = (25, 5), dpi = 100)
sns.countplot(data = df, x = 'Occupation', ax = ax)

for container in ax.containers:
    ax.bar_label(container)

ax.set_xlabel('Occupation', fontdict = label_font)
ax.set_ylabel('Person Count', fontdict = label_font)
ax.set_title("Person Count - Occupation Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per occupation, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

amount_by_occupation = (
    df.groupby(['Occupation'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

# dpi = 100 instead of 500: a 25x5 in figure at 500 dpi is rasterised at
# 12500x2500 px, which is slow and memory-hungry. Font sizes are in points,
# so the layout is unchanged.
fig, ax = plt.subplots(figsize = (25, 5), dpi = 100)
sns.barplot(data = amount_by_occupation, x = 'Occupation', y = 'Amount', ax = ax)

ax.set_xlabel('Occupation', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("Amount - Occupation Graph", fontdict = title_font)

plt.show()

**_The analysis above leads us to the conclusion that the majority of consumers are employed in the IT, Aviation, and Healthcare sectors._**

#### **Product Category**

In [None]:
# Number of records per product category; every bar is annotated with its count.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 17, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 22, 'fontweight': 'bold'}

# dpi = 100 instead of 700: a 30x10 in figure at 700 dpi is rasterised at
# 21000x7000 px (~147 megapixels), which can exhaust memory while rendering.
# Font sizes are in points, so the layout is unchanged.
fig, ax = plt.subplots(figsize = (30, 10), dpi = 100)
sns.countplot(data = df, x = 'Product_Category', ax = ax)

for container in ax.containers:
    ax.bar_label(container)

ax.set_xlabel('Product Category', fontdict = label_font)
ax.set_ylabel('Person Count', fontdict = label_font)
ax.set_title("Person Count - Product Category Graph", fontdict = title_font)

plt.show()

In [None]:
# Total purchase amount per product category, largest total first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 25, 'fontweight': 'bold'}

amount_by_category = (
    df.groupby(['Product_Category'], as_index = False)['Amount']
    .sum()
    .sort_values(by = 'Amount', ascending = False)
)

# dpi = 100 instead of 700: a 35x14 in figure at 700 dpi is rasterised at
# 24500x9800 px (~240 megapixels), which can exhaust memory while rendering.
# Font sizes are in points, so the layout is unchanged.
fig, ax = plt.subplots(figsize = (35, 14), dpi = 100)
sns.barplot(data = amount_by_category, x = 'Product_Category', y = 'Amount', ax = ax)

ax.set_xlabel('Product Category', fontdict = label_font)
ax.set_ylabel('Amount', fontdict = label_font)
ax.set_title("Amount - Product Category Graph", fontdict = title_font)

plt.show()

**_The analysis above leads us to the conclusion that the most popular products are in the Food, Clothing & Apparel, and Electronics & Gadgets categories._**

In [None]:
# The 20 products with the highest total number of orders, largest first.
label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

top_products = (
    df.groupby(['Product_ID'], as_index = False)['Orders']
    .sum()
    .sort_values(by = 'Orders', ascending = False)
    .head(20)
)

# dpi = 100 instead of 500: a 27x7 in figure at 500 dpi is rasterised at
# 13500x3500 px, which is slow and memory-hungry. Font sizes are in points,
# so the layout is unchanged.
fig, ax = plt.subplots(figsize = (27, 7), dpi = 100)
sns.barplot(data = top_products, x = 'Product_ID', y = 'Orders', palette = 'magma', ax = ax)

ax.set_xlabel('Product ID', fontdict = label_font)
ax.set_ylabel('Orders', fontdict = label_font)
ax.set_title("Top 20 most sold products", fontdict = title_font)

plt.show()

**_We can infer from the data above that the item with the Product ID P00265242 is the most sold item._**

#### **Conclusion**

**_Married women aged 26 to 35 who work in IT, Aviation, and Healthcare in the states of Uttar Pradesh, Maharashtra, and Karnataka are more likely to purchase items from the Food, Clothing & Apparel, and Electronics & Gadgets categories._**