## Python Data Analysis
##### This is an exercise from pynative on conducting data analysis using Python.

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

#### The code below loads the dataset and stores each column in its own variable for use in the analysis.

In [None]:
# Read the sales table once; the plotting code works from the column
# variables extracted below.
data = pd.read_csv('../input/question1sabudh/Dataset_Ques_1_4.csv')

# Month axis plus the two total columns.
month, total_units, total_profits = (
    data[column]
    for column in ('month_number', 'total_units', 'total_profit')
)

# One Series per product column.
face_cream, face_wash, toothpaste, bathing_soap, shampoo, moisturizer = (
    data[column]
    for column in ('facecream', 'facewash', 'toothpaste',
                   'bathingsoap', 'shampoo', 'moisturizer')
)

#### Total profits for the whole year.
##### When taking a deeper dive, sales increased during the holiday season, but dropped dramatically towards the end of the year.

In [None]:
# Line chart of total profit against month number.
ax = plt.gca()
ax.plot(month, total_profits)
ax.set(
    title="Company profit per month",
    xlabel="Month Number",
    ylabel="Total Profit",
)
ax.set_xticks(month)  # one tick per month value in the data
# Y ticks are intentionally left to matplotlib's automatic locator.
plt.show()

In [None]:
# Dashed line chart of total units sold per month.
# The plotted series is `total_units`, so the title and the legend entry
# name units sold, in agreement with the y-axis label and the tick scale.
plt.plot(
    month,
    total_units,
    color='red',
    marker='o',
    linestyle='--',
    linewidth=3,
    mfc='black',  # marker face colour
    label='Units sold data of last year',
)
plt.title("Company units sold per month")
plt.xlabel("Month Number")
plt.ylabel("Sold units number")
plt.xticks(month)  # one tick per month value in the data
plt.yticks([10000, 20000, 30000, 40000, 50000])
plt.legend(loc='lower right')
plt.show()

#### All product sales data during the year.
##### It is clear from the graph below that the highest-selling item was bathing soap and the lowest-selling was moisturizer. This suggests that the company should advertise the moisturizer more in order to increase its sales.

In [None]:
# One line per product on shared axes so the products can be compared.
# Each entry is (series, line colour, legend label), drawn in this order.
product_lines = (
    (face_cream, 'blue', 'Face cream sales data'),
    (face_wash, 'orange', 'Face wash sales data'),
    (toothpaste, 'green', 'Toothpaste sales data'),
    (bathing_soap, 'red', 'Bathing soap sales data'),
    (shampoo, 'purple', 'Shampoo sales data'),
    (moisturizer, 'brown', 'Moisturizer sales data'),
)
for units, line_color, legend_text in product_lines:
    plt.plot(month, units, color=line_color, linewidth=3, marker='o',
             label=legend_text)

plt.title("Sales data")
plt.xlabel("Month Number")
plt.ylabel("Sales units in number")
plt.xticks(month)  # one tick per month value in the data
# Hand-picked, unevenly spaced y ticks.
plt.yticks([1000, 2000, 4000, 6000, 8000, 10000, 12000, 15000, 18000])
plt.legend(loc='upper left')
plt.show()

#### Toothpaste sales
##### The plot below shows the company's toothpaste sales throughout the year. The highest-selling months were towards the end of the year, while the lowest sales were in the middle of the year. This suggests that the company should advertise more during the mid-year months to increase sales.

In [None]:
# Scatter of toothpaste units per month, with a dashed grid for reading
# values off the axes.
plt.scatter(month, toothpaste, color='blue', alpha=0.75,
            label='Toothpaste sales data')

ax = plt.gca()
ax.set(
    title="Toothpaste sales data",
    xlabel="Month number",
    ylabel="Number of units sold",
)
ax.set_xticks(month)  # one tick per month value in the data
ax.set_yticks([4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000])
ax.legend(loc='upper left')
ax.grid(True, linewidth=1, linestyle='--')
plt.show()

#### Facewash v Facecream sales
##### The plot below compares facewash and facecream sales. Below, it is evident that facecream sells more than facewash, with August being the highest month of sales for facecream, and May being the highest month of sales for facewash.

In [None]:
# Side-by-side bars for face cream and face wash, meeting at each month tick.
bar_width = 0.25

# Face cream: left edge at (month - 0.25), so the bar spans up to the tick.
plt.bar([m - bar_width for m in month], face_cream,
        width=bar_width, align='edge', label='Face cream sales data')

# Face wash: with align='edge' a negative width anchors the bar's right edge
# at x, so anchoring at (month + 0.25) makes the bar span from the tick
# rightwards.
plt.bar([m + bar_width for m in month], face_wash,
        width=-bar_width, align='edge', label='Face wash sales data')

plt.title("Facecream and Facewash sales data")
plt.xlabel("Month number")
plt.ylabel(" Sales units in number")
plt.xticks(month)  # one tick per month value in the data
plt.yticks([500, 1000, 1500, 2000, 2500, 3000, 3500])
plt.legend(loc='upper left')
plt.grid(True, linewidth=1, linestyle='--')
plt.show()

#### Bath soap sales data
##### The plot below shows the sales of bath soap. It is clear that the holiday season sees the highest sales, whereas the beginning of the year shows the lowest sales.

In [None]:
# Bar chart of bathing soap units per month, with a dashed grid.
ax = plt.gca()
ax.bar(month, bathing_soap)
ax.set(
    title="Bathing soap sales data",
    xlabel="Month number",
    ylabel="Sales units in number",
)
ax.set_xticks(month)  # one tick per month value in the data
ax.set_yticks([2000, 4000, 6000, 8000, 10000, 12000, 14000])
ax.grid(True, linewidth=1, linestyle='--')
plt.show()

In [None]:
# Histogram of total profit values: how many data rows (months) fall into
# each profit band.
# Explicit bin edges; the bands are not all the same width.
profit_range = [150000, 175000, 200000, 225000, 250000, 300000, 350000]
plt.hist(total_profits, bins=profit_range, label='Profit data')
plt.xlabel('profit range in dollar')
# The height of a histogram bar is the count of observations in the bin,
# not a dollar amount, so the y axis is labelled as a count.
plt.ylabel('Number of months')
plt.legend(loc='upper left')
plt.xticks(profit_range)  # put the ticks exactly on the bin edges
plt.title('Profit data')
plt.show()

#### Sales data for all products
##### The pie chart shows that bathing soap is the most popular item at 40%, whereas facewash is the least at 6%.

In [None]:
# Pie chart of each product's summed units as a share of all six products.
# Maps the wedge label to the dataframe column it is summed from.
product_columns = {
    'Facecream': 'facecream',
    'Facewash': 'facewash',
    'Toothpaste': 'toothpaste',
    'Bathing soap': 'bathingsoap',
    'Shampoo': 'shampoo',
    'Moisturizer': 'moisturizer',
}
labels = list(product_columns)
sales_data = [data[column].sum() for column in product_columns.values()]

plt.pie(
    sales_data,
    labels=labels,
    autopct='%1.1f%%',  # print each wedge's percentage with one decimal
    wedgeprops={'edgecolor': 'black'},
)
plt.title("Sales data")
plt.legend(loc='lower right')
plt.show()

#### Bath soap v facewash
##### Sales of bath soap are steady throughout the year and increase during the holiday season. Sales of facewash fluctuate up and down throughout the year.

In [None]:
# Two stacked panels sharing the x axis: bathing soap on top, face wash below.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

# Top panel.
ax1.plot(month, bathing_soap, color='black', marker='o', linewidth=3)
ax1.set_title('Sales data of bathing soap')

# Bottom panel; only this panel is given axis labels.
ax2.plot(month, face_wash, color='red', marker='o', linewidth=3)
ax2.set_title('Sales data of facewash')
ax2.set_xlabel('Month number')
ax2.set_ylabel('Sales units in number')

# One tick per month value; the x axis is shared between the panels.
plt.xticks(month)
plt.show()

In [None]:
# Line chart of every product with a translucent fill under each curve.
# Each entry is (series, line colour, legend label).
product_curves = (
    (face_cream, 'blue', 'Face cream sales data'),
    (face_wash, 'orange', 'Face wash sales data'),
    (toothpaste, 'green', 'Toothpaste sales data'),
    (bathing_soap, 'red', 'Bathing soap sales data'),
    (shampoo, 'purple', 'Shampoo sales data'),
    (moisturizer, 'brown', 'Moisturizer sales data'),
)

# Draw all the lines first ...
for units, line_color, legend_text in product_curves:
    plt.plot(month, units, color=line_color, label=legend_text)

# ... then all the fills, in the same product order. No colour is passed,
# so each fill takes whatever colour matplotlib assigns it by default.
for units, _, _ in product_curves:
    plt.fill_between(month, units, interpolate=True, alpha=0.25)

plt.title("Sales data")
plt.xlabel("Month Number")
plt.ylabel("Sales units in number")
plt.xticks(month)  # one tick per month value in the data
# Y ticks are intentionally left to matplotlib's automatic locator.
plt.legend(loc='upper left')
plt.show()