# **Import Libraries**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# **Read dataset**

In [None]:
# Read sales_data.csv into the DataFrame that the rest of the notebook works on
df = pd.read_csv(filepath_or_buffer='sales_data.csv')

In [None]:
# Preview the first five rows of the loaded data
df.head(n=5)

# **Pre-processing**

In [None]:
# Print a summary of the DataFrame: column names, non-null counts, dtypes and memory usage
df.info()

In [None]:
# Show descriptive statistics for the columns (pandas defaults decide which columns are included)
df.describe()

In [None]:
# Count the missing values in each column
df.isna().sum()

In [None]:
# Remove, in place, every row that has a missing value in any column
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Count the missing values in each column again to check what is left
df.isna().sum()

In [None]:
# Parse the 'date' column into pandas datetime values and store it back under the same name
df['date'] = df['date'].pipe(pd.to_datetime)


# **What was the total revenue generated by the company over the course of the year?**

In [None]:
# Add up the 'revenue' column over all rows to get the overall revenue figure
revenue_column = df['revenue']
total_revenue = revenue_column.sum()

In [None]:
# Report the total revenue as a dollar amount with two decimal places
total_revenue_message = f"Total revenue generated by the company: ${total_revenue:.2f}"
print(total_revenue_message)

In [None]:
# Draw the overall revenue as a single labelled bar
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(['Total Revenue'], [total_revenue])
ax.set_title('Total Revenue')
ax.set_ylabel('Revenue')
plt.show()

# **Which product had the highest revenue? How much revenue did it generate?**

In [None]:
# Total revenue per product; the resulting index holds the product labels
product_revenue = df['revenue'].groupby(df['product']).sum()
# Largest per-product total, and the product label it belongs to
highest_revenue_product_value = product_revenue.max()
highest_revenue_product = product_revenue.idxmax()

In [None]:
# Report the top-earning product together with its revenue (two decimal places)
top_product_message = f"Product with the highest revenue: {highest_revenue_product} (${highest_revenue_product_value:.2f})"
print(top_product_message)

In [None]:
# Bar chart of total revenue for every product
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title('Revenue by Product')
sns.barplot(x=product_revenue.index, y=product_revenue.values, ax=ax)
# Turn the product labels vertical
plt.xticks(rotation=90)
ax.set_ylabel('Revenue')
plt.show()

# **What was the average price of a product sold by the company?**

In [None]:
# Mean of the 'price' column over all rows. Each row counts once, so the
# result is not weighted by 'quantity'.
# NOTE(review): confirm that the unweighted mean is the intended answer to
# "average price of a product sold".
average_price = df['price'].mean()
# Print with a label and two-decimal dollar formatting, matching the other
# result cells in this notebook (previously the bare float was printed).
print(f"Average price of a product sold by the company: ${average_price:.2f}")

In [None]:
# Draw the average price as a single labelled bar
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(['Average Price'], [average_price])
ax.set_title('Average Price of Products Sold')
ax.set_ylabel('Price')
plt.show()

# **What was the total quantity of products sold by the company?**

In [None]:
# Add up the 'quantity' column over all rows
quantity_column = df['quantity']
total_quantity = quantity_column.sum()

In [None]:
# Report the summed quantity
total_quantity_message = f"Total quantity of products sold by the company: {total_quantity}"
print(total_quantity_message)

# **Which category had the highest revenue? How much revenue did it generate?**

In [None]:
# Total revenue per category; the resulting index holds the category labels
category_revenue = df['revenue'].groupby(df['category']).sum()
# Largest per-category total, and the category label it belongs to
highest_revenue_category_value = category_revenue.max()
highest_revenue_category = category_revenue.idxmax()

In [None]:
# Report the top-earning category together with its revenue (two decimal places)
top_category_message = f"Category with the highest revenue: {highest_revenue_category} (${highest_revenue_category_value:.2f})"
print(top_category_message)

In [None]:
# Bar chart of total revenue for every category
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title('Revenue by Category')
sns.barplot(x=category_revenue.index, y=category_revenue.values, ax=ax)
# Turn the category labels vertical
plt.xticks(rotation=90)
ax.set_ylabel('Revenue')
plt.show()

# **What was the average revenue per sale?**

In [None]:
# Mean of the 'revenue' column, with each row of the DataFrame counted once
average_revenue_per_sale = df.loc[:, 'revenue'].mean()

In [None]:
# Report the mean revenue as a dollar amount with two decimal places
average_revenue_message = f"Average revenue per sale: ${average_revenue_per_sale:.2f}"
print(average_revenue_message)

# **What was the total revenue generated in each quarter of the year? (i.e. Q1, Q2, Q3, Q4)**

In [None]:
# Add a 'quarter' column holding the quarterly pandas Period of each row's date
df['quarter'] = df['date'].dt.to_period(freq='Q')
# Sum revenue within each quarter
quarterly_revenue = df['revenue'].groupby(df['quarter']).sum()

In [None]:
# Print the per-quarter revenue Series underneath a heading line
quarterly_revenue_message = f"Total revenue generated in each quarter:\n{quarterly_revenue}"
print(quarterly_revenue_message)

In [None]:
# Bar chart with one bar per quarter
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title('Quarterly Revenue')
quarterly_revenue.plot.bar(ax=ax)
ax.set_ylabel('Revenue')
plt.show()