In [None]:
# Sales Data Analysis Project

# Day 1
# 1. Import Library
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Load Dataset
# The file is read with ';' as the field separator.
df = pd.read_csv('sales_data.csv', sep=';')

# 3. Quick Look
# NOTE(review): `display` is not imported in this cell - presumably the
# IPython/Jupyter builtin; confirm before running as a plain script.
display(df)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted an extra "None" line.
df.info()
print(df.describe())

# 4. Data Cleaning
# Build the Total Sales column (Quantity * Unit Price) when it is absent or
# entirely empty, then write the frame back to the same CSV file. Later cells
# reload that file and read 'Total Sales' from it.
total_sales_missing = (
    'Total Sales' not in df.columns
    or df['Total Sales'].isna().all()
)
if total_sales_missing:
    df['Total Sales'] = df['Quantity'].mul(df['Unit Price'])
    df.to_csv('sales_data.csv', sep=';', index=False)

# 5. Best-Selling Product Analysis
# Total Sales summed per product, largest first.
product_sales = (
    df.groupby('Product')['Total Sales']
    .sum()
    .sort_values(ascending=False)
)
print(product_sales)

# Bar chart of the ranking above
product_axes = product_sales.plot.bar(color='skyblue')
product_axes.set_title('Total Sales by Product')
product_axes.set_xlabel('Product')
product_axes.set_ylabel('Total Sales')
plt.xticks(rotation=45)
plt.show()

# 6. Best-Selling City Analysis
# Total Sales summed per city, largest first.
city_totals = df.groupby('City')['Total Sales'].sum()
top_city = city_totals.sort_values(ascending=False)
print(top_city)

# Bar chart of the ranking above
city_axes = top_city.plot.bar(color='lightgreen')
city_axes.set(
    title='Total Sales per City',
    xlabel='City',
    ylabel='Total Sales',
)
plt.xticks(rotation=45)
plt.show()

# 7. Daily Sales Trend
# 'Order Date' holds day/month/year text (the Day 3 cell parses it with
# '%d/%m/%Y'). Grouping on the raw strings sorts the x-axis lexicographically
# (e.g. '01/02/2024' ahead of '02/01/2024'), so the dates are parsed first to
# get a truly chronological line. `df` itself is left unchanged.
order_dates = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
sales_trend = df.groupby(order_dates)['Total Sales'].sum()
sales_trend.plot(kind='line', marker='o', color='orange')
plt.title('Daily Sales Trend')
plt.xlabel('Order Date')
plt.ylabel('Total Sales')
plt.grid(True)
plt.xticks(rotation=45)
plt.show()

# 8. Conclusion Day 1
# Fixed summary text, one printed line per entry.
day1_conclusion = (
    "\nConclusion:",
    "- Produk dengan penjualan tertinggi adalah Laptop.",
    "- Kota dengan penjualan tertinggi adalah Jakarta.",
    "- Tren penjualan harian meningkat signifikan menjelang tanggal 10 Januari 2024.",
)
print(*day1_conclusion, sep="\n")

In [None]:
# Day 2
#1. Check Missing Values
print("\nCheck Missing Values:\n")
# .sum has to be called: without the parentheses print() shows the bound
# method's repr instead of the per-column count of missing values.
print(df.isnull().sum())

#2. Check Duplicate Data
print("\nCheck Duplicate Data :\n")
# Same fix here: calling .sum() yields the number of duplicated rows.
print(df.duplicated().sum())

#3. Preview Data
# Show the first rows of the frame.
print("\nPreview Data :\n")
preview_rows = df.head()
print(preview_rows)

#4. Basic Statistics
# Descriptive statistics restricted to the three sales-related columns.
print("\nStatistik Deskriptif:\n")
stat_columns = ['Quantity', 'Unit Price', 'Total Sales']
print(df.loc[:, stat_columns].describe())

#5-6. Total Sales ranking per product, then per city (largest first)
# Both reports share the same shape, so they are produced by one loop:
# print the heading, then the summed and sorted totals for that column.
for heading, group_column in (
    ("\nTotal Sales per Product:\n", 'Product'),
    ("\nTotal Sales per City:\n", 'City'),
):
    print(heading)
    ranking = df.groupby(group_column)['Total Sales'].sum()
    print(ranking.sort_values(ascending=False))

#7. Visualize Total Sales per Product

import matplotlib.pyplot as plt

# Sum Total Sales for each product (unsorted; sorting happens at plot time)
sales_per_product = df.groupby('Product')['Total Sales'].sum()

# Bar chart, bars ordered from largest to smallest total
plt.figure(figsize=(8, 6))
product_ranking = sales_per_product.sort_values(ascending=False)
product_chart = product_ranking.plot.bar(color='skyblue')
product_chart.set_title('Total Sales Product', fontsize=16)
product_chart.set_xlabel('Product', fontsize=12)
product_chart.set_ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=45)
product_chart.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal guides only
plt.tight_layout()
plt.show()

#8. Visualize Total Sales per City

# Sum Total Sales for each city (unsorted; sorting happens at plot time)
sales_per_city = df.groupby('City')['Total Sales'].sum()

# Bar chart, bars ordered from largest to smallest total
plt.figure(figsize=(8, 6))
city_ranking = sales_per_city.sort_values(ascending=False)
city_chart = city_ranking.plot.bar(color='lightgreen')
city_chart.set_title('Total Sales City', fontsize=16)
city_chart.set_xlabel('City', fontsize=12)
city_chart.set_ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=45)
city_chart.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal guides only
plt.tight_layout()
plt.show()

#9. Conclusion Day 2
# Fixed summary text; the last entry deliberately spans two output lines.
day2_conclusion = [
    "\nConclusion:",
    "- Tidak ada missing data maupun duplikasi data.",
    "- Penjualan paling besar disumbangkan oleh produk Laptop dan kota Jakarta.",
    "- Potensi pertumbuhan bisa difokuskan dengan meningkatkan promosi di kota yang sudah tinggi penjualannya\n"
    "serta meningkatkan awareness terhadap produk dengan penjualan rendah.",
]
for conclusion_line in day2_conclusion:
    print(conclusion_line)

In [None]:
# Day 3
#1. Import Library and Load dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the dataset from disk (';'-separated)
df = pd.read_csv('sales_data.csv', sep=';')

#2. Parse Order Date (day/month/year text) into a datetime column
order_time = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
df['Order Time'] = order_time

print(df.dtypes)

#3. Extract year, month, day
# Each component becomes its own column, taken from the parsed dates.
date_parts = order_time.dt
df['Year'] = date_parts.year
df['Month'] = date_parts.month
df['Day'] = date_parts.day

print(df.loc[:, ['Order Time', 'Year', 'Month', 'Day']])

#4. Total Sales per Month
# Totals are ranked by value, so the bars below appear from the largest month
# to the smallest rather than in calendar order.
monthly_totals = df.groupby('Month')['Total Sales'].sum()
sales_per_month = monthly_totals.sort_values(ascending=False)
print("\nTotal Sales Per Month:\n")
print(sales_per_month)

# Bar chart of Total Sales per Month
plt.figure(figsize=(8, 6))
month_chart = sales_per_month.plot.bar(color='green')
month_chart.set_title('Total Sales per Month', fontsize=15)
month_chart.set_xlabel('Month', fontsize=12)
month_chart.set_ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=0)
month_chart.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

#5. Total Sales Per Day
# Grouped on the day-of-month number; ranked by value, largest first.
daily_totals = df.groupby('Day')['Total Sales'].sum()
sales_per_day = daily_totals.sort_values(ascending=False)
print("\nTotal Sales Per Day:\n")
print(sales_per_day)

# Bar chart of Total Sales per Day
plt.figure(figsize=(8, 6))
day_chart = sales_per_day.plot.bar(color='orange')
day_chart.set_title('Total Sales per Day', fontsize=15)
day_chart.set_xlabel('Day', fontsize=12)
day_chart.set_ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=0)
day_chart.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

#6. Conclusion Day 3
# Fixed summary text, joined into a single print call.
day3_conclusion = "\n".join([
    "\nConclusion:",
    "- Berhasil Mengubah Kolom Order Date menjadi Order Time dengan format Day, Month, Year",
    "- Menambah Kolom Year, Month, Day",
    "- Menghitung Total Sales per Bulan dan Memvisualisasikan nya menjadi grafik batang",
    "- Menghitung Total Sales per Hari dan Memvisualisasikan nya menjadi grafik batang",
])
print(day3_conclusion)

In [None]:
# Day 4
#1. Import Library and Load dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the dataset from disk (';'-separated)
df = pd.read_csv('sales_data.csv', sep=';')

#2. Top N Products
# N is defined once so the ranking size, the printed header and the chart
# title cannot drift apart when it is changed.
TOP_N_PRODUCTS = 5
top_product = (
    df.groupby('Product')['Total Sales']
    .sum()
    .sort_values(ascending=False)
    .head(TOP_N_PRODUCTS)
)

print(f"\nTop {TOP_N_PRODUCTS} Product Berdasarkan Total Sales\n")
print(top_product)

# Bar chart of the Top N products

plt.figure(figsize=(8, 6))
top_product.plot(kind='bar', color='black')
plt.title(f'Top {TOP_N_PRODUCTS} Product by Total Sales', fontsize=16)
plt.xlabel('Product', fontsize=12)
plt.ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

#3. Top N Cities
# N is defined once so the ranking size, the printed header and the chart
# title stay in sync.
TOP_N_CITIES = 3
top_cities = (
    df.groupby('City')['Total Sales']
    .sum()
    .sort_values(ascending=False)
    .head(TOP_N_CITIES)
)

print(f"\nTop {TOP_N_CITIES} Cities berdasarkan Total Sales:\n")
print(top_cities)

# Bar chart of the Top N cities
plt.figure(figsize=(8, 6))
top_cities.plot(kind='bar', color='lightseagreen')
plt.title(f'Top {TOP_N_CITIES} Cities by Total Sales', fontsize=16)
plt.xlabel('City', fontsize=12)
plt.ylabel('Total Sales', fontsize=12)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

#4. Simple segmentation example: mean Total Sales per city, largest first
city_sales = df.groupby('City')['Total Sales']
avg_sales_city = city_sales.mean().sort_values(ascending=False)

print("\nRata-rata Total Sales per City:\n")
print(avg_sales_city)

# Conclusion Day 4: fixed summary text, one printed line per entry.
day4_conclusion = (
    "\nConclusion Day 4:",
    "- Menampilkan Top 5 Produk berdasarkan Total Sales",
    "- Menampilkan Top 3 Kota berdasarkan Total Sales",
    "- Membuat Visualisasi Bar Chart untuk Produk dan Kota",
    "- Menunjukkan potensi pengembangan fokus di produk dan kota teratas",
)
print(*day4_conclusion, sep="\n")

In [None]:
# Day 5
#1. Import Library and Load dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the dataset from disk (';'-separated)
df = pd.read_csv('sales_data.csv', sep=';')

#2. Build a Simple Dashboard
# 2x2 grid: three charts plus one intentionally blank cell.
fig, axs = plt.subplots(2, 2, figsize=(12, 8))

# Grouping and Summarizing Data Before Visualization
top_product = df.groupby('Product')['Total Sales'].sum().sort_values(ascending=False)
top_city = df.groupby('City')['Total Sales'].sum().sort_values(ascending=False)
# top_city is already sorted descending, so its first three rows are the top 3.
top_cities = top_city.head(3)

# Chart 1: Total Sales for every product.
# The title previously read 'Top 5 Products' although the series is not
# truncated; it now describes what is actually plotted.
top_product.plot(kind='bar', ax=axs[0, 0], color='skyblue')
axs[0, 0].set_title('Total Sales Per Product')

# Chart 2: Total Sales per city
top_city.plot(kind='bar', ax=axs[0, 1], color='lightgreen')
axs[0, 1].set_title('Total Sales Per City')

# Chart 3: Top 3 cities
top_cities.plot(kind='bar', ax=axs[1, 0], color='lightcoral')
axs[1, 0].set_title('Top 3 Cities')

# Leave the fourth cell empty
axs[1, 1].axis('off')

# Save before showing the figure
plt.tight_layout()
plt.savefig('dashboard.png', dpi=300)
plt.show()

# Conclusion Day 5
# Fixed summary text; indented entries are sub-items of the dashboard bullet.
day5_conclusion = [
    "\nConclusion Day 5:",
    "- Membuat dashboard sederhana dengan 3 grafik:",
    "  - Total Sales per Product",
    "  - Total Sales per City",
    "  - Top 3 Cities by Total Sales",
    "- Menggunakan subplot untuk mengatur layout grafik secara rapi",
    "- Menyimpan dashboard ke file gambar (dashboard.png) dengan kualitas tinggi",
    "- Membiasakan clean code dan grouping data sebelum visualisasi",
]
for summary_line in day5_conclusion:
    print(summary_line)