In [3]:
pip install faker

Collecting faker
  Downloading Faker-19.6.1-py3-none-any.whl (1.7 MB)
Installing collected packages: faker
Successfully installed faker-19.6.1
Note: you may need to restart the kernel to use updated packages.


Dataset 01: Retail Dataset

In [5]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so every run of this cell regenerates the same dataset.
np.random.seed(42)

# One row per calendar month-end from January 2020 through December 2022 (3 years).
# An explicit MonthEnd offset produces the same dates as the 'M' alias, which pandas
# has deprecated (since 2.2) in favour of 'ME'; the offset object works on old and
# new pandas alike.
date_range = pd.date_range(start='2020-01-01', end='2022-12-31', freq=pd.offsets.MonthEnd())

# Number of rows (one for each month)
num_rows = len(date_range)

# Headline KPI metrics. NOTE: the order of the random draws in this cell determines
# the seeded output, so keep the column order stable when editing.
data = {
    'Date': date_range,
    'Monthly Sales': np.random.randint(100000, 200000, size=num_rows),
    'Average Transaction Value': np.random.randint(50, 200, size=num_rows),
    'Customer Retention Rate': np.random.uniform(0.5, 0.9, size=num_rows),
    'New Customers Acquired': np.random.randint(100, 1000, size=num_rows),
}

# Create a DataFrame from the data
df = pd.DataFrame(data)

# Per-category sales: one '<category> Sales' column for each product category.
product_categories = ['Electronics', 'Furniture', 'Groceries', 'Clothing', 'Accessories']
for category in product_categories:
    df[f'{category} Sales'] = np.random.randint(10000, 50000, size=num_rows)

# Customer demographics: monthly counts per age band and per gender.
# The bands are drawn independently, so they are not constrained to sum to any total.
demographics = {
    'Age 18-25': np.random.randint(50, 200, size=num_rows),
    'Age 26-35': np.random.randint(100, 300, size=num_rows),
    'Age 36-45': np.random.randint(80, 250, size=num_rows),
    'Age 46-60': np.random.randint(60, 200, size=num_rows),
    'Age 60+': np.random.randint(30, 100, size=num_rows),
    'Male': np.random.randint(100, 500, size=num_rows),
    'Female': np.random.randint(100, 500, size=num_rows),
    'Other': np.random.randint(10, 50, size=num_rows),
}
for demo, values in demographics.items():
    df[demo] = values

# Shopping activity per time-of-day window.
peak_hours = ['Morning (6-12)', 'Afternoon (12-18)', 'Evening (18-24)', 'Night (0-6)']
for hour in peak_hours:
    df[hour] = np.random.randint(100, 1000, size=num_rows)

# Save the DataFrame to a CSV file
df.to_csv('Retail_Dashboard_Data.csv', index=False)

# Preview the first rows. This must be the last expression in the cell: Jupyter only
# renders the value of the final expression, so a bare `df.head()` earlier in the
# cell would be silently discarded.
df.head()


Dataset 02: Finance Dataset

In [13]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from faker import Faker

# Initialize Faker for generating fake data
fake = Faker()

# Seed every random source used below (NumPy, the stdlib `random` module and Faker)
# so that re-running this cell on its own regenerates the same dataset.
np.random.seed(42)
random.seed(42)
Faker.seed(42)

# One row per calendar day across 36 months (3 years): freq='D' yields daily timestamps.
start_date = '2021-01-01'
end_date = '2023-12-31'
dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Start the frame with the date column.
# NOTE(review): the column is named 'Month' but holds daily timestamps — confirm
# whether monthly granularity was intended before renaming or resampling.
data = pd.DataFrame({'Month': dates})

# Every generated column is sized from the frame itself so that it always matches
# the index length, regardless of what other cells have left in the kernel.
num_rows = len(data)

# Generate Assets Under Management data
data['Assets Under Management'] = np.random.randint(1000000, 10000000, size=num_rows)

# Generate Net Profit Margin data (as a percentage)
data['Net Profit Margin'] = np.random.uniform(5, 20, size=num_rows)

# Generate Customer Churn rate (as a percentage)
data['Customer Churn'] = np.random.uniform(1, 10, size=num_rows)

# Generate Cost Per Acquisition data
data['Cost Per Acquisition'] = np.random.randint(100, 1000, size=num_rows)

# Generate Investment Types data: one independently drawn amount column per type.
investment_types = ['Stocks', 'Bonds', 'Real Estate', 'Mutual Funds']
for inv_type in investment_types:
    data[inv_type] = np.random.randint(50000, 500000, size=num_rows)

# Generate Monthly Inflow and Outflow data
data['Monthly Inflow'] = np.random.randint(100000, 500000, size=num_rows)
data['Monthly Outflow'] = np.random.randint(80000, 450000, size=num_rows)

# Generate Customer Age data (inclusive bounds 18..70)
data['Customer Age'] = [fake.random_int(min=18, max=70) for _ in range(num_rows)]

# Bucket ages into labelled groups. pd.cut uses right-inclusive bins, so the edges
# [0, 30, 45, 60, 100] map to (0, 30], (30, 45], (45, 60], (60, 100].
age_bins = [0, 30, 45, 60, 100]
age_labels = ['18-30', '31-45', '46-60', '61+']
data['Customer Age Group'] = pd.cut(data['Customer Age'], bins=age_bins, labels=age_labels)

# Generate Customer Feedback data (uniform choice among the four options)
feedback_options = ['Excellent', 'Good', 'Neutral', 'Poor']
data['Customer Feedback'] = [random.choice(feedback_options) for _ in range(num_rows)]

# Save the generated data to a CSV file
data.to_csv('financial_data.csv', index=False)

Dataset 03: Portfolio Performance Dataset

In [15]:
import pandas as pd
import numpy as np
from faker import Faker

# Initialize Faker for generating fake data
fake = Faker()

# Seed both random sources used below (NumPy and Faker) so that re-running this
# cell on its own regenerates the same dataset.
np.random.seed(42)
Faker.seed(42)

# Base range: 84 month-ends (7 years). An explicit MonthEnd offset produces the same
# dates as the 'M' alias, which pandas has deprecated (since 2.2) in favour of 'ME'.
start_date = '2016-01-01'
end_date = '2022-12-31'
month_end = pd.offsets.MonthEnd()
months = pd.date_range(start=start_date, end=end_date, freq=month_end)

# Ensure we have at least 1000 rows by extending the range with further consecutive
# month-ends, added in steps of 10 months (84 base rows -> 1004 rows in total).
# The full range is built with one date_range call instead of concatenating
# 10-row frames in a loop, which re-copies the whole frame on every iteration.
MIN_ROWS = 1000
EXTENSION_STEP = 10
num_rows = len(months)
if num_rows < MIN_ROWS:
    shortfall = MIN_ROWS - num_rows
    # Ceiling division: number of whole steps needed to cover the shortfall.
    num_rows += -(-shortfall // EXTENSION_STEP) * EXTENSION_STEP
all_months = pd.date_range(start=start_date, periods=num_rows, freq=month_end)

# Start the frame with the (extended) month-end column.
data = pd.DataFrame({'Month': all_months})

# Generate ROI (Return on Investment) data as a percentage (1% .. 10%)
data['ROI'] = np.random.uniform(0.01, 0.10, size=len(data)) * 100

# Generate IRR (Internal Rate of Return) data as a percentage (5% .. 15%)
data['IRR'] = np.random.uniform(0.05, 0.15, size=len(data)) * 100

# Generate Investment Performance data as a percentage (2% .. 8%)
data['Investment Performance'] = np.random.uniform(0.02, 0.08, size=len(data)) * 100

# Generate fake investment breakdown data: one independently drawn amount per category.
investment_categories = ['Stocks', 'Bonds', 'Real Estate', 'Mutual Funds', 'Cryptocurrency']
for category in investment_categories:
    data[category] = np.random.randint(10000, 1000000, size=len(data))

# Generate fake risk assessment data (one label per row)
risk_scores = ['Low', 'Medium', 'High']
data['Risk Assessment'] = [fake.random_element(elements=risk_scores) for _ in range(len(data))]

# Save the generated data to a CSV file
data.to_csv('portfolio_performance_data_monthly.csv', index=False)


Dataset 04

Dataset 05

Dataset 06

Dataset 07

Dataset 08

Dataset 09

Dataset 10