# Finance Data Generation Script

In [2]:
import pandas as pd
import numpy as np
import os
from faker import Faker
import random

In [4]:
# Initialize Faker
fake = Faker()
# Seed Faker's shared generator so the fake company names and countries are
# identical on every Restart & Run All instead of changing with each run.
Faker.seed(42)

In [6]:
# Parameters
# Fixed seed for the stdlib `random` module and NumPy's global generator, so the
# random draws made through them are the same on every run of the notebook.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

num_companies = 50   # number of synthetic companies (CompanyID runs 1..num_companies)
num_years = 5        # number of consecutive reporting years per company
years = np.arange(2019, 2019 + num_years)  # reporting years, starting at 2019

# Value pools that categorical columns are sampled from.
industries = ['Technology', 'Finance', 'Healthcare', 'Manufacturing', 'Retail']
sectors = ['Software', 'Banking', 'Biotech', 'Electronics', 'Consumer Goods']
regions = ['North America', 'Europe', 'Asia', 'South America', 'Africa']
# NOTE(review): the two lists below are not referenced by the cells shown in
# this notebook -- confirm whether they are still needed.
company_types = ['Public', 'Private', 'Non-Profit']
revenue_categories = ['Small', 'Medium', 'Large']

In [8]:
# Directory to save files
# The FIN_PROJECT_DIR environment variable, when set, overrides the output
# directory so the notebook can run on machines other than the author's.
# When it is unset the original location is used unchanged.
base_path = os.environ.get(
    "FIN_PROJECT_DIR",
    r"C:\Users\br1gi\Downloads\Learning\Fin Project",
)

In [10]:
# Ensure directory exists
# exist_ok=True makes this a single idempotent call: no separate existence check
# that could race with another process, and re-running the cell is harmless.
os.makedirs(base_path, exist_ok=True)

In [12]:
# Generate Companies Data
# Every column is an independent list of `num_companies` random values, so
# Industry/Sector and Country/Region are not correlated with one another.
# NOTE(review): confirm whether Industry<->Sector and Country<->Region should be
# paired rather than sampled independently.

# random.randint is inclusive on both ends; cap the founding year at the first
# reporting year so no company has financial records that predate its founding.
latest_founding_year = int(years.min())

companies_data = {
    'CompanyID': range(1, num_companies + 1),
    'CompanyName': [fake.company() for _ in range(num_companies)],
    'Industry': [random.choice(industries) for _ in range(num_companies)],
    'Sector': [random.choice(sectors) for _ in range(num_companies)],
    'Country': [fake.country() for _ in range(num_companies)],
    'Region': [random.choice(regions) for _ in range(num_companies)],
    'YearFounded': [random.randint(1900, latest_founding_year) for _ in range(num_companies)],
    'NumberOfEmployees': [random.randint(50, 5000) for _ in range(num_companies)],
    'SectorSize': [random.uniform(100000000, 10000000000) for _ in range(num_companies)]  # Industry sector size
}

In [14]:
# Turn the column dictionary into a DataFrame and write it next to the other
# generated tables (row index omitted from the CSV).
companies_csv_path = os.path.join(base_path, 'companies.csv')
companies_df = pd.DataFrame(data=companies_data)
companies_df.to_csv(companies_csv_path, index=False)

In [16]:
# Generate Assets and Liabilities Data
# One balance-sheet row per (company, year). Each line item is an independent
# uniform draw; the two totals are the sums of their respective components.
asset_liability_columns = [
    "CompanyID", "Year",
    "Cash", "Accounts_Receivable", "Inventory", "Property_Plant_Equipment", "Other_Assets",
    "Short_Term_Debt", "Long_Term_Debt", "Accounts_Payable", "Other_Liabilities",
    "Total_Assets", "Total_Liabilities",
]


def _balance_sheet_row(cid, yr):
    """Draw one row of asset/liability figures for company `cid` in year `yr`.

    The returned list follows the order of `asset_liability_columns`.
    """
    # Asset side
    cash_amt = np.random.uniform(100000, 5000000)
    receivables = np.random.uniform(50000, 2000000)
    stock = np.random.uniform(50000, 1000000)
    ppe = np.random.uniform(100000, 10000000)
    misc_assets = np.random.uniform(50000, 2000000)
    # Liability side
    st_debt = np.random.uniform(50000, 500000)
    lt_debt = np.random.uniform(500000, 5000000)
    payables = np.random.uniform(50000, 500000)
    misc_liabilities = np.random.uniform(50000, 1000000)

    assets_sum = cash_amt + receivables + stock + ppe + misc_assets
    liabilities_sum = st_debt + lt_debt + payables + misc_liabilities

    return [
        cid, yr,
        cash_amt, receivables, stock, ppe, misc_assets,
        st_debt, lt_debt, payables, misc_liabilities,
        assets_sum, liabilities_sum,
    ]


# Companies in the outer loop, years in the inner loop.
assets_liabilities = [
    _balance_sheet_row(cid, yr)
    for cid in range(1, num_companies + 1)
    for yr in years
]

assets_liabilities_df = pd.DataFrame(assets_liabilities, columns=asset_liability_columns)
assets_liabilities_csv_path = os.path.join(base_path, 'assets_liabilities.csv')
assets_liabilities_df.to_csv(assets_liabilities_csv_path, index=False)

In [18]:
# Generate Sales Data
# One row per (company, year): revenue is drawn first, then cost of goods sold,
# each from its own uniform range.
sales = [
    [
        cid,
        yr,
        np.random.uniform(500000, 20000000),   # Revenue
        np.random.uniform(100000, 15000000),   # Cost_of_Goods_Sold
    ]
    for cid in range(1, num_companies + 1)
    for yr in years
]

sales_columns = ["CompanyID", "Year", "Revenue", "Cost_of_Goods_Sold"]
sales_df = pd.DataFrame(sales, columns=sales_columns)
sales_df.to_csv(os.path.join(base_path, 'sales.csv'), index=False)

In [20]:
# Generate Investments Data
# For every company, append one row per year holding two independent uniform
# draws: capital expenditure first, then equity investment.
investment_columns = ["CompanyID", "Year", "Capital_Expenditure", "Equity_Investment"]

investments = []
for cid in range(1, num_companies + 1):
    investments.extend(
        [cid, yr, np.random.uniform(50000, 5000000), np.random.uniform(10000, 1000000)]
        for yr in years
    )

investments_df = pd.DataFrame(investments, columns=investment_columns)
investments_csv_path = os.path.join(base_path, 'investments.csv')
investments_df.to_csv(investments_csv_path, index=False)

In [22]:
# Generate Financials Data
# One income-statement row per (company, year). Revenue, COGS, operating income,
# interest, tax and depreciation are independent uniform draws; every other
# column is derived from them.
# NOTE(review): Revenue / Cost_of_Goods_Sold are drawn afresh here, so they will
# not match the values written to sales.csv for the same CompanyID/Year, and
# Operating_Income is not tied to Gross_Profit -- confirm whether that is intended.
financials = []
for company_id in range(1, num_companies + 1):
    for year in years:
        revenue = np.random.uniform(500000, 20000000)
        cogs = np.random.uniform(100000, 15000000)
        gross_profit = revenue - cogs
        operating_income = np.random.uniform(200000, 8000000)
        interest_expense = np.random.uniform(20000, 500000)
        tax_expense = np.random.uniform(10000, 200000)
        depreciation = np.random.uniform(20000, 1000000)

        # Derived metrics, kept mutually consistent:
        #   EBIT       = operating income (earnings before interest and tax)
        #   EBITDA     = EBIT + depreciation
        #   EBT        = EBIT - interest
        #   Net income = EBT - tax
        # Previously EBIT added interest on top of operating income and EBT
        # subtracted tax instead of interest, which contradicted Net_Income.
        ebit = operating_income
        ebitda = ebit + depreciation
        ebt = ebit - interest_expense
        net_income = ebt - tax_expense

        financials.append([
            company_id, year, revenue, cogs, gross_profit, operating_income,
            interest_expense, tax_expense, net_income, depreciation, ebit, ebitda, ebt
        ])

financials_df = pd.DataFrame(financials, columns=[
    "CompanyID", "Year", "Revenue", "Cost_of_Goods_Sold", "Gross_Profit", "Operating_Income",
    "Interest_Expense", "Tax_Expense", "Net_Income", "Depreciation", "EBIT", "EBITDA", "EBT"
])
financials_df.to_csv(os.path.join(base_path, 'financials.csv'), index=False)

In [24]:
# Generate Cash Flow Data
# One row per (company, year): three independent uniform draws (operating,
# investing, financing) plus their sum as the net cash flow.
cash_flow_columns = [
    "CompanyID", "Year", "Operating_CashFlow", "Investing_CashFlow", "Financing_CashFlow", "Net_CashFlow"
]


def _cash_flow_row(cid, yr):
    """Draw one cash-flow row for company `cid` in year `yr`.

    The returned list follows the order of `cash_flow_columns`.
    """
    operating = np.random.uniform(50000, 5000000)
    investing = np.random.uniform(-2000000, 2000000)
    financing = np.random.uniform(-1000000, 1000000)
    return [cid, yr, operating, investing, financing, operating + investing + financing]


# Companies in the outer loop, years in the inner loop.
cash_flows = [
    _cash_flow_row(cid, yr)
    for cid in range(1, num_companies + 1)
    for yr in years
]

cash_flow_df = pd.DataFrame(cash_flows, columns=cash_flow_columns)
cash_flow_csv_path = os.path.join(base_path, 'cash_flow.csv')
cash_flow_df.to_csv(cash_flow_csv_path, index=False)

print("Enhanced data generation completed and files saved.")

Enhanced data generation completed and files saved.
