In [5]:
import random
from faker import Faker
import pandas as pd

fake = Faker()

# Function to generate fake financial data
def generate_financial_data(num_records, seed=None):
    """Generate a list of synthetic financial records.

    Revenue, the four expense categories, and cash inflows/outflows are each
    drawn as independent uniform random integers; the totals, profit/loss,
    cash flow and ROI are derived from those draws.

    Args:
        num_records: Number of records to produce. Zero or a negative value
            yields an empty list.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used, so the
            result follows the global random state. When given, a private
            ``random.Random(seed)`` is used and the global state is left
            untouched.

    Returns:
        A list of dicts, one per record, with the keys 'Revenue',
        'Operating Costs', 'Marketing Expenses', 'Salaries', 'Utilities',
        'Total Expenses', 'Profit/Loss', 'Cash Inflows', 'Cash Outflows',
        'Cash Flow' and 'ROI'. 'ROI' is profit/loss as a percentage of total
        expenses.
    """
    # A dedicated generator keeps seeded runs isolated from global state;
    # the module itself exposes the same randint() API for the unseeded path.
    rng = random.Random(seed) if seed is not None else random

    data = []
    for _ in range(num_records):
        revenue = rng.randint(100000, 1000000)
        operating_costs = rng.randint(20000, 200000)
        marketing_expenses = rng.randint(5000, 50000)
        salaries = rng.randint(30000, 150000)
        utilities = rng.randint(10000, 50000)

        total_expenses = operating_costs + marketing_expenses + salaries + utilities
        profit_loss = revenue - total_expenses
        cash_inflows = rng.randint(50000, 200000)
        cash_outflows = rng.randint(30000, 150000)
        cash_flow = cash_inflows - cash_outflows
        # With the ranges above total_expenses is always positive; the guard
        # is kept as a safety net in case the ranges are ever lowered to zero.
        roi = (profit_loss / total_expenses) * 100 if total_expenses != 0 else 0

        data.append({
            'Revenue': revenue,
            'Operating Costs': operating_costs,
            'Marketing Expenses': marketing_expenses,
            'Salaries': salaries,
            'Utilities': utilities,
            'Total Expenses': total_expenses,
            'Profit/Loss': profit_loss,
            'Cash Inflows': cash_inflows,
            'Cash Outflows': cash_outflows,
            'Cash Flow': cash_flow,
            'ROI': roi
        })
    return data

# Seed the generator so the records (and the CSV written below) are the same
# on every run of this cell.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Generate fake financial data
num_records = 1000
financial_data = generate_financial_data(num_records)

# Create a DataFrame
df = pd.DataFrame(financial_data)

# Save the DataFrame to a CSV file
df.to_csv('financial_data1.csv', index=False)

# Display the first rows of the DataFrame
print(df.head())


   Revenue  Operating Costs  Marketing Expenses  Salaries  Utilities  \
0   249710            81748                9523     85514      19318   
1   954738           189281               20528     62984      27234   
2   167693            51922               24540     43577      39739   
3   602212           161878               11688     86991      23409   
4   557698           142764               46642    138875      10718   

   Total Expenses  Profit/Loss  Cash Inflows  Cash Outflows  Cash Flow  \
0          196103        53607        192814          48147     144667   
1          300027       654711        129129         144537     -15408   
2          159778         7915        175833          78513      97320   
3          283966       318246         52779         120872     -68093   
4          338999       218699        109286         110052       -766   

          ROI  
0   27.336145  
1  218.217360  
2    4.953748  
3  112.071868  
4   64.513170  


In [3]:
!pip install faker

Collecting faker
  Downloading Faker-24.4.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-24.4.0-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   -- ------------------------------------- 0.1/1.8 MB 1.1 MB/s eta 0:00:02
   ----- ---------------------------------- 0.3/1.8 MB 2.3 MB/s eta 0:00:01
   ------------ --------------------------- 0.5/1.8 MB 3.0 MB/s eta 0:00:01
   ---------------- ----------------------- 0.7/1.8 MB 3.4 MB/s eta 0:00:01
   ------------------------ --------------- 1.1/1.8 MB 4.1 MB/s eta 0:00:01
   ----------------------------- ---------- 1.3/1.8 MB 4.3 MB/s eta 0:00:01
   --------------------------------- ------ 1.5/1.8 MB 4.1 MB/s eta 0:00:01
   ------------------------------------- -- 1.6/1.8 MB 4.0 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 3.9 MB/s eta 0:00:00
Installing collected packages: faker
Successfully

In [1]:
import random
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

# Function to generate fake real-estate records at random 1-30 day intervals between two dates
def generate_fake_data(start_date, end_date, seed=None):
    """Generate synthetic real-estate records at irregular dates.

    Starting at ``start_date``, one record is produced and the date is then
    advanced by a random 1-30 days, repeating while the date is on or before
    ``end_date``.

    Args:
        start_date: Date of the first record.
        end_date: Last date (inclusive) a record may carry. If it is earlier
            than ``start_date`` the result is an empty list.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used. When
            given, a private ``random.Random(seed)`` is used and the global
            random state is left untouched.

    Returns:
        A list of dicts with the keys 'Date', 'Revenue', 'Project_Location',
        'Property_Type', 'Subcategory', 'Sold', 'Area_Built', 'Expenses' and
        'Unsold'. 'Area_Built' is ``None`` when 'Sold' is 0.
    """
    rng = random.Random(seed) if seed is not None else random

    # Choice tables are built once instead of on every loop iteration.
    locations = ['Pune', 'Thane', 'Kalyan', 'Lonavala', 'Mumbai', 'PCMC']
    property_types = ['Flats', 'Apartment', 'Shop']
    subcategories = {
        'Flats': ['1BHK', '2BHK', '3BHK'],
        'Apartment': ['4BHK', '5BHK', '4BHK+Garden'],
        'Shop': ['350 SQ FT', '400 SQ FT', '275 SQ FT'],
    }

    data = []
    current_date = start_date
    while current_date <= end_date:
        revenue = rng.randint(50000, 200000)
        expenses = rng.randint(30000, 150000)  # Total expenses for the period
        project_location = rng.choice(locations)
        property_type = rng.choice(property_types)
        subcategory = rng.choice(subcategories[property_type])
        sold = rng.randint(0, 1)
        # A built area is only recorded for sold entries.
        area_built = rng.randint(1000, 10000) if sold == 1 else None
        unsold = rng.randint(0, 20)

        data.append({
            'Date': current_date,
            'Revenue': revenue,
            'Project_Location': project_location,
            'Property_Type': property_type,
            'Subcategory': subcategory,
            'Sold': sold,
            'Area_Built': area_built,
            'Expenses': expenses,
            'Unsold': unsold
        })

        # Move to the next date: a random gap of 1 to 30 days.
        current_date += timedelta(days=rng.randint(1, 30))

    return data

# Seed the generator so the records (and the CSV written below) are the same
# on every run of this cell.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Define start and end date for one year
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)

# Generate fake data for one year
fake_data = generate_fake_data(start_date, end_date)

# Create DataFrame
df = pd.DataFrame(fake_data)

# Display DataFrame
print(df)

# Save the DataFrame to a CSV file
df.to_csv('real_estate_data.csv', index=False)

         Date  Revenue Project_Location Property_Type  Subcategory  Sold  \
0  2023-01-01   196823         Lonavala         Flats         3BHK     0   
1  2023-01-16   152536             PCMC          Shop    275 SQ FT     1   
2  2023-01-22    84270           Mumbai     Apartment         4BHK     0   
3  2023-01-31   113761           Kalyan          Shop    400 SQ FT     0   
4  2023-02-24    50502             Pune     Apartment  4BHK+Garden     0   
5  2023-03-07   175106            Thane          Shop    400 SQ FT     0   
6  2023-03-16    93080         Lonavala         Flats         3BHK     0   
7  2023-03-23   184706         Lonavala     Apartment         4BHK     0   
8  2023-04-09   124824            Thane         Flats         1BHK     0   
9  2023-05-07   188625             PCMC         Flats         1BHK     0   
10 2023-05-30   170929         Lonavala          Shop    275 SQ FT     0   
11 2023-06-13   128343             Pune          Shop    275 SQ FT     0   
12 2023-06-1

In [2]:
import random
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

# Function to generate one fake real-estate record per day between two dates
def generate_fake_data(start_date, end_date, seed=None):
    """Generate one synthetic real-estate record per day.

    A record is produced for ``start_date`` and for each following day up to
    and including ``end_date``.

    Note: another cell in this notebook defines a function with the same
    name (random 1-30 day step, no 'Profit_Loss' key). Whichever cell was
    executed last determines which definition is active.

    Args:
        start_date: Date of the first record.
        end_date: Last date (inclusive) a record is produced for. If it is
            earlier than ``start_date`` the result is an empty list.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used. When
            given, a private ``random.Random(seed)`` is used and the global
            random state is left untouched.

    Returns:
        A list of dicts with the keys 'Date', 'Revenue', 'Project_Location',
        'Property_Type', 'Subcategory', 'Sold', 'Area_Built', 'Expenses',
        'Profit_Loss' and 'Unsold'. 'Area_Built' is ``None`` when 'Sold' is 0;
        'Profit_Loss' is revenue minus expenses.
    """
    rng = random.Random(seed) if seed is not None else random

    # Choice tables are built once instead of on every loop iteration.
    locations = ['Pune', 'Thane', 'Kalyan', 'Lonavala', 'Mumbai', 'PCMC']
    property_types = ['Flats', 'Apartment', 'Shop']
    subcategories = {
        'Flats': ['1BHK', '2BHK', '3BHK'],
        'Apartment': ['4BHK', '5BHK', '4BHK+Garden'],
        'Shop': ['350 SQ FT', '400 SQ FT', '275 SQ FT'],
    }
    one_day = timedelta(days=1)

    data = []
    current_date = start_date
    while current_date <= end_date:
        revenue = rng.randint(50000, 200000)
        expenses = rng.randint(30000, 150000)  # Total expenses for the period
        project_location = rng.choice(locations)
        property_type = rng.choice(property_types)
        subcategory = rng.choice(subcategories[property_type])
        sold = rng.randint(0, 1)
        # A built area is only recorded for sold entries.
        area_built = rng.randint(1000, 10000) if sold == 1 else None
        profit_loss = revenue - expenses
        unsold = rng.randint(0, 20)

        data.append({
            'Date': current_date,
            'Revenue': revenue,
            'Project_Location': project_location,
            'Property_Type': property_type,
            'Subcategory': subcategory,
            'Sold': sold,
            'Area_Built': area_built,
            'Expenses': expenses,
            'Profit_Loss': profit_loss,
            'Unsold': unsold
        })

        # Move to the next calendar day.
        current_date += one_day

    return data

# Seed the generator so the records (and the CSV written below) are the same
# on every run of this cell.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Define start and end date for one year
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)

# Generate fake data for one year
fake_data = generate_fake_data(start_date, end_date)

# Create DataFrame
df = pd.DataFrame(fake_data)

# Display DataFrame
print(df)

# Save the DataFrame to a CSV file
df.to_csv('real_estate_data1.csv', index=False)

          Date  Revenue Project_Location Property_Type  Subcategory  Sold  \
0   2023-01-01   189246             Pune         Flats         1BHK     1   
1   2023-01-02   130724             PCMC     Apartment         4BHK     0   
2   2023-01-03   194012         Lonavala         Flats         3BHK     0   
3   2023-01-04   152960           Kalyan     Apartment  4BHK+Garden     0   
4   2023-01-05    56251         Lonavala          Shop    350 SQ FT     1   
..         ...      ...              ...           ...          ...   ...   
360 2023-12-27   191518             PCMC     Apartment  4BHK+Garden     0   
361 2023-12-28   119431             Pune     Apartment         4BHK     0   
362 2023-12-29   121664             Pune     Apartment  4BHK+Garden     1   
363 2023-12-30   162978           Kalyan         Flats         2BHK     1   
364 2023-12-31    77012           Mumbai         Flats         1BHK     0   

     Area_Built  Expenses  Profit_Loss  Unsold  
0        1746.0    123788 