In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import os

In [3]:
def generate_synthetic_data(start_date, end_date, start_order_id=1):
    """Build a DataFrame of synthetic orders, one batch of rows per day.

    Walks day by day from ``start_date`` while the current day is
    ``<= end_date`` and, for each day, generates a random number of orders
    (between 900 and 1100). All randomness comes from the global ``random``
    module, so call ``random.seed(...)`` beforehand for reproducible output.

    Args:
        start_date: ``datetime`` of the first day to generate.
        end_date: ``datetime`` upper bound (inclusive) for generated days.
        start_order_id: ID assigned to the first generated order; subsequent
            orders count up by one. Defaults to 1. Pass the last ID of a
            previous batch plus one to keep IDs unique across several calls.

    Returns:
        pandas.DataFrame with the columns ``order_id``, ``date``,
        ``product_id``, ``quantity``, ``status``, ``duration``, ``yield`` and
        ``qa_comments``. The columns are present even when the date range is
        empty (``start_date > end_date``).
    """
    # Explicit schema so an empty date range still yields a frame (and CSV
    # header) with the expected columns instead of a column-less DataFrame.
    columns = [
        'order_id', 'date', 'product_id', 'quantity',
        'status', 'duration', 'yield', 'qa_comments',
    ]

    current_date = start_date
    order_id = start_order_id
    data = []

    while current_date <= end_date:
        orders_today = random.randint(900, 1100)
        for _ in range(orders_today):
            product_id = random.choice([0, 1])
            quantity = random.randint(1, 10)
            # status is 1 for roughly 99% of orders, 0 otherwise.
            status = 1 if random.random() < 0.99 else 0
            # duration is the quantity plus a random offset in [1, 2].
            duration = quantity + random.uniform(1, 2)
            yield_rate = random.uniform(0, 1)
            # The success/failure wording uses the unrounded yield, while the
            # number shown in the text is rounded to two decimals.
            qa_comments = (f"Order {order_id} had yield {yield_rate:.2f}. "
                           f"{'Successful batch.' if yield_rate > 0.5 else 'Failed batch.'}")

            data.append({
                'order_id': order_id,
                'date': current_date,
                'product_id': product_id,
                'quantity': quantity,
                'status': status,
                'duration': duration,
                'yield': yield_rate,
                'qa_comments': qa_comments
            })

            order_id += 1

        current_date += timedelta(days=1)

    return pd.DataFrame(data, columns=columns)

In [4]:
def save_to_local(df, folder_path, year):
    """Write ``df`` to ``<folder_path>/<year>.csv`` and print a confirmation.

    Args:
        df: DataFrame to write; the index is not written to the file.
        folder_path: Destination directory. It is created (including any
            missing parent directories) if it does not exist yet.
        year: Value used as the file name stem, i.e. ``f"{year}.csv"``.

    Returns:
        None. An existing file with the same name is overwritten.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and is a no-op when the directory already exists.
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, f"{year}.csv")
    df.to_csv(file_path, index=False)
    print(f"Saved data for year {year} to {file_path}")


In [6]:
# Define the date range. end_date is the moment this cell runs, so the final
# year is only generated up to the current day.
start_date = datetime(2000, 1, 1)
end_date = datetime.now()

# Generate and save data year by year, one CSV file per calendar year.
current_year = start_date.year
while current_year <= end_date.year:
    # Clamp each calendar year to the requested range: the first year begins
    # at start_date, the last year stops at end_date, and every other year
    # runs from 1 January through 31 December.
    # (The first-year check used to compare against a hard-coded 2010, which
    # made the 2010 file span 2000-01-01 through 2010-12-31.)
    year_start_date = max(start_date, datetime(current_year, 1, 1))
    year_end_date = min(end_date, datetime(current_year, 12, 31))

    # NOTE: order_id values start again from the generator's default for
    # every year, so IDs are only unique within a single file.
    df = generate_synthetic_data(year_start_date, year_end_date)
    save_to_local(df, 'local_data_folder', current_year)

    current_year += 1

Saved data for year 2000 to local_data_folder/2000.csv
Saved data for year 2001 to local_data_folder/2001.csv
Saved data for year 2002 to local_data_folder/2002.csv
Saved data for year 2003 to local_data_folder/2003.csv
Saved data for year 2004 to local_data_folder/2004.csv
Saved data for year 2005 to local_data_folder/2005.csv
Saved data for year 2006 to local_data_folder/2006.csv
Saved data for year 2007 to local_data_folder/2007.csv
Saved data for year 2008 to local_data_folder/2008.csv
Saved data for year 2009 to local_data_folder/2009.csv
Saved data for year 2010 to local_data_folder/2010.csv
Saved data for year 2011 to local_data_folder/2011.csv
Saved data for year 2012 to local_data_folder/2012.csv
Saved data for year 2013 to local_data_folder/2013.csv
Saved data for year 2014 to local_data_folder/2014.csv
Saved data for year 2015 to local_data_folder/2015.csv
Saved data for year 2016 to local_data_folder/2016.csv
Saved data for year 2017 to local_data_folder/2017.csv
Saved data